PPCISelLowering.cpp source code [llvm_projects/llvm/lib/Target/PowerPC/PPCISelLowering.cpp]

1	//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the PPCISelLowering class.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "PPCISelLowering.h"
14	#include "MCTargetDesc/PPCMCTargetDesc.h"
15	#include "MCTargetDesc/PPCPredicates.h"
16	#include "PPC.h"
17	#include "PPCCallingConv.h"
18	#include "PPCFrameLowering.h"
19	#include "PPCInstrInfo.h"
20	#include "PPCMachineFunctionInfo.h"
21	#include "PPCPerfectShuffle.h"
22	#include "PPCRegisterInfo.h"
23	#include "PPCSelectionDAGInfo.h"
24	#include "PPCSubtarget.h"
25	#include "PPCTargetMachine.h"
26	#include "llvm/ADT/APFloat.h"
27	#include "llvm/ADT/APInt.h"
28	#include "llvm/ADT/APSInt.h"
29	#include "llvm/ADT/ArrayRef.h"
30	#include "llvm/ADT/DenseMap.h"
31	#include "llvm/ADT/STLExtras.h"
32	#include "llvm/ADT/SmallPtrSet.h"
33	#include "llvm/ADT/SmallVector.h"
34	#include "llvm/ADT/Statistic.h"
35	#include "llvm/ADT/StringRef.h"
36	#include "llvm/CodeGen/CallingConvLower.h"
37	#include "llvm/CodeGen/ISDOpcodes.h"
38	#include "llvm/CodeGen/LivePhysRegs.h"
39	#include "llvm/CodeGen/MachineBasicBlock.h"
40	#include "llvm/CodeGen/MachineFrameInfo.h"
41	#include "llvm/CodeGen/MachineFunction.h"
42	#include "llvm/CodeGen/MachineInstr.h"
43	#include "llvm/CodeGen/MachineInstrBuilder.h"
44	#include "llvm/CodeGen/MachineJumpTableInfo.h"
45	#include "llvm/CodeGen/MachineLoopInfo.h"
46	#include "llvm/CodeGen/MachineMemOperand.h"
47	#include "llvm/CodeGen/MachineModuleInfo.h"
48	#include "llvm/CodeGen/MachineOperand.h"
49	#include "llvm/CodeGen/MachineRegisterInfo.h"
50	#include "llvm/CodeGen/SelectionDAG.h"
51	#include "llvm/CodeGen/SelectionDAGNodes.h"
52	#include "llvm/CodeGen/TargetInstrInfo.h"
53	#include "llvm/CodeGen/TargetLowering.h"
54	#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
55	#include "llvm/CodeGen/TargetRegisterInfo.h"
56	#include "llvm/CodeGen/ValueTypes.h"
57	#include "llvm/CodeGenTypes/MachineValueType.h"
58	#include "llvm/IR/CallingConv.h"
59	#include "llvm/IR/Constant.h"
60	#include "llvm/IR/Constants.h"
61	#include "llvm/IR/DataLayout.h"
62	#include "llvm/IR/DebugLoc.h"
63	#include "llvm/IR/DerivedTypes.h"
64	#include "llvm/IR/Function.h"
65	#include "llvm/IR/GlobalValue.h"
66	#include "llvm/IR/IRBuilder.h"
67	#include "llvm/IR/Instructions.h"
68	#include "llvm/IR/Intrinsics.h"
69	#include "llvm/IR/IntrinsicsPowerPC.h"
70	#include "llvm/IR/Module.h"
71	#include "llvm/IR/Type.h"
72	#include "llvm/IR/Use.h"
73	#include "llvm/IR/Value.h"
74	#include "llvm/MC/MCContext.h"
75	#include "llvm/MC/MCExpr.h"
76	#include "llvm/MC/MCSectionXCOFF.h"
77	#include "llvm/MC/MCSymbolXCOFF.h"
78	#include "llvm/Support/AtomicOrdering.h"
79	#include "llvm/Support/BranchProbability.h"
80	#include "llvm/Support/Casting.h"
81	#include "llvm/Support/CodeGen.h"
82	#include "llvm/Support/CommandLine.h"
83	#include "llvm/Support/Compiler.h"
84	#include "llvm/Support/Debug.h"
85	#include "llvm/Support/ErrorHandling.h"
86	#include "llvm/Support/Format.h"
87	#include "llvm/Support/KnownBits.h"
88	#include "llvm/Support/MathExtras.h"
89	#include "llvm/Support/raw_ostream.h"
90	#include "llvm/Target/TargetMachine.h"
91	#include "llvm/Target/TargetOptions.h"
92	#include <algorithm>
93	#include <cassert>
94	#include <cstdint>
95	#include <iterator>
96	#include <list>
97	#include <optional>
98	#include <utility>
99	#include <vector>
100
101	using namespace llvm;
102
103	#define DEBUG_TYPE "ppc-lowering"
104
// Hidden boolean command-line option "disable-p10-store-forward", explicitly
// initialized to false. Per its description, setting it disables the P10
// store forward-friendly conversion.
// NOTE(review): the `Val:` label inside cl::init looks like a parameter-name
// hint left over from the source listing rather than C++ syntax - confirm.
105	static cl::opt<bool> DisableP10StoreForward(
106	"disable-p10-store-forward",
107	cl::desc ("disable P10 store forward-friendly conversion"), cl::Hidden,
108	cl::init(Val: false));
109
// Hidden boolean command-line option "disable-ppc-preinc". Per its
// description, setting it disables preincrement load/store generation.
// No cl::init is given, so the option keeps cl::opt's default initial value.
110	static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111	cl::desc ("disable preincrement load/store generation on PPC"), cl::Hidden);
112
// Hidden boolean command-line option "disable-ppc-ilp-pref". Per its
// description, setting it stops the node scheduling preference from being set
// to ILP. No cl::init is given.
113	static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114	cl::desc ("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
// Hidden boolean command-line option "disable-ppc-unaligned". Per its
// description, setting it disables unaligned load/store generation.
// No cl::init is given.
116	static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117	cl::desc ("disable unaligned load/store generation on PPC"), cl::Hidden);
118
// Hidden boolean command-line option "disable-ppc-sco". Per its description,
// setting it disables the sibling call optimization. No cl::init is given.
119	static cl::opt<bool> DisableSCO("disable-ppc-sco",
120	cl::desc ("disable sibling call optimization on ppc"), cl::Hidden);
121
// Hidden boolean command-line option "disable-ppc-innermost-loop-align32".
// Per its description, setting it means innermost loops are not always
// aligned to 32 bytes. No cl::init is given.
122	static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123	cl::desc ("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
// Hidden boolean command-line option "ppc-use-absolute-jumptables". Per its
// description, setting it selects absolute jump tables. No cl::init is given.
125	static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126	cl::desc ("use absolute jump tables on ppc"), cl::Hidden);
127
// Hidden boolean command-line option "ppc-disable-perfect-shuffle". Note that
// it is explicitly initialized to true, so the "vector permute decomposition"
// named in its description is disabled unless the option is turned off.
128	static cl::opt<bool>
129	DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130	cl::desc ("disable vector permute decomposition"),
131	cl::init(Val: true), cl::Hidden);
132
// Hidden boolean command-line option "disable-auto-paired-vec-st", explicitly
// initialized to true: automatically generated 32-byte paired vector stores
// are disabled by default.
// Unlike the neighbouring options this one is not declared `static`.
// NOTE(review): presumably it is referenced from another translation unit -
// confirm before changing its linkage.
133	cl::opt<bool> DisableAutoPairedVecSt(
134	"disable-auto-paired-vec-st",
135	cl::desc ("disable automatically generated 32byte paired vector stores"),
136	cl::init(Val: true), cl::Hidden);
137
// Hidden unsigned command-line option "ppc-min-jump-table-entries", initialized
// to 64: the minimum number of entries required to use a jump table on PPC.
// NOTE(review): the backticks around the literal and the `Val:` label look
// like artifacts of the source listing rather than C++ syntax - confirm.
138	static cl::opt<unsigned> PPCMinimumJumpTableEntries(
139	"ppc-min-jump-table-entries", cl::init(Val: `64`), cl::Hidden,
140	cl::desc ("Set minimum number of entries to use a jump table on PPC"));
141
// Hidden unsigned command-line option "ppc-min-bit-test-cmps", initialized to
// 3. Per its description it sets the minimum of the largest number of
// comparisons for which a bit test is used when lowering a switch on PPC.
142	static cl::opt<unsigned> PPCMinimumBitTestCmps(
143	"ppc-min-bit-test-cmps", cl::init(Val: `3`), cl::Hidden,
144	cl::desc ("Set minimum of largest number of comparisons to use bit test for "
145	"switch on PPC."));
146
// Hidden unsigned command-line option "ppc-gather-alias-max-depth",
// initialized to 18: the maximum depth used when checking alias info in
// GatherAllAliases() (that function is not in this part of the file).
147	static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
148	"ppc-gather-alias-max-depth", cl::init(Val: `18`), cl::Hidden,
149	cl::desc ("max depth when checking alias info in GatherAllAliases()"));
150
// Hidden unsigned command-line option "ppc-aix-shared-lib-tls-model-opt-limit",
// initialized to 1. Per its description it is an inclusive limit on the number
// of TLS local-dynamic accesses in a function for which initial-exec is used
// instead.
151	static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
152	"ppc-aix-shared-lib-tls-model-opt-limit", cl::init(Val: `1`), cl::Hidden,
153	cl::desc ("Set inclusive limit count of TLS local-dynamic access(es) in a "
154	"function to use initial-exec"));
155
// Statistic counters declared with LLVM's STATISTIC macro. Each pairs a
// counter variable with the description string reported for it; the code that
// increments them is not in this part of the file.
//   NumTailCalls             - tail calls
//   NumSiblingCalls          - sibling calls
//   ShufflesHandledWithVPERM - shuffles lowered to a VPERM or XXPERM
//   NumDynamicAllocaProbed   - dynamic stack allocations probed
156	STATISTIC(NumTailCalls, "Number of tail calls");
157	STATISTIC(NumSiblingCalls, "Number of sibling calls");
158	STATISTIC(ShufflesHandledWithVPERM,
159	"Number of shuffles lowered to a VPERM or XXPERM");
160	STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
// Forward declaration of a file-local helper; its definition is not in this
// part of the file.
// NOTE(review): the parameter list below looks damaged by text extraction
// (a `*` appears to have moved from after ShuffleVectorSDNode to after int).
// Presumably the intended signature is (ShuffleVectorSDNode *, unsigned, int);
// verify against the definition before relying on this declaration.
162	static bool isNByteElemShuffleMask(ShuffleVectorSDNode , unsigned, int*);
163
// Forward declaration of a file-local helper that takes the DAG, a vector
// value and a debug location and returns an SDValue; its definition is not in
// this part of the file.
164	static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
166	// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167	// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168	// variables; consistent with the IBM XL compiler, we apply a max size of
169	// slightly under 32KB.
170	constexpr uint64_t AIXSmallTlsPolicySizeLimit = `32751`;
171
172	// FIXME: Remove this once the ANDI glue bug has been fixed!
// Declared extern here; the option's definition is outside this part of the
// file. When it is set and CR bits are in use, the PPCTargetLowering
// constructor marks ISD::TRUNCATE to i1 as Custom.
173	extern cl::opt<bool> ANDIGlueBug;
174
175	PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
176	const PPCSubtarget &STI)
177	: TargetLowering (TM, STI), Subtarget(STI) {
178	// Initialize map that relates the PPC addressing modes to the computed flags
179	// of a load/store instruction. The map is used to determine the optimal
180	// addressing mode when selecting load and stores.
181	initializeAddrModeMap();
182	// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183	// arguments are at least 4/8 bytes aligned.
184	bool isPPC64 = Subtarget.isPPC64();
185	setMinStackArgumentAlignment(isPPC64 ? Align (`8`) : Align (`4`));
186	const MVT RegVT = Subtarget.getScalarIntVT();
187
188	// Set up the register classes.
189	addRegisterClass(VT: MVT::i32, RC: &PPC::GPRCRegClass);
190	if (!useSoftFloat()) {
191	if (hasSPE()) {
192	addRegisterClass(VT: MVT::f32, RC: &PPC::GPRCRegClass);
193	// EFPU2 APU only supports f32
194	if (!Subtarget.hasEFPU2())
195	addRegisterClass(VT: MVT::f64, RC: &PPC::SPERCRegClass);
196	} else {
197	addRegisterClass(VT: MVT::f32, RC: &PPC::F4RCRegClass);
198	addRegisterClass(VT: MVT::f64, RC: &PPC::F8RCRegClass);
199	}
200	}
201
202	setOperationAction(Op: ISD::UADDO, VT: RegVT, Action: Custom);
203	setOperationAction(Op: ISD::USUBO, VT: RegVT, Action: Custom);
204
205	// PowerPC uses addo_carry,subo_carry to propagate carry.
206	setOperationAction(Op: ISD::UADDO_CARRY, VT: RegVT, Action: Custom);
207	setOperationAction(Op: ISD::USUBO_CARRY, VT: RegVT, Action: Custom);
208
209	// On P10, the default lowering generates better code using the
210	// setbc instruction.
211	if (!Subtarget.hasP10Vector()) {
212	setOperationAction(Op: ISD::SSUBO, VT: MVT::i32, Action: Custom);
213	setOperationAction(Op: ISD::SADDO, VT: MVT::i32, Action: Custom);
214	if (isPPC64) {
215	setOperationAction(Op: ISD::SSUBO, VT: MVT::i64, Action: Custom);
216	setOperationAction(Op: ISD::SADDO, VT: MVT::i64, Action: Custom);
217	}
218	}
219
220	// Match BITREVERSE to customized fast code sequence in the td file.
221	setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Legal);
222	setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i64, Action: Legal);
223
224	// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
225	setOperationAction(Op: ISD::ATOMIC_CMP_SWAP, VT: MVT::i32, Action: Custom);
226
227	// Custom lower inline assembly to check for special registers.
228	setOperationAction(Op: ISD::INLINEASM, VT: MVT::Other, Action: Custom);
229	setOperationAction(Op: ISD::INLINEASM_BR, VT: MVT::Other, Action: Custom);
230
231	// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232	for (MVT VT : MVT::integer_valuetypes()) {
233	setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
234	setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i8, Action: Expand);
235	}
236
237	setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f16, Action: Expand);
238	setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f128, Action: Expand);
239
240	if (Subtarget.isISA3_0()) {
241	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: MVT::f16, Action: Legal);
242	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Legal);
243	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Legal);
244	setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Legal);
245	setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Legal);
246	} else {
247	// No extending loads from f16 or HW conversions back and forth.
248	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: MVT::f16, Action: Expand);
249	setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f128, Action: Expand);
250	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
251	setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand);
252	setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64, Action: Expand);
253	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
254	setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32, Action: Expand);
255	setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32, Action: Expand);
256	setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
257	setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
258	}
259
260	setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
261
262	// PowerPC has pre-inc load and store's.
263	setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i1, Action: Legal);
264	setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i8, Action: Legal);
265	setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i16, Action: Legal);
266	setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i32, Action: Legal);
267	setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i64, Action: Legal);
268	setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i1, Action: Legal);
269	setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i8, Action: Legal);
270	setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i16, Action: Legal);
271	setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i32, Action: Legal);
272	setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i64, Action: Legal);
273	if (!Subtarget.hasSPE()) {
274	setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::f32, Action: Legal);
275	setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::f64, Action: Legal);
276	setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::f32, Action: Legal);
277	setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::f64, Action: Legal);
278	}
279
280	if (Subtarget.useCRBits()) {
281	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
282
283	if (isPPC64 \|\| Subtarget.hasFPCVT()) {
284	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i1, Action: Promote);
285	AddPromotedToType(Opc: ISD::STRICT_SINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
286	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i1, Action: Promote);
287	AddPromotedToType(Opc: ISD::STRICT_UINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
288
289	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i1, Action: Promote);
290	AddPromotedToType(Opc: ISD::SINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
291	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i1, Action: Promote);
292	AddPromotedToType(Opc: ISD::UINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
293
294	setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i1, Action: Promote);
295	AddPromotedToType(Opc: ISD::STRICT_FP_TO_SINT, OrigVT: MVT::i1, DestVT: RegVT);
296	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i1, Action: Promote);
297	AddPromotedToType(Opc: ISD::STRICT_FP_TO_UINT, OrigVT: MVT::i1, DestVT: RegVT);
298
299	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i1, Action: Promote);
300	AddPromotedToType(Opc: ISD::FP_TO_SINT, OrigVT: MVT::i1, DestVT: RegVT);
301	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i1, Action: Promote);
302	AddPromotedToType(Opc: ISD::FP_TO_UINT, OrigVT: MVT::i1, DestVT: RegVT);
303	} else {
304	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i1, Action: Custom);
305	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i1, Action: Custom);
306	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i1, Action: Custom);
307	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i1, Action: Custom);
308	}
309
310	// PowerPC does not support direct load/store of condition registers.
311	setOperationAction(Op: ISD::LOAD, VT: MVT::i1, Action: Custom);
312	setOperationAction(Op: ISD::STORE, VT: MVT::i1, Action: Custom);
313
314	// FIXME: Remove this once the ANDI glue bug is fixed:
315	if (ANDIGlueBug)
316	setOperationAction(Op: ISD::TRUNCATE, VT: MVT::i1, Action: Custom);
317
318	for (MVT VT : MVT::integer_valuetypes()) {
319	setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
320	setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
321	setTruncStoreAction(ValVT: VT, MemVT: MVT::i1, Action: Expand);
322	}
323
324	addRegisterClass(VT: MVT::i1, RC: &PPC::CRBITRCRegClass);
325	}
326
327	// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328	// PPC (the libcall is not available).
329	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::ppcf128, Action: Custom);
330	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::ppcf128, Action: Custom);
331	setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::ppcf128, Action: Custom);
332	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::ppcf128, Action: Custom);
333
334	// We do not currently implement these libm ops for PowerPC.
335	setOperationAction(Op: ISD::FFLOOR, VT: MVT::ppcf128, Action: Expand);
336	setOperationAction(Op: ISD::FCEIL, VT: MVT::ppcf128, Action: Expand);
337	setOperationAction(Op: ISD::FTRUNC, VT: MVT::ppcf128, Action: Expand);
338	setOperationAction(Op: ISD::FRINT, VT: MVT::ppcf128, Action: Expand);
339	setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::ppcf128, Action: Expand);
340	setOperationAction(Op: ISD::FREM, VT: MVT::ppcf128, Action: LibCall);
341
342	// PowerPC has no SREM/UREM instructions unless we are on P9
343	// On P9 we may use a hardware instruction to compute the remainder.
344	// When the result of both the remainder and the division is required it is
345	// more efficient to compute the remainder from the result of the division
346	// rather than use the remainder instruction. The instructions are legalized
347	// directly because the DivRemPairsPass performs the transformation at the IR
348	// level.
349	if (Subtarget.isISA3_0()) {
350	setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Legal);
351	setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Legal);
352	setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Legal);
353	setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Legal);
354	} else {
355	setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Expand);
356	setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Expand);
357	setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Expand);
358	setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Expand);
359	}
360
361	// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
362	setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Expand);
363	setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Expand);
364	setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand);
365	setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand);
366	setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Expand);
367	setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Expand);
368	setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand);
369	setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand);
370
371	// Handle constrained floating-point operations of scalar.
372	// TODO: Handle SPE specific operation.
373	setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f32, Action: Legal);
374	setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f32, Action: Legal);
375	setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f32, Action: Legal);
376	setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f32, Action: Legal);
377	setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f32, Action: Legal);
378
379	setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f64, Action: Legal);
380	setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f64, Action: Legal);
381	setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f64, Action: Legal);
382	setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f64, Action: Legal);
383
384	if (!Subtarget.hasSPE()) {
385	setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f32, Action: Legal);
386	setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f64, Action: Legal);
387	}
388
389	if (Subtarget.hasVSX()) {
390	setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f32, Action: Legal);
391	setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f64, Action: Legal);
392	}
393
394	if (Subtarget.hasFSQRT()) {
395	setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f32, Action: Legal);
396	setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f64, Action: Legal);
397	}
398
399	if (Subtarget.hasFPRND()) {
400	setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f32, Action: Legal);
401	setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f32, Action: Legal);
402	setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f32, Action: Legal);
403	setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f32, Action: Legal);
404
405	setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f64, Action: Legal);
406	setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f64, Action: Legal);
407	setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f64, Action: Legal);
408	setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f64, Action: Legal);
409	}
410
411	// We don't support sin/cos/sqrt/fmod/pow
412	setOperationAction(Op: ISD::FSIN , VT: MVT::f64, Action: Expand);
413	setOperationAction(Op: ISD::FCOS , VT: MVT::f64, Action: Expand);
414	setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand);
415	setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: LibCall);
416	setOperationAction(Op: ISD::FPOW , VT: MVT::f64, Action: Expand);
417	setOperationAction(Op: ISD::FSIN , VT: MVT::f32, Action: Expand);
418	setOperationAction(Op: ISD::FCOS , VT: MVT::f32, Action: Expand);
419	setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand);
420	setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: LibCall);
421	setOperationAction(Op: ISD::FPOW , VT: MVT::f32, Action: Expand);
422
423	// MASS transformation for LLVM intrinsics with replicating fast-math flag
424	// to be consistent to PPCGenScalarMASSEntries pass
425	if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
426	setOperationAction(Op: ISD::FSIN , VT: MVT::f64, Action: Custom);
427	setOperationAction(Op: ISD::FCOS , VT: MVT::f64, Action: Custom);
428	setOperationAction(Op: ISD::FPOW , VT: MVT::f64, Action: Custom);
429	setOperationAction(Op: ISD::FLOG, VT: MVT::f64, Action: Custom);
430	setOperationAction(Op: ISD::FLOG10, VT: MVT::f64, Action: Custom);
431	setOperationAction(Op: ISD::FEXP, VT: MVT::f64, Action: Custom);
432	setOperationAction(Op: ISD::FSIN , VT: MVT::f32, Action: Custom);
433	setOperationAction(Op: ISD::FCOS , VT: MVT::f32, Action: Custom);
434	setOperationAction(Op: ISD::FPOW , VT: MVT::f32, Action: Custom);
435	setOperationAction(Op: ISD::FLOG, VT: MVT::f32, Action: Custom);
436	setOperationAction(Op: ISD::FLOG10, VT: MVT::f32, Action: Custom);
437	setOperationAction(Op: ISD::FEXP, VT: MVT::f32, Action: Custom);
438	}
439
440	if (Subtarget.hasSPE()) {
441	setOperationAction(Op: ISD::FMA , VT: MVT::f64, Action: Expand);
442	setOperationAction(Op: ISD::FMA , VT: MVT::f32, Action: Expand);
443	} else {
444	setOperationAction(Op: ISD::FMA , VT: MVT::f64, Action: Legal);
445	setOperationAction(Op: ISD::FMA , VT: MVT::f32, Action: Legal);
446	setOperationAction(Op: ISD::GET_ROUNDING, VT: MVT::i32, Action: Custom);
447	setOperationAction(Op: ISD::SET_ROUNDING, VT: MVT::Other, Action: Custom);
448	}
449
450	if (Subtarget.hasSPE())
451	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
452
453	// If we're enabling GP optimizations, use hardware square root
454	if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
455	setOperationAction(Op: ISD::FSQRT, VT: MVT::f64, Action: Expand);
456
457	if (!Subtarget.hasFSQRT() &&
458	!(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
459	setOperationAction(Op: ISD::FSQRT, VT: MVT::f32, Action: Expand);
460
461	if (Subtarget.hasFCPSGN()) {
462	setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f64, Action: Legal);
463	setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f32, Action: Legal);
464	} else {
465	setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f64, Action: Expand);
466	setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f32, Action: Expand);
467	}
468
469	if (Subtarget.hasFPRND()) {
470	setOperationAction(Op: ISD::FFLOOR, VT: MVT::f64, Action: Legal);
471	setOperationAction(Op: ISD::FCEIL, VT: MVT::f64, Action: Legal);
472	setOperationAction(Op: ISD::FTRUNC, VT: MVT::f64, Action: Legal);
473	setOperationAction(Op: ISD::FROUND, VT: MVT::f64, Action: Legal);
474
475	setOperationAction(Op: ISD::FFLOOR, VT: MVT::f32, Action: Legal);
476	setOperationAction(Op: ISD::FCEIL, VT: MVT::f32, Action: Legal);
477	setOperationAction(Op: ISD::FTRUNC, VT: MVT::f32, Action: Legal);
478	setOperationAction(Op: ISD::FROUND, VT: MVT::f32, Action: Legal);
479	}
480
481	// Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
482	// instruction xxbrd to speed up scalar BSWAP64.
483	if (Subtarget.isISA3_1()) {
484	setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Legal);
485	setOperationAction(Op: ISD::BSWAP, VT: MVT::i64, Action: Legal);
486	} else {
487	setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Expand);
488	setOperationAction(Op: ISD::BSWAP, VT: MVT::i64,
489	Action: (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490	}
491
492	// CTPOP or CTTZ were introduced in P8/P9 respectively
493	if (Subtarget.isISA3_0()) {
494	setOperationAction(Op: ISD::CTTZ , VT: MVT::i32 , Action: Legal);
495	setOperationAction(Op: ISD::CTTZ , VT: MVT::i64 , Action: Legal);
496	} else {
497	setOperationAction(Op: ISD::CTTZ , VT: MVT::i32 , Action: Expand);
498	setOperationAction(Op: ISD::CTTZ , VT: MVT::i64 , Action: Expand);
499	}
500
501	if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
502	setOperationAction(Op: ISD::CTPOP, VT: MVT::i32 , Action: Legal);
503	setOperationAction(Op: ISD::CTPOP, VT: MVT::i64 , Action: Legal);
504	} else {
505	setOperationAction(Op: ISD::CTPOP, VT: MVT::i32 , Action: Expand);
506	setOperationAction(Op: ISD::CTPOP, VT: MVT::i64 , Action: Expand);
507	}
508
509	// PowerPC does not have ROTR
510	setOperationAction(Op: ISD::ROTR, VT: MVT::i32 , Action: Expand);
511	setOperationAction(Op: ISD::ROTR, VT: MVT::i64 , Action: Expand);
512
513	if (!Subtarget.useCRBits()) {
514	// PowerPC does not have Select
515	setOperationAction(Op: ISD::SELECT, VT: MVT::i32, Action: Expand);
516	setOperationAction(Op: ISD::SELECT, VT: MVT::i64, Action: Expand);
517	setOperationAction(Op: ISD::SELECT, VT: MVT::f32, Action: Expand);
518	setOperationAction(Op: ISD::SELECT, VT: MVT::f64, Action: Expand);
519	}
520
521	// PowerPC wants to turn select_cc of FP into fsel when possible.
522	setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Custom);
523	setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Custom);
524
525	// PowerPC wants to optimize integer setcc a bit
526	if (!Subtarget.useCRBits())
527	setOperationAction(Op: ISD::SETCC, VT: MVT::i32, Action: Custom);
528
529	if (Subtarget.hasFPU()) {
530	setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal);
531	setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal);
532	setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f128, Action: Legal);
533
534	setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
535	setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
536	setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f128, Action: Legal);
537	}
538
539	// PowerPC does not have BRCOND which requires SetCC
540	if (!Subtarget.useCRBits())
541	setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Expand);
542
543	setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
544
545	if (Subtarget.hasSPE()) {
546	// SPE has built-in conversions
547	setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i32, Action: Legal);
548	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Legal);
549	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i32, Action: Legal);
550	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Legal);
551	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Legal);
552	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Legal);
553
554	// SPE supports signaling compare of f32/f64.
555	setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
556	setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
557	} else {
558	// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
559	setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i32, Action: Custom);
560	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
561
562	// PowerPC does not have [U|S]INT_TO_FP
563	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Expand);
564	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i32, Action: Expand);
565	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Expand);
566	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Expand);
567	}
568
569	if (Subtarget.hasDirectMove() && isPPC64) {
570	setOperationAction(Op: ISD::BITCAST, VT: MVT::f32, Action: Legal);
571	setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Legal);
572	setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Legal);
573	setOperationAction(Op: ISD::BITCAST, VT: MVT::f64, Action: Legal);
574
575	setOperationAction(Op: ISD::STRICT_LRINT, VT: MVT::f64, Action: Custom);
576	setOperationAction(Op: ISD::STRICT_LRINT, VT: MVT::f32, Action: Custom);
577	setOperationAction(Op: ISD::STRICT_LLRINT, VT: MVT::f64, Action: Custom);
578	setOperationAction(Op: ISD::STRICT_LLRINT, VT: MVT::f32, Action: Custom);
579	setOperationAction(Op: ISD::STRICT_LROUND, VT: MVT::f64, Action: Custom);
580	setOperationAction(Op: ISD::STRICT_LROUND, VT: MVT::f32, Action: Custom);
581	setOperationAction(Op: ISD::STRICT_LLROUND, VT: MVT::f64, Action: Custom);
582	setOperationAction(Op: ISD::STRICT_LLROUND, VT: MVT::f32, Action: Custom);
583	} else {
584	setOperationAction(Op: ISD::BITCAST, VT: MVT::f32, Action: Expand);
585	setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Expand);
586	setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Expand);
587	setOperationAction(Op: ISD::BITCAST, VT: MVT::f64, Action: Expand);
588	}
589
590	// We cannot sextinreg(i1). Expand to shifts.
591	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
592
593	// Custom handling for PowerPC ucmp instruction
594	setOperationAction(Op: ISD::UCMP, VT: MVT::i32, Action: Custom);
595	setOperationAction(Op: ISD::UCMP, VT: MVT::i64, Action: isPPC64 ? Custom : Expand);
596
597	// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598	// SjLj exception handling but a light-weight setjmp/longjmp replacement to
599	// support continuation, user-level threading, and etc.. As a result, no
600	// other SjLj exception interfaces are implemented and please don't build
601	// your own exception handling based on them.
602	// LLVM/Clang supports zero-cost DWARF exception handling.
603	setOperationAction(Op: ISD::EH_SJLJ_SETJMP, VT: MVT::i32, Action: Custom);
604	setOperationAction(Op: ISD::EH_SJLJ_LONGJMP, VT: MVT::Other, Action: Custom);
605
606	// We want to legalize GlobalAddress and ConstantPool nodes into the
607	// appropriate instructions to materialize the address.
608	setOperationAction(Op: ISD::GlobalAddress, VT: MVT::i32, Action: Custom);
609	setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVT::i32, Action: Custom);
610	setOperationAction(Op: ISD::BlockAddress, VT: MVT::i32, Action: Custom);
611	setOperationAction(Op: ISD::ConstantPool, VT: MVT::i32, Action: Custom);
612	setOperationAction(Op: ISD::JumpTable, VT: MVT::i32, Action: Custom);
613	setOperationAction(Op: ISD::GlobalAddress, VT: MVT::i64, Action: Custom);
614	setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVT::i64, Action: Custom);
615	setOperationAction(Op: ISD::BlockAddress, VT: MVT::i64, Action: Custom);
616	setOperationAction(Op: ISD::ConstantPool, VT: MVT::i64, Action: Custom);
617	setOperationAction(Op: ISD::JumpTable, VT: MVT::i64, Action: Custom);
618
619	// TRAP is legal.
620	setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
621
622	// TRAMPOLINE is custom lowered.
623	setOperationAction(Op: ISD::INIT_TRAMPOLINE, VT: MVT::Other, Action: Custom);
624	setOperationAction(Op: ISD::ADJUST_TRAMPOLINE, VT: MVT::Other, Action: Custom);
625
626	// VASTART needs to be custom lowered to use the VarArgsFrameIndex
627	setOperationAction(Op: ISD::VASTART , VT: MVT::Other, Action: Custom);
628
629	if (Subtarget.is64BitELFABI()) {
630	// VAARG always uses double-word chunks, so promote anything smaller.
631	setOperationAction(Op: ISD::VAARG, VT: MVT::i1, Action: Promote);
632	AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i1, DestVT: MVT::i64);
633	setOperationAction(Op: ISD::VAARG, VT: MVT::i8, Action: Promote);
634	AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i8, DestVT: MVT::i64);
635	setOperationAction(Op: ISD::VAARG, VT: MVT::i16, Action: Promote);
636	AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i16, DestVT: MVT::i64);
637	setOperationAction(Op: ISD::VAARG, VT: MVT::i32, Action: Promote);
638	AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i32, DestVT: MVT::i64);
639	setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
640	} else if (Subtarget.is32BitELFABI()) {
641	// VAARG is custom lowered with the 32-bit SVR4 ABI.
642	setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Custom);
643	setOperationAction(Op: ISD::VAARG, VT: MVT::i64, Action: Custom);
644	} else
645	setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
646
647	// VACOPY is custom lowered with the 32-bit SVR4 ABI.
648	if (Subtarget.is32BitELFABI())
649	setOperationAction(Op: ISD::VACOPY , VT: MVT::Other, Action: Custom);
650	else
651	setOperationAction(Op: ISD::VACOPY , VT: MVT::Other, Action: Expand);
652
653	// Use the default implementation.
654	setOperationAction(Op: ISD::VAEND , VT: MVT::Other, Action: Expand);
655	setOperationAction(Op: ISD::STACKSAVE , VT: MVT::Other, Action: Expand);
656	setOperationAction(Op: ISD::STACKRESTORE , VT: MVT::Other, Action: Custom);
657	setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i32 , Action: Custom);
658	setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i64 , Action: Custom);
659	setOperationAction(Op: ISD::GET_DYNAMIC_AREA_OFFSET, VT: MVT::i32, Action: Custom);
660	setOperationAction(Op: ISD::GET_DYNAMIC_AREA_OFFSET, VT: MVT::i64, Action: Custom);
661	setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom);
662	setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i64, Action: Custom);
663
664	if (Subtarget.isISA3_0() && isPPC64) {
665	setOperationAction(Op: ISD::VP_STORE, VT: MVT::v16i1, Action: Custom);
666	setOperationAction(Op: ISD::VP_STORE, VT: MVT::v8i1, Action: Custom);
667	setOperationAction(Op: ISD::VP_STORE, VT: MVT::v4i1, Action: Custom);
668	setOperationAction(Op: ISD::VP_STORE, VT: MVT::v2i1, Action: Custom);
669	setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v16i1, Action: Custom);
670	setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v8i1, Action: Custom);
671	setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v4i1, Action: Custom);
672	setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v2i1, Action: Custom);
673	}
674
675	// We want to custom lower some of our intrinsics.
676	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
677	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::f64, Action: Custom);
678	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::ppcf128, Action: Custom);
679	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::v4f32, Action: Custom);
680	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::v2f64, Action: Custom);
681
682	// To handle counter-based loop conditions.
683	setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i1, Action: Custom);
684	setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
685
686	setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i8, Action: Custom);
687	setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i16, Action: Custom);
688	setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom);
689	setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
690
691	// Comparisons that require checking two conditions.
692	if (Subtarget.hasSPE()) {
693	setCondCodeAction(CCs: ISD::SETO, VT: MVT::f32, Action: Expand);
694	setCondCodeAction(CCs: ISD::SETO, VT: MVT::f64, Action: Expand);
695	setCondCodeAction(CCs: ISD::SETUO, VT: MVT::f32, Action: Expand);
696	setCondCodeAction(CCs: ISD::SETUO, VT: MVT::f64, Action: Expand);
697	}
698	setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f32, Action: Expand);
699	setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f64, Action: Expand);
700	setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f32, Action: Expand);
701	setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f64, Action: Expand);
702	setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f32, Action: Expand);
703	setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f64, Action: Expand);
704	setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand);
705	setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f64, Action: Expand);
706	setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f32, Action: Expand);
707	setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f64, Action: Expand);
708	setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f32, Action: Expand);
709	setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f64, Action: Expand);
710
711	setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f32, Action: Legal);
712	setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f64, Action: Legal);
713
714	if (Subtarget.has64BitSupport()) {
715	// 64-bit subtargets also have instructions to convert between i64 and fp.
716	setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i64, Action: Custom);
717	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i64, Action: Expand);
718	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i64, Action: Custom);
719	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i64, Action: Expand);
720	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i64, Action: Custom);
721	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
722	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
723	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
724	// This is just the low 32 bits of a (signed) fp->i64 conversion.
725	// We cannot do this with Promote because i64 is not a legal type.
726	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Custom);
727	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
728
729	if (Subtarget.hasLFIWAX() \|\| isPPC64) {
730	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Custom);
731	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Custom);
732	}
733	} else {
734	// PowerPC does not have FP_TO_UINT on 32-bit implementations.
735	if (Subtarget.hasSPE()) {
736	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Legal);
737	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Legal);
738	} else {
739	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Expand);
740	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Expand);
741	}
742	}
743
744	// With the instructions enabled under FPCVT, we can do everything.
745	if (Subtarget.hasFPCVT()) {
746	if (Subtarget.has64BitSupport()) {
747	setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i64, Action: Custom);
748	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i64, Action: Custom);
749	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i64, Action: Custom);
750	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i64, Action: Custom);
751	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i64, Action: Custom);
752	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Custom);
753	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
754	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom);
755	}
756
757	setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i32, Action: Custom);
758	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Custom);
759	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Custom);
760	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i32, Action: Custom);
761	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
762	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
763	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Custom);
764	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Custom);
765	}
766
767	if (Subtarget.use64BitRegs()) {
768	// 64-bit PowerPC implementations can support i64 types directly
769	addRegisterClass(VT: MVT::i64, RC: &PPC::G8RCRegClass);
770	// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
771	setOperationAction(Op: ISD::BUILD_PAIR, VT: MVT::i64, Action: Expand);
772	// 64-bit PowerPC wants to expand i128 shifts itself.
773	setOperationAction(Op: ISD::SHL_PARTS, VT: MVT::i64, Action: Custom);
774	setOperationAction(Op: ISD::SRA_PARTS, VT: MVT::i64, Action: Custom);
775	setOperationAction(Op: ISD::SRL_PARTS, VT: MVT::i64, Action: Custom);
776	} else {
777	// 32-bit PowerPC wants to expand i64 shifts itself.
778	setOperationAction(Op: ISD::SHL_PARTS, VT: MVT::i32, Action: Custom);
779	setOperationAction(Op: ISD::SRA_PARTS, VT: MVT::i32, Action: Custom);
780	setOperationAction(Op: ISD::SRL_PARTS, VT: MVT::i32, Action: Custom);
781	}
782
783	// PowerPC has better expansions for funnel shifts than the generic
784	// TargetLowering::expandFunnelShift.
785	if (Subtarget.has64BitSupport()) {
786	setOperationAction(Op: ISD::FSHL, VT: MVT::i64, Action: Custom);
787	setOperationAction(Op: ISD::FSHR, VT: MVT::i64, Action: Custom);
788	}
789	setOperationAction(Op: ISD::FSHL, VT: MVT::i32, Action: Custom);
790	setOperationAction(Op: ISD::FSHR, VT: MVT::i32, Action: Custom);
791
792	if (Subtarget.hasVSX()) {
793	setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal);
794	setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal);
795	setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal);
796	setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal);
797	setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f64, Action: Legal);
798	setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f32, Action: Legal);
799	setOperationAction(Op: ISD::FMINNUM, VT: MVT::f64, Action: Legal);
800	setOperationAction(Op: ISD::FMINNUM, VT: MVT::f32, Action: Legal);
801	setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f64, Action: Legal);
802	setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f32, Action: Legal);
803	}
804
805	if (Subtarget.hasAltivec()) {
806	for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
807	setOperationAction(Op: ISD::AVGCEILS, VT, Action: Legal);
808	setOperationAction(Op: ISD::AVGCEILU, VT, Action: Legal);
809	setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
810	setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
811	setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
812	setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
813	}
814	// First set operation action for all vector types to expand. Then we
815	// will selectively turn on ones that can be effectively codegen'd.
816	for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
817	// add/sub are legal for all supported vector VT's.
818	setOperationAction(Op: ISD::ADD, VT, Action: Legal);
819	setOperationAction(Op: ISD::SUB, VT, Action: Legal);
820
821	// For v2i64, these are only valid with P8Vector. This is corrected after
822	// the loop.
823	if (VT.getSizeInBits() <= `128` && VT.getScalarSizeInBits() <= `64`) {
824	setOperationAction(Op: ISD::SMAX, VT, Action: Legal);
825	setOperationAction(Op: ISD::SMIN, VT, Action: Legal);
826	setOperationAction(Op: ISD::UMAX, VT, Action: Legal);
827	setOperationAction(Op: ISD::UMIN, VT, Action: Legal);
828	}
829	else {
830	setOperationAction(Op: ISD::SMAX, VT, Action: Expand);
831	setOperationAction(Op: ISD::SMIN, VT, Action: Expand);
832	setOperationAction(Op: ISD::UMAX, VT, Action: Expand);
833	setOperationAction(Op: ISD::UMIN, VT, Action: Expand);
834	}
835
836	if (Subtarget.hasVSX()) {
837	setOperationAction(Op: ISD::FMAXNUM_IEEE, VT, Action: Legal);
838	setOperationAction(Op: ISD::FMINNUM_IEEE, VT, Action: Legal);
839	setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
840	setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
841	setOperationAction(Op: ISD::FCANONICALIZE, VT, Action: Legal);
842	}
843
844	// Vector instructions introduced in P8
845	if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
846	setOperationAction(Op: ISD::CTPOP, VT, Action: Legal);
847	setOperationAction(Op: ISD::CTLZ, VT, Action: Legal);
848	}
849	else {
850	setOperationAction(Op: ISD::CTPOP, VT, Action: Expand);
851	setOperationAction(Op: ISD::CTLZ, VT, Action: Expand);
852	}
853
854	// Vector instructions introduced in P9
855	if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
856	setOperationAction(Op: ISD::CTTZ, VT, Action: Legal);
857	else
858	setOperationAction(Op: ISD::CTTZ, VT, Action: Expand);
859
860	// We promote all shuffles to v16i8.
861	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Promote);
862	AddPromotedToType (Opc: ISD::VECTOR_SHUFFLE, OrigVT: VT, DestVT: MVT::v16i8);
863
864	// We promote all non-typed operations to v4i32.
865	setOperationAction(Op: ISD::AND , VT, Action: Promote);
866	AddPromotedToType (Opc: ISD::AND , OrigVT: VT, DestVT: MVT::v4i32);
867	setOperationAction(Op: ISD::OR , VT, Action: Promote);
868	AddPromotedToType (Opc: ISD::OR , OrigVT: VT, DestVT: MVT::v4i32);
869	setOperationAction(Op: ISD::XOR , VT, Action: Promote);
870	AddPromotedToType (Opc: ISD::XOR , OrigVT: VT, DestVT: MVT::v4i32);
871	setOperationAction(Op: ISD::LOAD , VT, Action: Promote);
872	AddPromotedToType (Opc: ISD::LOAD , OrigVT: VT, DestVT: MVT::v4i32);
873	setOperationAction(Op: ISD::SELECT, VT, Action: Promote);
874	AddPromotedToType (Opc: ISD::SELECT, OrigVT: VT, DestVT: MVT::v4i32);
875	setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
876	setOperationAction(Op: ISD::SELECT_CC, VT, Action: Promote);
877	AddPromotedToType (Opc: ISD::SELECT_CC, OrigVT: VT, DestVT: MVT::v4i32);
878	setOperationAction(Op: ISD::STORE, VT, Action: Promote);
879	AddPromotedToType (Opc: ISD::STORE, OrigVT: VT, DestVT: MVT::v4i32);
880
881	// No other operations are legal.
882	setOperationAction(Op: ISD::MUL , VT, Action: Expand);
883	setOperationAction(Op: ISD::SDIV, VT, Action: Expand);
884	setOperationAction(Op: ISD::SREM, VT, Action: Expand);
885	setOperationAction(Op: ISD::UDIV, VT, Action: Expand);
886	setOperationAction(Op: ISD::UREM, VT, Action: Expand);
887	setOperationAction(Op: ISD::FDIV, VT, Action: Expand);
888	setOperationAction(Op: ISD::FREM, VT, Action: Expand);
889	setOperationAction(Op: ISD::FNEG, VT, Action: Expand);
890	setOperationAction(Op: ISD::FSQRT, VT, Action: Expand);
891	setOperationAction(Op: ISD::FLOG, VT, Action: Expand);
892	setOperationAction(Op: ISD::FLOG10, VT, Action: Expand);
893	setOperationAction(Op: ISD::FLOG2, VT, Action: Expand);
894	setOperationAction(Op: ISD::FEXP, VT, Action: Expand);
895	setOperationAction(Op: ISD::FEXP2, VT, Action: Expand);
896	setOperationAction(Op: ISD::FSIN, VT, Action: Expand);
897	setOperationAction(Op: ISD::FCOS, VT, Action: Expand);
898	setOperationAction(Op: ISD::FABS, VT, Action: Expand);
899	setOperationAction(Op: ISD::FFLOOR, VT, Action: Expand);
900	setOperationAction(Op: ISD::FCEIL, VT, Action: Expand);
901	setOperationAction(Op: ISD::FTRUNC, VT, Action: Expand);
902	setOperationAction(Op: ISD::FRINT, VT, Action: Expand);
903	setOperationAction(Op: ISD::FLDEXP, VT, Action: Expand);
904	setOperationAction(Op: ISD::FNEARBYINT, VT, Action: Expand);
905	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Expand);
906	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Expand);
907	setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Expand);
908	setOperationAction(Op: ISD::MULHU, VT, Action: Expand);
909	setOperationAction(Op: ISD::MULHS, VT, Action: Expand);
910	setOperationAction(Op: ISD::UMUL_LOHI, VT, Action: Expand);
911	setOperationAction(Op: ISD::SMUL_LOHI, VT, Action: Expand);
912	setOperationAction(Op: ISD::UDIVREM, VT, Action: Expand);
913	setOperationAction(Op: ISD::SDIVREM, VT, Action: Expand);
914	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Expand);
915	setOperationAction(Op: ISD::FPOW, VT, Action: Expand);
916	setOperationAction(Op: ISD::BSWAP, VT, Action: Expand);
917	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT, Action: Expand);
918	setOperationAction(Op: ISD::ROTL, VT, Action: Expand);
919	setOperationAction(Op: ISD::ROTR, VT, Action: Expand);
920
921	for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
922	setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
923	setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
924	setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
925	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
926	}
927	}
928	setOperationAction(Op: ISD::SELECT_CC, VT: MVT::v4i32, Action: Expand);
929	if (!Subtarget.hasP8Vector()) {
930	setOperationAction(Op: ISD::SMAX, VT: MVT::v2i64, Action: Expand);
931	setOperationAction(Op: ISD::SMIN, VT: MVT::v2i64, Action: Expand);
932	setOperationAction(Op: ISD::UMAX, VT: MVT::v2i64, Action: Expand);
933	setOperationAction(Op: ISD::UMIN, VT: MVT::v2i64, Action: Expand);
934	}
935
936	// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
937	// with merges, splats, etc.
938	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v16i8, Action: Custom);
939
940	// Vector truncates to sub-word integers that fit in an Altivec/VSX register
941	// are cheap, so handle them before they get expanded to scalar.
942	setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v8i8, Action: Custom);
943	setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v4i8, Action: Custom);
944	setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v2i8, Action: Custom);
945	setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v4i16, Action: Custom);
946	setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v2i16, Action: Custom);
947
948	setOperationAction(Op: ISD::AND , VT: MVT::v4i32, Action: Legal);
949	setOperationAction(Op: ISD::OR , VT: MVT::v4i32, Action: Legal);
950	setOperationAction(Op: ISD::XOR , VT: MVT::v4i32, Action: Legal);
951	setOperationAction(Op: ISD::LOAD , VT: MVT::v4i32, Action: Legal);
952	setOperationAction(Op: ISD::SELECT, VT: MVT::v4i32,
953	Action: Subtarget.useCRBits() ? Legal : Expand);
954	setOperationAction(Op: ISD::STORE , VT: MVT::v4i32, Action: Legal);
955	setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v4i32, Action: Legal);
956	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v4i32, Action: Legal);
957	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i32, Action: Legal);
958	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i32, Action: Legal);
959	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v4i32, Action: Legal);
960	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v4i32, Action: Legal);
961	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i32, Action: Legal);
962	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i32, Action: Legal);
963	setOperationAction(Op: ISD::FFLOOR, VT: MVT::v4f32, Action: Legal);
964	setOperationAction(Op: ISD::FCEIL, VT: MVT::v4f32, Action: Legal);
965	setOperationAction(Op: ISD::FTRUNC, VT: MVT::v4f32, Action: Legal);
966	setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::v4f32, Action: Legal);
967
968	// Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
969	setOperationAction(Op: ISD::ROTL, VT: MVT::v1i128, Action: Custom);
970	// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
971	if (Subtarget.hasAltivec())
972	for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
973	setOperationAction(Op: ISD::ROTL, VT, Action: Legal);
974	// With hasP8Altivec set, we can lower ISD::ROTL to vrld.
975	if (Subtarget.hasP8Altivec())
976	setOperationAction(Op: ISD::ROTL, VT: MVT::v2i64, Action: Legal);
977
978	addRegisterClass(VT: MVT::v4f32, RC: &PPC::VRRCRegClass);
979	addRegisterClass(VT: MVT::v4i32, RC: &PPC::VRRCRegClass);
980	addRegisterClass(VT: MVT::v8i16, RC: &PPC::VRRCRegClass);
981	addRegisterClass(VT: MVT::v16i8, RC: &PPC::VRRCRegClass);
982
983	setOperationAction(Op: ISD::MUL, VT: MVT::v4f32, Action: Legal);
984	setOperationAction(Op: ISD::FMA, VT: MVT::v4f32, Action: Legal);
985
986	if (Subtarget.hasVSX()) {
987	setOperationAction(Op: ISD::FDIV, VT: MVT::v4f32, Action: Legal);
988	setOperationAction(Op: ISD::FSQRT, VT: MVT::v4f32, Action: Legal);
989	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2f64, Action: Custom);
990	}
991
992	if (Subtarget.hasP8Altivec())
993	setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Legal);
994	else
995	setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Custom);
996
997	if (Subtarget.isISA3_1()) {
998	setOperationAction(Op: ISD::MUL, VT: MVT::v2i64, Action: Legal);
999	setOperationAction(Op: ISD::MULHS, VT: MVT::v2i64, Action: Legal);
1000	setOperationAction(Op: ISD::MULHU, VT: MVT::v2i64, Action: Legal);
1001	setOperationAction(Op: ISD::MULHS, VT: MVT::v4i32, Action: Legal);
1002	setOperationAction(Op: ISD::MULHU, VT: MVT::v4i32, Action: Legal);
1003	setOperationAction(Op: ISD::UDIV, VT: MVT::v2i64, Action: Legal);
1004	setOperationAction(Op: ISD::SDIV, VT: MVT::v2i64, Action: Legal);
1005	setOperationAction(Op: ISD::UDIV, VT: MVT::v4i32, Action: Legal);
1006	setOperationAction(Op: ISD::SDIV, VT: MVT::v4i32, Action: Legal);
1007	setOperationAction(Op: ISD::UREM, VT: MVT::v2i64, Action: Legal);
1008	setOperationAction(Op: ISD::SREM, VT: MVT::v2i64, Action: Legal);
1009	setOperationAction(Op: ISD::UREM, VT: MVT::v4i32, Action: Legal);
1010	setOperationAction(Op: ISD::SREM, VT: MVT::v4i32, Action: Legal);
1011	setOperationAction(Op: ISD::UREM, VT: MVT::v1i128, Action: Legal);
1012	setOperationAction(Op: ISD::SREM, VT: MVT::v1i128, Action: Legal);
1013	setOperationAction(Op: ISD::UDIV, VT: MVT::v1i128, Action: Legal);
1014	setOperationAction(Op: ISD::SDIV, VT: MVT::v1i128, Action: Legal);
1015	setOperationAction(Op: ISD::ROTL, VT: MVT::v1i128, Action: Legal);
1016	}
1017
1018	setOperationAction(Op: ISD::MUL, VT: MVT::v8i16, Action: Legal);
1019	setOperationAction(Op: ISD::MUL, VT: MVT::v16i8, Action: Custom);
1020
1021	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Custom);
1022	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Custom);
1023	// LE is P8+/64-bit so direct moves are supported and these operations
1024	// are legal. The custom transformation requires 64-bit since we need a
1025	// pair of stores that will cover a 128-bit load for P10.
1026	if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1027	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Custom);
1028	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Custom);
1029	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Custom);
1030	}
1031
1032	setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v16i8, Action: Custom);
1033	setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v8i16, Action: Custom);
1034	setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4i32, Action: Custom);
1035	setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4f32, Action: Custom);
1036
1037	// Altivec does not contain unordered floating-point compare instructions
1038	setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v4f32, Action: Expand);
1039	setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v4f32, Action: Expand);
1040	setCondCodeAction(CCs: ISD::SETO, VT: MVT::v4f32, Action: Expand);
1041	setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v4f32, Action: Expand);
1042
1043	if (Subtarget.hasVSX()) {
1044	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2f64, Action: Legal);
1045	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1046	if (Subtarget.hasP8Vector()) {
1047	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Legal);
1048	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4f32, Action: Legal);
1049	}
1050	if (Subtarget.hasDirectMove() && isPPC64) {
1051	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Legal);
1052	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Legal);
1053	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Legal);
1054	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Legal);
1055	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1056	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1057	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1058	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1059	}
1060	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1061
1062	// The nearbyint variants are not allowed to raise the inexact exception
1063	// so we can only code-gen them with fpexcept.ignore.
1064	setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f64, Action: Custom);
1065	setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f32, Action: Custom);
1066	setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::v2f64, Action: Custom);
1067	setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::v4f32, Action: Custom);
1068
1069	setOperationAction(Op: ISD::FFLOOR, VT: MVT::v2f64, Action: Legal);
1070	setOperationAction(Op: ISD::FCEIL, VT: MVT::v2f64, Action: Legal);
1071	setOperationAction(Op: ISD::FTRUNC, VT: MVT::v2f64, Action: Legal);
1072	setOperationAction(Op: ISD::FRINT, VT: MVT::v2f64, Action: Legal);
1073	setOperationAction(Op: ISD::FROUND, VT: MVT::v2f64, Action: Legal);
1074	setOperationAction(Op: ISD::FROUND, VT: MVT::f64, Action: Legal);
1075	setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
1076
1077	setOperationAction(Op: ISD::FRINT, VT: MVT::v4f32, Action: Legal);
1078	setOperationAction(Op: ISD::FROUND, VT: MVT::v4f32, Action: Legal);
1079	setOperationAction(Op: ISD::FROUND, VT: MVT::f32, Action: Legal);
1080	setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);
1081
1082	setOperationAction(Op: ISD::MUL, VT: MVT::v2f64, Action: Legal);
1083	setOperationAction(Op: ISD::FMA, VT: MVT::v2f64, Action: Legal);
1084
1085	setOperationAction(Op: ISD::FDIV, VT: MVT::v2f64, Action: Legal);
1086	setOperationAction(Op: ISD::FSQRT, VT: MVT::v2f64, Action: Legal);
1087
1088	// Share the Altivec comparison restrictions.
1089	setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v2f64, Action: Expand);
1090	setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v2f64, Action: Expand);
1091	setCondCodeAction(CCs: ISD::SETO, VT: MVT::v2f64, Action: Expand);
1092	setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v2f64, Action: Expand);
1093
1094	setOperationAction(Op: ISD::LOAD, VT: MVT::v2f64, Action: Legal);
1095	setOperationAction(Op: ISD::STORE, VT: MVT::v2f64, Action: Legal);
1096
1097	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2f64, Action: Custom);
1098
1099	if (Subtarget.hasP8Vector())
1100	addRegisterClass(VT: MVT::f32, RC: &PPC::VSSRCRegClass);
1101
1102	addRegisterClass(VT: MVT::f64, RC: &PPC::VSFRCRegClass);
1103
1104	addRegisterClass(VT: MVT::v4i32, RC: &PPC::VSRCRegClass);
1105	addRegisterClass(VT: MVT::v4f32, RC: &PPC::VSRCRegClass);
1106	addRegisterClass(VT: MVT::v2f64, RC: &PPC::VSRCRegClass);
1107
1108	if (Subtarget.hasP8Altivec()) {
1109	setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Legal);
1110	setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Legal);
1111	setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Legal);
1112
1113	// 128 bit shifts can be accomplished via 3 instructions for SHL and
1114	// SRL, but not for SRA because of the instructions available:
1115	// VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1116	// doing, so all three are expanded.
1117	setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Expand);
1118	setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Expand);
1119	setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1120
1121	setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Legal);
1122	}
1123	else {
1124	setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Expand);
1125	setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Expand);
1126	setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Expand);
1127
1128	setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Custom);
1129
1130	// VSX v2i64 only supports non-arithmetic operations.
1131	setOperationAction(Op: ISD::ADD, VT: MVT::v2i64, Action: Expand);
1132	setOperationAction(Op: ISD::SUB, VT: MVT::v2i64, Action: Expand);
1133	}
1134
1135	if (Subtarget.isISA3_1())
1136	setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Legal);
1137	else
1138	setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Expand);
1139
1140	setOperationAction(Op: ISD::LOAD, VT: MVT::v2i64, Action: Promote);
1141	AddPromotedToType (Opc: ISD::LOAD, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1142	setOperationAction(Op: ISD::STORE, VT: MVT::v2i64, Action: Promote);
1143	AddPromotedToType (Opc: ISD::STORE, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1144
1145	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2i64, Action: Custom);
1146
1147	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1148	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1149	setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1150	setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1151	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1152	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1153	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1154	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1155
1156	// Custom handling for partial vectors of integers converted to
1157	// floating point. We already have optimal handling for v2i32 through
1158	// the DAG combine, so those aren't necessary.
1159	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1160	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1161	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1162	setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1163	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1164	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1165	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1166	setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1167	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1168	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1169	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1170	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1171	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1172	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1173	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1174	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1175
1176	setOperationAction(Op: ISD::FNEG, VT: MVT::v4f32, Action: Legal);
1177	setOperationAction(Op: ISD::FNEG, VT: MVT::v2f64, Action: Legal);
1178	setOperationAction(Op: ISD::FABS, VT: MVT::v4f32, Action: Legal);
1179	setOperationAction(Op: ISD::FABS, VT: MVT::v2f64, Action: Legal);
1180	setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v4f32, Action: Legal);
1181	setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v2f64, Action: Legal);
1182
1183	setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2i64, Action: Custom);
1184	setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2f64, Action: Custom);
1185
1186	// Handle constrained floating-point operations of vector.
1187	// The predicate is `hasVSX` because Altivec instructions raise no
1188	// exceptions, whereas VSX vector instructions do.
1189	setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v4f32, Action: Legal);
1190	setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v4f32, Action: Legal);
1191	setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v4f32, Action: Legal);
1192	setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v4f32, Action: Legal);
1193	setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v4f32, Action: Legal);
1194	setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v4f32, Action: Legal);
1195	setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v4f32, Action: Legal);
1196	setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v4f32, Action: Legal);
1197	setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v4f32, Action: Legal);
1198	setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v4f32, Action: Legal);
1199	setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v4f32, Action: Legal);
1200	setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v4f32, Action: Legal);
1201	setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v4f32, Action: Legal);
1202
1203	setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v2f64, Action: Legal);
1204	setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v2f64, Action: Legal);
1205	setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v2f64, Action: Legal);
1206	setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v2f64, Action: Legal);
1207	setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v2f64, Action: Legal);
1208	setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v2f64, Action: Legal);
1209	setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v2f64, Action: Legal);
1210	setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v2f64, Action: Legal);
1211	setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v2f64, Action: Legal);
1212	setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v2f64, Action: Legal);
1213	setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v2f64, Action: Legal);
1214	setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v2f64, Action: Legal);
1215	setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v2f64, Action: Legal);
1216
1217	addRegisterClass(VT: MVT::v2i64, RC: &PPC::VSRCRegClass);
1218	addRegisterClass(VT: MVT::f128, RC: &PPC::VRRCRegClass);
1219
1220	for (MVT FPT : MVT::fp_valuetypes())
1221	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: FPT, Action: Expand);
1222
1223	// Expand the SELECT to SELECT_CC
1224	setOperationAction(Op: ISD::SELECT, VT: MVT::f128, Action: Expand);
1225
1226	setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f64, Action: Expand);
1227	setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f32, Action: Expand);
1228
1229	// No implementation for these ops for PowerPC.
1230	setOperationAction(Op: ISD::FSINCOS, VT: MVT::f128, Action: Expand);
1231	setOperationAction(Op: ISD::FSIN, VT: MVT::f128, Action: Expand);
1232	setOperationAction(Op: ISD::FCOS, VT: MVT::f128, Action: Expand);
1233	setOperationAction(Op: ISD::FPOW, VT: MVT::f128, Action: Expand);
1234	setOperationAction(Op: ISD::FPOWI, VT: MVT::f128, Action: Expand);
1235	setOperationAction(Op: ISD::FREM, VT: MVT::f128, Action: LibCall);
1236	}
1237
1238	if (Subtarget.hasP8Altivec()) {
1239	addRegisterClass(VT: MVT::v2i64, RC: &PPC::VRRCRegClass);
1240	addRegisterClass(VT: MVT::v1i128, RC: &PPC::VRRCRegClass);
1241	}
1242
1243	if (Subtarget.hasP9Vector()) {
1244	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Custom);
1245	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4f32, Action: Custom);
1246
1247	// Test data class instructions store results in CR bits.
1248	if (Subtarget.useCRBits()) {
1249	setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Custom);
1250	setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Custom);
1251	setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f128, Action: Custom);
1252	setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::ppcf128, Action: Custom);
1253	}
1254
1255	// 128 bit shifts can be accomplished via 3 instructions for SHL and
1256	// SRL, but not for SRA because of the instructions available:
1257	// VS{RL} and VS{RL}O.
1258	setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Legal);
1259	setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Legal);
1260	setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1261
1262	setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: Legal);
1263	setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: Legal);
1264	setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Legal);
1265	setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Legal);
1266	setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Legal);
1267
1268	setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Legal);
1269	setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f128, Action: Expand);
1270	setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f128, Action: Expand);
1271	setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f128, Action: Expand);
1272	setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f128, Action: Expand);
1273	setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f128, Action: Expand);
1274	setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f128, Action: Expand);
1275
1276	setOperationAction(Op: ISD::FTRUNC, VT: MVT::f128, Action: Legal);
1277	setOperationAction(Op: ISD::FRINT, VT: MVT::f128, Action: Legal);
1278	setOperationAction(Op: ISD::FFLOOR, VT: MVT::f128, Action: Legal);
1279	setOperationAction(Op: ISD::FCEIL, VT: MVT::f128, Action: Legal);
1280	setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f128, Action: Legal);
1281	setOperationAction(Op: ISD::FROUND, VT: MVT::f128, Action: Legal);
1282
1283	setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f64, Action: Legal);
1284	setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f32, Action: Legal);
1285	setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
1286
1287	// Handle constrained floating-point operations of fp128
1288	setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f128, Action: Legal);
1289	setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f128, Action: Legal);
1290	setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f128, Action: Legal);
1291	setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f128, Action: Legal);
1292	setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f128, Action: Legal);
1293	setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f128, Action: Legal);
1294	setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Legal);
1295	setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f64, Action: Legal);
1296	setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f32, Action: Legal);
1297	setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f128, Action: Legal);
1298	setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f128, Action: Legal);
1299	setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f128, Action: Legal);
1300	setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f128, Action: Legal);
1301	setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f128, Action: Legal);
1302	setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f128, Action: Legal);
1303	setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v2f32, Action: Custom);
1304	setOperationAction(Op: ISD::BSWAP, VT: MVT::v8i16, Action: Legal);
1305	setOperationAction(Op: ISD::BSWAP, VT: MVT::v4i32, Action: Legal);
1306	setOperationAction(Op: ISD::BSWAP, VT: MVT::v2i64, Action: Legal);
1307	setOperationAction(Op: ISD::BSWAP, VT: MVT::v1i128, Action: Legal);
1308	} else if (Subtarget.hasVSX()) {
1309	setOperationAction(Op: ISD::LOAD, VT: MVT::f128, Action: Promote);
1310	setOperationAction(Op: ISD::STORE, VT: MVT::f128, Action: Promote);
1311
1312	AddPromotedToType(Opc: ISD::LOAD, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1313	AddPromotedToType(Opc: ISD::STORE, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1314
1315	// Set FADD/FSUB as libcall to keep the legalizer from expanding the
1316	// fp_to_uint and int_to_fp.
1317	setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: LibCall);
1318	setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: LibCall);
1319
1320	setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Expand);
1321	setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Expand);
1322	setOperationAction(Op: ISD::FNEG, VT: MVT::f128, Action: Expand);
1323	setOperationAction(Op: ISD::FABS, VT: MVT::f128, Action: Expand);
1324	setOperationAction(Op: ISD::FSQRT, VT: MVT::f128, Action: Expand);
1325	setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Expand);
1326	setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f128, Action: Expand);
1327
1328	// Expand the fp_extend if the target type is fp128.
1329	setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Expand);
1330	setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Expand);
1331
1332	// Expand the fp_round if the source type is fp128.
1333	for (MVT VT : {MVT::f32, MVT::f64}) {
1334	setOperationAction(Op: ISD::FP_ROUND, VT, Action: Custom);
1335	setOperationAction(Op: ISD::STRICT_FP_ROUND, VT, Action: Custom);
1336	}
1337
1338	setOperationAction(Op: ISD::SETCC, VT: MVT::f128, Action: Custom);
1339	setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f128, Action: Custom);
1340	setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f128, Action: Custom);
1341	setOperationAction(Op: ISD::BR_CC, VT: MVT::f128, Action: Expand);
1342
1343	// Lower following f128 select_cc pattern:
1344	// select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1345	setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1346
1347	// We need to handle f128 SELECT_CC with integer result type.
1348	setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Custom);
1349	setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: isPPC64 ? Custom : Expand);
1350	}
1351
1352	if (Subtarget.hasP9Altivec()) {
1353	if (Subtarget.isISA3_1()) {
1354	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1355	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1356	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1357	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1358	} else {
1359	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Custom);
1360	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Custom);
1361	}
1362	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i8, Action: Legal);
1363	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i16, Action: Legal);
1364	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i32, Action: Legal);
1365	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i8, Action: Legal);
1366	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i16, Action: Legal);
1367	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i32, Action: Legal);
1368	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i64, Action: Legal);
1369
1370	setOperationAction(Op: ISD::ABDU, VT: MVT::v16i8, Action: Legal);
1371	setOperationAction(Op: ISD::ABDU, VT: MVT::v8i16, Action: Legal);
1372	setOperationAction(Op: ISD::ABDU, VT: MVT::v4i32, Action: Legal);
1373	setOperationAction(Op: ISD::ABDS, VT: MVT::v4i32, Action: Legal);
1374	}
1375
1376	if (Subtarget.hasP10Vector()) {
1377	setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1378	}
1379	}
1380
1381	if (Subtarget.pairedVectorMemops()) {
1382	addRegisterClass(VT: MVT::v256i1, RC: &PPC::VSRpRCRegClass);
1383	setOperationAction(Op: ISD::LOAD, VT: MVT::v256i1, Action: Custom);
1384	setOperationAction(Op: ISD::STORE, VT: MVT::v256i1, Action: Custom);
1385	}
1386	if (Subtarget.hasMMA()) {
1387	if (Subtarget.isISAFuture()) {
1388	addRegisterClass(VT: MVT::v512i1, RC: &PPC::WACCRCRegClass);
1389	addRegisterClass(VT: MVT::v1024i1, RC: &PPC::DMRRCRegClass);
1390	addRegisterClass(VT: MVT::v2048i1, RC: &PPC::DMRpRCRegClass);
1391	setOperationAction(Op: ISD::LOAD, VT: MVT::v1024i1, Action: Custom);
1392	setOperationAction(Op: ISD::STORE, VT: MVT::v1024i1, Action: Custom);
1393	setOperationAction(Op: ISD::LOAD, VT: MVT::v2048i1, Action: Custom);
1394	setOperationAction(Op: ISD::STORE, VT: MVT::v2048i1, Action: Custom);
1395	} else {
1396	addRegisterClass(VT: MVT::v512i1, RC: &PPC::UACCRCRegClass);
1397	}
1398	setOperationAction(Op: ISD::LOAD, VT: MVT::v512i1, Action: Custom);
1399	setOperationAction(Op: ISD::STORE, VT: MVT::v512i1, Action: Custom);
1400	setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v512i1, Action: Custom);
1401	}
1402
1403	if (Subtarget.has64BitSupport())
1404	setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Legal);
1405
1406	if (Subtarget.isISA3_1())
1407	setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Legal);
1408
1409	setOperationAction(Op: ISD::READCYCLECOUNTER, VT: MVT::i64, Action: isPPC64 ? Legal : Custom);
1410
1411	if (!isPPC64) {
1412	setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i64, Action: Expand);
1413	setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i64, Action: Expand);
1414	}
1415
1416	if (shouldInlineQuadwordAtomics()) {
1417	setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i128, Action: Custom);
1418	setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i128, Action: Custom);
1419	setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i128, Action: Custom);
1420	}
1421
1422	setBooleanContents(ZeroOrOneBooleanContent);
1423
1424	if (Subtarget.hasAltivec()) {
1425	// Altivec instructions set fields to all zeros or all ones.
1426	setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1427	}
1428
1429	if (shouldInlineQuadwordAtomics())
1430	setMaxAtomicSizeInBitsSupported(`128`);
1431	else if (isPPC64)
1432	setMaxAtomicSizeInBitsSupported(`64`);
1433	else
1434	setMaxAtomicSizeInBitsSupported(`32`);
1435
1436	setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1437
1438	// We have target-specific dag combine patterns for the following nodes:
1439	setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::XOR, ISD::SHL, ISD::SRA,
1440	ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP,
1441	ISD::BUILD_VECTOR});
1442	if (Subtarget.hasFPCVT())
1443	setTargetDAGCombine(ISD::UINT_TO_FP);
1444	setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1445	if (Subtarget.useCRBits())
1446	setTargetDAGCombine(ISD::BRCOND);
1447	setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1448	ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1449
1450	setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1451
1452	setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1453
1454	if (Subtarget.useCRBits()) {
1455	setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1456	}
1457
1458	// With 32 condition bits, we don't need to sink (and duplicate) compares
1459	// aggressively in CodeGenPrep.
1460	if (Subtarget.useCRBits()) {
1461	setJumpIsExpensive();
1462	}
1463
1464	// TODO: The default entry number is set to 64. This stops most jump table
1465	// generation on PPC. But it is good for current PPC HWs because the indirect
1466	// branch instruction mtctr to the jump table may lead to bad branch predict.
1467	// Re-evaluate this value on future HWs that can do better with mtctr.
1468	setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
1469
1470	// The default minimum of largest number in a BitTest cluster is 3.
1471	setMinimumBitTestCmps(PPCMinimumBitTestCmps);
1472
1473	setMinFunctionAlignment(Align (`4`));
1474	setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? `8` : `32`);
1475
1476	auto CPUDirective = Subtarget.getCPUDirective();
1477	switch (CPUDirective) {
1478	default: break;
1479	case PPC::DIR_970:
1480	case PPC::DIR_A2:
1481	case PPC::DIR_E500:
1482	case PPC::DIR_E500mc:
1483	case PPC::DIR_E5500:
1484	case PPC::DIR_PWR4:
1485	case PPC::DIR_PWR5:
1486	case PPC::DIR_PWR5X:
1487	case PPC::DIR_PWR6:
1488	case PPC::DIR_PWR6X:
1489	case PPC::DIR_PWR7:
1490	case PPC::DIR_PWR8:
1491	case PPC::DIR_PWR9:
1492	case PPC::DIR_PWR10:
1493	case PPC::DIR_PWR11:
1494	case PPC::DIR_PWR_FUTURE:
1495	setPrefLoopAlignment(Align (`16`));
1496	setPrefFunctionAlignment(Align (`16`));
1497	break;
1498	}
1499
1500	if (Subtarget.enableMachineScheduler())
1501	setSchedulingPreference(Sched::Source);
1502	else
1503	setSchedulingPreference(Sched::Hybrid);
1504
1505	computeRegisterProperties(TRI: STI.getRegisterInfo());
1506
1507	// The Freescale cores do better with aggressive inlining of memcpy and
1508	// friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1509	if (CPUDirective == PPC::DIR_E500mc \|\| CPUDirective == PPC::DIR_E5500) {
1510	MaxStoresPerMemset = `32`;
1511	MaxStoresPerMemsetOptSize = `16`;
1512	MaxStoresPerMemcpy = `32`;
1513	MaxStoresPerMemcpyOptSize = `8`;
1514	MaxStoresPerMemmove = `32`;
1515	MaxStoresPerMemmoveOptSize = `8`;
1516	} else if (CPUDirective == PPC::DIR_A2) {
1517	// The A2 also benefits from (very) aggressive inlining of memcpy and
1518	// friends. The overhead of the function call, even when warm, can be
1519	// over one hundred cycles.
1520	MaxStoresPerMemset = `128`;
1521	MaxStoresPerMemcpy = `128`;
1522	MaxStoresPerMemmove = `128`;
1523	MaxLoadsPerMemcmp = `128`;
1524	} else {
1525	MaxLoadsPerMemcmp = `8`;
1526	MaxLoadsPerMemcmpOptSize = `4`;
1527	}
1528
1529	// Enable generation of STXVP instructions by default for mcpu=future.
1530	if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1531	DisableAutoPairedVecSt.getNumOccurrences() == `0`)
1532	DisableAutoPairedVecSt = false;
1533
1534	IsStrictFPEnabled = true;
1535
1536	// Let the subtarget (CPU) decide if a predictable select is more expensive
1537	// than the corresponding branch. This information is used in CGP to decide
1538	// when to convert selects into branches.
1539	PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1540
1541	GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
1542	}
1543
1544	// ********************************* NOTE **********************************
1545	// For selecting load and store instructions, the addressing modes are defined
1546	// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1547	// patterns to match the load and store instructions.
1548	//
1549	// The TD definitions for the addressing modes correspond to their respective
1550	// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1551	// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1552	// address mode flags of a particular node. Afterwards, the computed address
1553	// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1554	// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1555	// accordingly, based on the preferred addressing mode.
1556	//
1557	// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1558	// MemOpFlags contains all the possible flags that can be used to compute the
1559	// optimal addressing mode for load and store instructions.
1560	// AddrMode contains all the possible load and store addressing modes available
1561	// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1562	//
1563	// When adding new load and store instructions, it is possible that new address
1564	// flags may need to be added into MemOpFlags, and a new addressing mode will
1565	// need to be added to AddrMode. An entry of the new addressing mode (consisting
1566	// of the minimal and main distinguishing address flags for the new load/store
1567	// instructions) will need to be added into initializeAddrModeMap() below.
1568	// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1569	// need to be updated to account for selecting the optimal addressing mode.
1570	// *****************************************************************************
1571	/// Initialize the map that relates the different addressing modes of the load
1572	/// and store instructions to a set of flags. This ensures the load/store
1573	/// instruction is correctly matched during instruction selection.
1574	void PPCTargetLowering::initializeAddrModeMap() {
1575	AddrModesMap [PPC::AM_DForm] = {
1576	// LWZ, STW
1577	PPC::MOF_ZExt \| PPC::MOF_RPlusSImm16 \| PPC::MOF_WordInt,
1578	PPC::MOF_ZExt \| PPC::MOF_RPlusLo \| PPC::MOF_WordInt,
1579	PPC::MOF_ZExt \| PPC::MOF_NotAddNorCst \| PPC::MOF_WordInt,
1580	PPC::MOF_ZExt \| PPC::MOF_AddrIsSImm32 \| PPC::MOF_WordInt,
1581	// LBZ, LHZ, STB, STH
1582	PPC::MOF_ZExt \| PPC::MOF_RPlusSImm16 \| PPC::MOF_SubWordInt,
1583	PPC::MOF_ZExt \| PPC::MOF_RPlusLo \| PPC::MOF_SubWordInt,
1584	PPC::MOF_ZExt \| PPC::MOF_NotAddNorCst \| PPC::MOF_SubWordInt,
1585	PPC::MOF_ZExt \| PPC::MOF_AddrIsSImm32 \| PPC::MOF_SubWordInt,
1586	// LHA
1587	PPC::MOF_SExt \| PPC::MOF_RPlusSImm16 \| PPC::MOF_SubWordInt,
1588	PPC::MOF_SExt \| PPC::MOF_RPlusLo \| PPC::MOF_SubWordInt,
1589	PPC::MOF_SExt \| PPC::MOF_NotAddNorCst \| PPC::MOF_SubWordInt,
1590	PPC::MOF_SExt \| PPC::MOF_AddrIsSImm32 \| PPC::MOF_SubWordInt,
1591	// LFS, LFD, STFS, STFD
1592	PPC::MOF_RPlusSImm16 \| PPC::MOF_ScalarFloat \| PPC::MOF_SubtargetBeforeP9,
1593	PPC::MOF_RPlusLo \| PPC::MOF_ScalarFloat \| PPC::MOF_SubtargetBeforeP9,
1594	PPC::MOF_NotAddNorCst \| PPC::MOF_ScalarFloat \| PPC::MOF_SubtargetBeforeP9,
1595	PPC::MOF_AddrIsSImm32 \| PPC::MOF_ScalarFloat \| PPC::MOF_SubtargetBeforeP9,
1596	};
1597	AddrModesMap [PPC::AM_DSForm] = {
1598	// LWA
1599	PPC::MOF_SExt \| PPC::MOF_RPlusSImm16Mult4 \| PPC::MOF_WordInt,
1600	PPC::MOF_SExt \| PPC::MOF_NotAddNorCst \| PPC::MOF_WordInt,
1601	PPC::MOF_SExt \| PPC::MOF_AddrIsSImm32 \| PPC::MOF_WordInt,
1602	// LD, STD
1603	PPC::MOF_RPlusSImm16Mult4 \| PPC::MOF_DoubleWordInt,
1604	PPC::MOF_NotAddNorCst \| PPC::MOF_DoubleWordInt,
1605	PPC::MOF_AddrIsSImm32 \| PPC::MOF_DoubleWordInt,
1606	// DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1607	PPC::MOF_RPlusSImm16Mult4 \| PPC::MOF_ScalarFloat \| PPC::MOF_SubtargetP9,
1608	PPC::MOF_NotAddNorCst \| PPC::MOF_ScalarFloat \| PPC::MOF_SubtargetP9,
1609	PPC::MOF_AddrIsSImm32 \| PPC::MOF_ScalarFloat \| PPC::MOF_SubtargetP9,
1610	};
1611	AddrModesMap [PPC::AM_DQForm] = {
1612	// LXV, STXV
1613	PPC::MOF_RPlusSImm16Mult16 \| PPC::MOF_Vector \| PPC::MOF_SubtargetP9,
1614	PPC::MOF_NotAddNorCst \| PPC::MOF_Vector \| PPC::MOF_SubtargetP9,
1615	PPC::MOF_AddrIsSImm32 \| PPC::MOF_Vector \| PPC::MOF_SubtargetP9,
1616	};
1617	AddrModesMap [PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 \|
1618	PPC::MOF_SubtargetP10};
1619	// TODO: Add mapping for quadword load/store.
1620	}
1621
1622	/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1623	/// the desired ByVal argument alignment.
1624	static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1625	if (MaxAlign == MaxMaxAlign)
1626	return;
1627	if (VectorType *VTy = dyn_cast<VectorType>(Val: Ty)) {
1628	if (MaxMaxAlign >= `32` &&
1629	VTy->getPrimitiveSizeInBits().getFixedValue() >= `256`)
1630	MaxAlign = Align (`32`);
1631	else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= `128` &&
1632	MaxAlign < `16`)
1633	MaxAlign = Align (`16`);
1634	} else if (ArrayType *ATy = dyn_cast<ArrayType>(Val: Ty)) {
1635	Align EltAlign;
1636	getMaxByValAlign(Ty: ATy->getElementType(), MaxAlign&: EltAlign, MaxMaxAlign);
1637	if (EltAlign > MaxAlign)
1638	MaxAlign = EltAlign;
1639	} else if (StructType *STy = dyn_cast<StructType>(Val: Ty)) {
1640	for (auto *EltTy : STy->elements()) {
1641	Align EltAlign;
1642	getMaxByValAlign(Ty: EltTy, MaxAlign&: EltAlign, MaxMaxAlign);
1643	if (EltAlign > MaxAlign)
1644	MaxAlign = EltAlign;
1645	if (MaxAlign == MaxMaxAlign)
1646	break;
1647	}
1648	}
1649	}
1650
1651	/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1652	/// function arguments in the caller parameter area.
1653	Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1654	const DataLayout &DL) const {
1655	// 16byte and wider vectors are passed on 16byte boundary.
1656	// The rest is 8 on PPC64 and 4 on PPC32 boundary.
1657	Align Alignment = Subtarget.isPPC64() ? Align (`8`) : Align (`4`);
1658	if (Subtarget.hasAltivec())
1659	getMaxByValAlign(Ty, MaxAlign&: Alignment, MaxMaxAlign: Align (`16`));
1660	return Alignment;
1661	}
1662
1663	bool PPCTargetLowering::useSoftFloat() const {
1664	return Subtarget.useSoftFloat();
1665	}
1666
1667	bool PPCTargetLowering::hasSPE() const {
1668	return Subtarget.hasSPE();
1669	}
1670
1671	bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1672	return VT.isScalarInteger();
1673	}
1674
1675	bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1676	Type VectorTy, unsigned* ElemSizeInBits, unsigned &Index) const {
1677	if (!Subtarget.isPPC64() \|\| !Subtarget.hasVSX())
1678	return false;
1679
1680	if (auto *VTy = dyn_cast<VectorType>(Val: VectorTy)) {
1681	if (VTy->getScalarType()->isIntegerTy()) {
1682	// ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1683	if (ElemSizeInBits == `32`) {
1684	Index = Subtarget.isLittleEndian() ? `2` : `1`;
1685	return true;
1686	}
1687	if (ElemSizeInBits == `64`) {
1688	Index = Subtarget.isLittleEndian() ? `1` : `0`;
1689	return true;
1690	}
1691	}
1692	}
1693	return false;
1694	}
1695
1696	EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1697	EVT VT) const {
1698	if (!VT.isVector())
1699	return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1700
1701	return VT.changeVectorElementTypeToInteger();
1702	}
1703
1704	bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1705	assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1706	return true;
1707	}
1708
1709	//===----------------------------------------------------------------------===//
1710	// Node matching predicates, for use by the tblgen matching code.
1711	//===----------------------------------------------------------------------===//
1712
1713	/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1714	static bool isFloatingPointZero(SDValue Op) {
1715	if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val&: Op))
1716	return CFP->getValueAPF().isZero();
1717	else if (ISD::isEXTLoad(N: Op.getNode()) \|\| ISD::isNON_EXTLoad(N: Op.getNode())) {
1718	// Maybe this has already been legalized into the constant pool?
1719	if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val: Op.getOperand(i: `1`)))
1720	if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: CP->getConstVal()))
1721	return CFP->getValueAPF().isZero();
1722	}
1723	return false;
1724	}
1725
/// isConstantOrUndef - Op is a shuffle-mask element, where a negative value
/// stands for undef. Return true if Op is undef or if it matches the
/// specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}
1731
1732	/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1733	/// VPKUHUM instruction.
1734	/// The ShuffleKind distinguishes between big-endian operations with
1735	/// two different inputs (0), either-endian operations with two identical
1736	/// inputs (1), and little-endian operations with two different inputs (2).
1737	/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1738	bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode N, unsigned* ShuffleKind,
1739	SelectionDAG &DAG) {
1740	bool IsLE = DAG.getDataLayout().isLittleEndian();
1741	if (ShuffleKind == `0`) {
1742	if (IsLE)
1743	return false;
1744	for (unsigned i = `0`; i != `16`; ++i)
1745	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*`2`+`1`))
1746	return false;
1747	} else if (ShuffleKind == `2`) {
1748	if (!IsLE)
1749	return false;
1750	for (unsigned i = `0`; i != `16`; ++i)
1751	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*`2`))
1752	return false;
1753	} else if (ShuffleKind == `1`) {
1754	unsigned j = IsLE ? `0` : `1`;
1755	for (unsigned i = `0`; i != `8`; ++i)
1756	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*`2`+j) \|\|
1757	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`8`), Val: i*`2`+j))
1758	return false;
1759	}
1760	return true;
1761	}
1762
1763	/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1764	/// VPKUWUM instruction.
1765	/// The ShuffleKind distinguishes between big-endian operations with
1766	/// two different inputs (0), either-endian operations with two identical
1767	/// inputs (1), and little-endian operations with two different inputs (2).
1768	/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1769	bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode N, unsigned* ShuffleKind,
1770	SelectionDAG &DAG) {
1771	bool IsLE = DAG.getDataLayout().isLittleEndian();
1772	if (ShuffleKind == `0`) {
1773	if (IsLE)
1774	return false;
1775	for (unsigned i = `0`; i != `16`; i += `2`)
1776	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*`2`+`2`) \|\|
1777	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`1`), Val: i*`2`+`3`))
1778	return false;
1779	} else if (ShuffleKind == `2`) {
1780	if (!IsLE)
1781	return false;
1782	for (unsigned i = `0`; i != `16`; i += `2`)
1783	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*`2`) \|\|
1784	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`1`), Val: i*`2`+`1`))
1785	return false;
1786	} else if (ShuffleKind == `1`) {
1787	unsigned j = IsLE ? `0` : `2`;
1788	for (unsigned i = `0`; i != `8`; i += `2`)
1789	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*`2`+j) \|\|
1790	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`1`), Val: i*`2`+j+`1`) \|\|
1791	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`8`), Val: i*`2`+j) \|\|
1792	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`9`), Val: i*`2`+j+`1`))
1793	return false;
1794	}
1795	return true;
1796	}
1797
1798	/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1799	/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1800	/// current subtarget.
1801	///
1802	/// The ShuffleKind distinguishes between big-endian operations with
1803	/// two different inputs (0), either-endian operations with two identical
1804	/// inputs (1), and little-endian operations with two different inputs (2).
1805	/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1806	bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode N, unsigned* ShuffleKind,
1807	SelectionDAG &DAG) {
1808	const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1809	if (!Subtarget.hasP8Vector())
1810	return false;
1811
1812	bool IsLE = DAG.getDataLayout().isLittleEndian();
1813	if (ShuffleKind == `0`) {
1814	if (IsLE)
1815	return false;
1816	for (unsigned i = `0`; i != `16`; i += `4`)
1817	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*`2`+`4`) \|\|
1818	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`1`), Val: i*`2`+`5`) \|\|
1819	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`2`), Val: i*`2`+`6`) \|\|
1820	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`3`), Val: i*`2`+`7`))
1821	return false;
1822	} else if (ShuffleKind == `2`) {
1823	if (!IsLE)
1824	return false;
1825	for (unsigned i = `0`; i != `16`; i += `4`)
1826	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*`2`) \|\|
1827	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`1`), Val: i*`2`+`1`) \|\|
1828	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`2`), Val: i*`2`+`2`) \|\|
1829	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`3`), Val: i*`2`+`3`))
1830	return false;
1831	} else if (ShuffleKind == `1`) {
1832	unsigned j = IsLE ? `0` : `4`;
1833	for (unsigned i = `0`; i != `8`; i += `4`)
1834	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*`2`+j) \|\|
1835	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`1`), Val: i*`2`+j+`1`) \|\|
1836	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`2`), Val: i*`2`+j+`2`) \|\|
1837	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`3`), Val: i*`2`+j+`3`) \|\|
1838	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`8`), Val: i*`2`+j) \|\|
1839	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`9`), Val: i*`2`+j+`1`) \|\|
1840	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`10`), Val: i*`2`+j+`2`) \|\|
1841	!isConstantOrUndef(Op: N->getMaskElt(Idx: i+`11`), Val: i*`2`+j+`3`))
1842	return false;
1843	}
1844	return true;
1845	}
1846
1847	/// isVMerge - Common function, used to match vmrg shuffles.*
1848	///
1849	static bool isVMerge(ShuffleVectorSDNode N, unsigned* UnitSize,
1850	unsigned LHSStart, unsigned RHSStart) {
1851	if (N->getValueType(ResNo: `0`) != MVT::v16i8)
1852	return false;
1853	assert((UnitSize == `1` \|\| UnitSize == `2` \|\| UnitSize == `4`) &&
1854	"Unsupported merge size!");
1855
1856	for (unsigned i = `0`; i != `8`/UnitSize; ++i) // Step over units
1857	for (unsigned j = `0`; j != UnitSize; ++j) { // Step over bytes within unit
1858	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: iUnitSize`2`+j),
1859	Val: LHSStart+j+i*UnitSize) \|\|
1860	!isConstantOrUndef(Op: N->getMaskElt(Idx: iUnitSize`2`+UnitSize+j),
1861	Val: RHSStart+j+i*UnitSize))
1862	return false;
1863	}
1864	return true;
1865	}
1866
1867	/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1868	/// a VMRGL instruction with the specified unit size (1,2 or 4 bytes).*
1869	/// The ShuffleKind distinguishes between big-endian merges with two
1870	/// different inputs (0), either-endian merges with two identical inputs (1),
1871	/// and little-endian merges with two different inputs (2). For the latter,
1872	/// the input operands are swapped (see PPCInstrAltivec.td).
1873	bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode N, unsigned* UnitSize,
1874	unsigned ShuffleKind, SelectionDAG &DAG) {
1875	if (DAG.getDataLayout().isLittleEndian()) {
1876	if (ShuffleKind == `1`) // unary
1877	return isVMerge(N, UnitSize, LHSStart: `0`, RHSStart: `0`);
1878	else if (ShuffleKind == `2`) // swapped
1879	return isVMerge(N, UnitSize, LHSStart: `0`, RHSStart: `16`);
1880	else
1881	return false;
1882	} else {
1883	if (ShuffleKind == `1`) // unary
1884	return isVMerge(N, UnitSize, LHSStart: `8`, RHSStart: `8`);
1885	else if (ShuffleKind == `0`) // normal
1886	return isVMerge(N, UnitSize, LHSStart: `8`, RHSStart: `24`);
1887	else
1888	return false;
1889	}
1890	}
1891
1892	/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1893	/// a VMRGH instruction with the specified unit size (1,2 or 4 bytes).*
1894	/// The ShuffleKind distinguishes between big-endian merges with two
1895	/// different inputs (0), either-endian merges with two identical inputs (1),
1896	/// and little-endian merges with two different inputs (2). For the latter,
1897	/// the input operands are swapped (see PPCInstrAltivec.td).
1898	bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode N, unsigned* UnitSize,
1899	unsigned ShuffleKind, SelectionDAG &DAG) {
1900	if (DAG.getDataLayout().isLittleEndian()) {
1901	if (ShuffleKind == `1`) // unary
1902	return isVMerge(N, UnitSize, LHSStart: `8`, RHSStart: `8`);
1903	else if (ShuffleKind == `2`) // swapped
1904	return isVMerge(N, UnitSize, LHSStart: `8`, RHSStart: `24`);
1905	else
1906	return false;
1907	} else {
1908	if (ShuffleKind == `1`) // unary
1909	return isVMerge(N, UnitSize, LHSStart: `0`, RHSStart: `0`);
1910	else if (ShuffleKind == `0`) // normal
1911	return isVMerge(N, UnitSize, LHSStart: `0`, RHSStart: `16`);
1912	else
1913	return false;
1914	}
1915	}
1916
1917	/**
1918	* Common function used to match vmrgew and vmrgow shuffles
1919	*
1920	* The indexOffset determines whether to look for even or odd words in
1921	* the shuffle mask. This is based on the of the endianness of the target
1922	* machine.
1923	* - Little Endian:
1924	* - Use offset of 0 to check for odd elements
1925	* - Use offset of 4 to check for even elements
1926	* - Big Endian:
1927	* - Use offset of 0 to check for even elements
1928	* - Use offset of 4 to check for odd elements
1929	* A detailed description of the vector element ordering for little endian and
1930	* big endian can be found at
1931	* http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1932	* Targeting your applications - what little endian and big endian IBM XL C/C++
1933	* compiler differences mean to you
1934	*
1935	* The mask to the shuffle vector instruction specifies the indices of the
1936	* elements from the two input vectors to place in the result. The elements are
1937	* numbered in array-access order, starting with the first vector. These vectors
1938	* are always of type v16i8, thus each vector will contain 16 elements of size
1939	* 8. More info on the shuffle vector can be found in the
1940	* http://llvm.org/docs/LangRef.html#shufflevector-instruction
1941	* Language Reference.
1942	*
1943	* The RHSStartValue indicates whether the same input vectors are used (unary)
1944	* or two different input vectors are used, based on the following:
1945	* - If the instruction uses the same vector for both inputs, the range of the
1946	* indices will be 0 to 15. In this case, the RHSStart value passed should
1947	* be 0.
1948	* - If the instruction has two different vectors then the range of the
1949	* indices will be 0 to 31. In this case, the RHSStart value passed should
1950	* be 16 (indices 0-15 specify elements in the first vector while indices 16
1951	* to 31 specify elements in the second vector).
1952	*
1953	* \param[in] N The shuffle vector SD Node to analyze
1954	* \param[in] IndexOffset Specifies whether to look for even or odd elements
1955	* \param[in] RHSStartValue Specifies the starting index for the righthand input
1956	* vector to the shuffle_vector instruction
1957	* \return true iff this shuffle vector represents an even or odd word merge
1958	*/
1959	static bool isVMerge(ShuffleVectorSDNode N, unsigned* IndexOffset,
1960	unsigned RHSStartValue) {
1961	if (N->getValueType(ResNo: `0`) != MVT::v16i8)
1962	return false;
1963
1964	for (unsigned i = `0`; i < `2`; ++i)
1965	for (unsigned j = `0`; j < `4`; ++j)
1966	if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i*`4`+j),
1967	Val: i*RHSStartValue+j+IndexOffset) \|\|
1968	!isConstantOrUndef(Op: N->getMaskElt(Idx: i*`4`+j+`8`),
1969	Val: i*RHSStartValue+j+IndexOffset+`8`))
1970	return false;
1971	return true;
1972	}
1973
1974	/**
1975	* Determine if the specified shuffle mask is suitable for the vmrgew or
1976	* vmrgow instructions.
1977	*
1978	* \param[in] N The shuffle vector SD Node to analyze
1979	* \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1980	* \param[in] ShuffleKind Identify the type of merge:
1981	* - 0 = big-endian merge with two different inputs;
1982	* - 1 = either-endian merge with two identical inputs;
1983	* - 2 = little-endian merge with two different inputs (inputs are swapped for
1984	* little-endian merges).
1985	* \param[in] DAG The current SelectionDAG
1986	* \return true iff this shuffle mask
1987	*/
1988	bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode N, bool* CheckEven,
1989	unsigned ShuffleKind, SelectionDAG &DAG) {
1990	if (DAG.getDataLayout().isLittleEndian()) {
1991	unsigned indexOffset = CheckEven ? `4` : `0`;
1992	if (ShuffleKind == `1`) // Unary
1993	return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: `0`);
1994	else if (ShuffleKind == `2`) // swapped
1995	return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: `16`);
1996	else
1997	return false;
1998	}
1999	else {
2000	unsigned indexOffset = CheckEven ? `0` : `4`;
2001	if (ShuffleKind == `1`) // Unary
2002	return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: `0`);
2003	else if (ShuffleKind == `0`) // Normal
2004	return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: `16`);
2005	else
2006	return false;
2007	}
2008	return false;
2009	}
2010
2011	/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2012	/// amount, otherwise return -1.
2013	/// The ShuffleKind distinguishes between big-endian operations with two
2014	/// different inputs (0), either-endian operations with two identical inputs
2015	/// (1), and little-endian operations with two different inputs (2). For the
2016	/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2017	int PPC::isVSLDOIShuffleMask(SDNode N, unsigned* ShuffleKind,
2018	SelectionDAG &DAG) {
2019	if (N->getValueType(ResNo: `0`) != MVT::v16i8)
2020	return -`1`;
2021
2022	ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);
2023
2024	// Find the first non-undef value in the shuffle mask.
2025	unsigned i;
2026	for (i = `0`; i != `16` && SVOp->getMaskElt(Idx: i) < `0`; ++i)
2027	/search/;
2028
2029	if (i == `16`) return -`1`; // all undef.
2030
2031	// Otherwise, check to see if the rest of the elements are consecutively
2032	// numbered from this value.
2033	unsigned ShiftAmt = SVOp->getMaskElt(Idx: i);
2034	if (ShiftAmt < i) return -`1`;
2035
2036	ShiftAmt -= i;
2037	bool isLE = DAG.getDataLayout().isLittleEndian();
2038
2039	if ((ShuffleKind == `0` && !isLE) \|\| (ShuffleKind == `2` && isLE)) {
2040	// Check the rest of the elements to see if they are consecutive.
2041	for (++i; i != `16`; ++i)
2042	if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: ShiftAmt+i))
2043	return -`1`;
2044	} else if (ShuffleKind == `1`) {
2045	// Check the rest of the elements to see if they are consecutive.
2046	for (++i; i != `16`; ++i)
2047	if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: (ShiftAmt+i) & `15`))
2048	return -`1`;
2049	} else
2050	return -`1`;
2051
2052	if (isLE)
2053	ShiftAmt = `16` - ShiftAmt;
2054
2055	return ShiftAmt;
2056	}
2057
2058	/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2059	/// specifies a splat of a single element that is suitable for input to
2060	/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2061	bool PPC::isSplatShuffleMask(ShuffleVectorSDNode N, unsigned* EltSize) {
2062	EVT VT = N->getValueType(ResNo: `0`);
2063	if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
2064	return EltSize == `8` && N->getMaskElt(Idx: `0`) == N->getMaskElt(Idx: `1`);
2065
2066	assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2067	EltSize <= `8` && "Can only handle 1,2,4,8 byte element sizes");
2068
2069	// The consecutive indices need to specify an element, not part of two
2070	// different elements. So abandon ship early if this isn't the case.
2071	if (N->getMaskElt(Idx: `0`) % EltSize != `0`)
2072	return false;
2073
2074	// This is a splat operation if each element of the permute is the same, and
2075	// if the value doesn't reference the second vector.
2076	unsigned ElementBase = N->getMaskElt(Idx: `0`);
2077
2078	// FIXME: Handle UNDEF elements too!
2079	if (ElementBase >= `16`)
2080	return false;
2081
2082	// Check that the indices are consecutive, in the case of a multi-byte element
2083	// splatted with a v16i8 mask.
2084	for (unsigned i = `1`; i != EltSize; ++i)
2085	if (N->getMaskElt(Idx: i) < `0` \|\| N->getMaskElt(Idx: i) != (int)(i+ElementBase))
2086	return false;
2087
2088	for (unsigned i = EltSize, e = `16`; i != e; i += EltSize) {
2089	// An UNDEF element is a sequence of UNDEF bytes.
2090	if (N->getMaskElt(Idx: i) < `0`) {
2091	for (unsigned j = `1`; j != EltSize; ++j)
2092	if (N->getMaskElt(Idx: i + j) >= `0`)
2093	return false;
2094	} else
2095	for (unsigned j = `0`; j != EltSize; ++j)
2096	if (N->getMaskElt(Idx: i + j) != N->getMaskElt(Idx: j))
2097	return false;
2098	}
2099	return true;
2100	}
2101
2102	/// Check that the mask is shuffling N byte elements. Within each N byte
2103	/// element of the mask, the indices could be either in increasing or
2104	/// decreasing order as long as they are consecutive.
2105	/// \param[in] N the shuffle vector SD Node to analyze
2106	/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2107	/// Word/DoubleWord/QuadWord).
2108	/// \param[in] StepLen the delta indices number among the N byte element, if
2109	/// the mask is in increasing/decreasing order then it is 1/-1.
2110	/// \return true iff the mask is shuffling N byte elements.
2111	static bool isNByteElemShuffleMask(ShuffleVectorSDNode N, unsigned* Width,
2112	int StepLen) {
2113	assert((Width == `2` \|\| Width == `4` \|\| Width == `8` \|\| Width == `16`) &&
2114	"Unexpected element width.");
2115	assert((StepLen == `1` \|\| StepLen == -`1`) && "Unexpected element width.");
2116
2117	unsigned NumOfElem = `16` / Width;
2118	unsigned MaskVal[`16`]; // Width is never greater than 16
2119	for (unsigned i = `0`; i < NumOfElem; ++i) {
2120	MaskVal[`0`] = N->getMaskElt(Idx: i * Width);
2121	if ((StepLen == `1`) && (MaskVal[`0`] % Width)) {
2122	return false;
2123	} else if ((StepLen == -`1`) && ((MaskVal[`0`] + `1`) % Width)) {
2124	return false;
2125	}
2126
2127	for (unsigned int j = `1`; j < Width; ++j) {
2128	MaskVal[j] = N->getMaskElt(Idx: i * Width + j);
2129	if (MaskVal[j] != MaskVal[j-`1`] + StepLen) {
2130	return false;
2131	}
2132	}
2133	}
2134
2135	return true;
2136	}
2137
2138	bool PPC::isXXINSERTWMask(ShuffleVectorSDNode N, unsigned* &ShiftElts,
2139	unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2140	if (!isNByteElemShuffleMask(N, Width: `4`, StepLen: `1`))
2141	return false;
2142
2143	// Now we look at mask elements 0,4,8,12
2144	unsigned M0 = N->getMaskElt(Idx: `0`) / `4`;
2145	unsigned M1 = N->getMaskElt(Idx: `4`) / `4`;
2146	unsigned M2 = N->getMaskElt(Idx: `8`) / `4`;
2147	unsigned M3 = N->getMaskElt(Idx: `12`) / `4`;
2148	unsigned LittleEndianShifts[] = { `2`, `1`, `0`, `3` };
2149	unsigned BigEndianShifts[] = { `3`, `0`, `1`, `2` };
2150
2151	// Below, let H and L be arbitrary elements of the shuffle mask
2152	// where H is in the range [4,7] and L is in the range [0,3].
2153	// H, 1, 2, 3 or L, 5, 6, 7
2154	if ((M0 > `3` && M1 == `1` && M2 == `2` && M3 == `3`) \|\|
2155	(M0 < `4` && M1 == `5` && M2 == `6` && M3 == `7`)) {
2156	ShiftElts = IsLE ? LittleEndianShifts[M0 & `0x3`] : BigEndianShifts[M0 & `0x3`];
2157	InsertAtByte = IsLE ? `12` : `0`;
2158	Swap = M0 < `4`;
2159	return true;
2160	}
2161	// 0, H, 2, 3 or 4, L, 6, 7
2162	if ((M1 > `3` && M0 == `0` && M2 == `2` && M3 == `3`) \|\|
2163	(M1 < `4` && M0 == `4` && M2 == `6` && M3 == `7`)) {
2164	ShiftElts = IsLE ? LittleEndianShifts[M1 & `0x3`] : BigEndianShifts[M1 & `0x3`];
2165	InsertAtByte = IsLE ? `8` : `4`;
2166	Swap = M1 < `4`;
2167	return true;
2168	}
2169	// 0, 1, H, 3 or 4, 5, L, 7
2170	if ((M2 > `3` && M0 == `0` && M1 == `1` && M3 == `3`) \|\|
2171	(M2 < `4` && M0 == `4` && M1 == `5` && M3 == `7`)) {
2172	ShiftElts = IsLE ? LittleEndianShifts[M2 & `0x3`] : BigEndianShifts[M2 & `0x3`];
2173	InsertAtByte = IsLE ? `4` : `8`;
2174	Swap = M2 < `4`;
2175	return true;
2176	}
2177	// 0, 1, 2, H or 4, 5, 6, L
2178	if ((M3 > `3` && M0 == `0` && M1 == `1` && M2 == `2`) \|\|
2179	(M3 < `4` && M0 == `4` && M1 == `5` && M2 == `6`)) {
2180	ShiftElts = IsLE ? LittleEndianShifts[M3 & `0x3`] : BigEndianShifts[M3 & `0x3`];
2181	InsertAtByte = IsLE ? `0` : `12`;
2182	Swap = M3 < `4`;
2183	return true;
2184	}
2185
2186	// If both vector operands for the shuffle are the same vector, the mask will
2187	// contain only elements from the first one and the second one will be undef.
2188	if (N->getOperand(Num: `1`).isUndef()) {
2189	ShiftElts = `0`;
2190	Swap = true;
2191	unsigned XXINSERTWSrcElem = IsLE ? `2` : `1`;
2192	if (M0 == XXINSERTWSrcElem && M1 == `1` && M2 == `2` && M3 == `3`) {
2193	InsertAtByte = IsLE ? `12` : `0`;
2194	return true;
2195	}
2196	if (M0 == `0` && M1 == XXINSERTWSrcElem && M2 == `2` && M3 == `3`) {
2197	InsertAtByte = IsLE ? `8` : `4`;
2198	return true;
2199	}
2200	if (M0 == `0` && M1 == `1` && M2 == XXINSERTWSrcElem && M3 == `3`) {
2201	InsertAtByte = IsLE ? `4` : `8`;
2202	return true;
2203	}
2204	if (M0 == `0` && M1 == `1` && M2 == `2` && M3 == XXINSERTWSrcElem) {
2205	InsertAtByte = IsLE ? `0` : `12`;
2206	return true;
2207	}
2208	}
2209
2210	return false;
2211	}
2212
2213	bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode N, unsigned* &ShiftElts,
2214	bool &Swap, bool IsLE) {
2215	assert(N->getValueType(`0`) == MVT::v16i8 && "Shuffle vector expects v16i8");
2216	// Ensure each byte index of the word is consecutive.
2217	if (!isNByteElemShuffleMask(N, Width: `4`, StepLen: `1`))
2218	return false;
2219
2220	// Now we look at mask elements 0,4,8,12, which are the beginning of words.
2221	unsigned M0 = N->getMaskElt(Idx: `0`) / `4`;
2222	unsigned M1 = N->getMaskElt(Idx: `4`) / `4`;
2223	unsigned M2 = N->getMaskElt(Idx: `8`) / `4`;
2224	unsigned M3 = N->getMaskElt(Idx: `12`) / `4`;
2225
2226	// If both vector operands for the shuffle are the same vector, the mask will
2227	// contain only elements from the first one and the second one will be undef.
2228	if (N->getOperand(Num: `1`).isUndef()) {
2229	assert(M0 < `4` && "Indexing into an undef vector?");
2230	if (M1 != (M0 + `1`) % `4` \|\| M2 != (M1 + `1`) % `4` \|\| M3 != (M2 + `1`) % `4`)
2231	return false;
2232
2233	ShiftElts = IsLE ? (`4` - M0) % `4` : M0;
2234	Swap = false;
2235	return true;
2236	}
2237
2238	// Ensure each word index of the ShuffleVector Mask is consecutive.
2239	if (M1 != (M0 + `1`) % `8` \|\| M2 != (M1 + `1`) % `8` \|\| M3 != (M2 + `1`) % `8`)
2240	return false;
2241
2242	if (IsLE) {
2243	if (M0 == `0` \|\| M0 == `7` \|\| M0 == `6` \|\| M0 == `5`) {
2244	// Input vectors don't need to be swapped if the leading element
2245	// of the result is one of the 3 left elements of the second vector
2246	// (or if there is no shift to be done at all).
2247	Swap = false;
2248	ShiftElts = (`8` - M0) % `8`;
2249	} else if (M0 == `4` \|\| M0 == `3` \|\| M0 == `2` \|\| M0 == `1`) {
2250	// Input vectors need to be swapped if the leading element
2251	// of the result is one of the 3 left elements of the first vector
2252	// (or if we're shifting by 4 - thereby simply swapping the vectors).
2253	Swap = true;
2254	ShiftElts = (`4` - M0) % `4`;
2255	}
2256
2257	return true;
2258	} else { // BE
2259	if (M0 == `0` \|\| M0 == `1` \|\| M0 == `2` \|\| M0 == `3`) {
2260	// Input vectors don't need to be swapped if the leading element
2261	// of the result is one of the 4 elements of the first vector.
2262	Swap = false;
2263	ShiftElts = M0;
2264	} else if (M0 == `4` \|\| M0 == `5` \|\| M0 == `6` \|\| M0 == `7`) {
2265	// Input vectors need to be swapped if the leading element
2266	// of the result is one of the 4 elements of the right vector.
2267	Swap = true;
2268	ShiftElts = M0 - `4`;
2269	}
2270
2271	return true;
2272	}
2273	}
2274
2275	bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode N, int* Width) {
2276	assert(N->getValueType(`0`) == MVT::v16i8 && "Shuffle vector expects v16i8");
2277
2278	if (!isNByteElemShuffleMask(N, Width, StepLen: -`1`))
2279	return false;
2280
2281	for (int i = `0`; i < `16`; i += Width)
2282	if (N->getMaskElt(Idx: i) != i + Width - `1`)
2283	return false;
2284
2285	return true;
2286	}
2287
2288	bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2289	return isXXBRShuffleMaskHelper(N, Width: `2`);
2290	}
2291
2292	bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2293	return isXXBRShuffleMaskHelper(N, Width: `4`);
2294	}
2295
2296	bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2297	return isXXBRShuffleMaskHelper(N, Width: `8`);
2298	}
2299
2300	bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2301	return isXXBRShuffleMaskHelper(N, Width: `16`);
2302	}
2303
2304	/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2305	/// if the inputs to the instruction should be swapped and set \p DM to the
2306	/// value for the immediate.
2307	/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2308	/// AND element 0 of the result comes from the first input (LE) or second input
2309	/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2310	/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2311	/// mask.
2312	bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode N, unsigned* &DM,
2313	bool &Swap, bool IsLE) {
2314	assert(N->getValueType(`0`) == MVT::v16i8 && "Shuffle vector expects v16i8");
2315
2316	// Ensure each byte index of the double word is consecutive.
2317	if (!isNByteElemShuffleMask(N, Width: `8`, StepLen: `1`))
2318	return false;
2319
2320	unsigned M0 = N->getMaskElt(Idx: `0`) / `8`;
2321	unsigned M1 = N->getMaskElt(Idx: `8`) / `8`;
2322	assert(((M0 \| M1) < `4`) && "A mask element out of bounds?");
2323
2324	// If both vector operands for the shuffle are the same vector, the mask will
2325	// contain only elements from the first one and the second one will be undef.
2326	if (N->getOperand(Num: `1`).isUndef()) {
2327	if ((M0 \| M1) < `2`) {
2328	DM = IsLE ? (((~M1) & `1`) << `1`) + ((~M0) & `1`) : (M0 << `1`) + (M1 & `1`);
2329	Swap = false;
2330	return true;
2331	} else
2332	return false;
2333	}
2334
2335	if (IsLE) {
2336	if (M0 > `1` && M1 < `2`) {
2337	Swap = false;
2338	} else if (M0 < `2` && M1 > `1`) {
2339	M0 = (M0 + `2`) % `4`;
2340	M1 = (M1 + `2`) % `4`;
2341	Swap = true;
2342	} else
2343	return false;
2344
2345	// Note: if control flow comes here that means Swap is already set above
2346	DM = (((~M1) & `1`) << `1`) + ((~M0) & `1`);
2347	return true;
2348	} else { // BE
2349	if (M0 < `2` && M1 > `1`) {
2350	Swap = false;
2351	} else if (M0 > `1` && M1 < `2`) {
2352	M0 = (M0 + `2`) % `4`;
2353	M1 = (M1 + `2`) % `4`;
2354	Swap = true;
2355	} else
2356	return false;
2357
2358	// Note: if control flow comes here that means Swap is already set above
2359	DM = (M0 << `1`) + (M1 & `1`);
2360	return true;
2361	}
2362	}
2363
2364
2365	/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2366	/// appropriate for PPC mnemonics (which have a big endian bias - namely
2367	/// elements are counted from the left of the vector register).
2368	unsigned PPC::getSplatIdxForPPCMnemonics(SDNode N, unsigned* EltSize,
2369	SelectionDAG &DAG) {
2370	ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);
2371	assert(isSplatShuffleMask(SVOp, EltSize));
2372	EVT VT = SVOp->getValueType(ResNo: `0`);
2373
2374	if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
2375	return DAG.getDataLayout().isLittleEndian() ? `1` - SVOp->getMaskElt(Idx: `0`)
2376	: SVOp->getMaskElt(Idx: `0`);
2377
2378	if (DAG.getDataLayout().isLittleEndian())
2379	return (`16` / EltSize) - `1` - (SVOp->getMaskElt(Idx: `0`) / EltSize);
2380	else
2381	return SVOp->getMaskElt(Idx: `0`) / EltSize;
2382	}
2383
2384	/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2385	/// by using a vspltis[bhw] instruction of the specified element size, return
2386	/// the constant being splatted. The ByteSize field indicates the number of
2387	/// bytes of each element [124] -> [bhw].
2388	SDValue PPC::get_VSPLTI_elt(SDNode N, unsigned* ByteSize, SelectionDAG &DAG) {
2389	SDValue OpVal;
2390
2391	// If ByteSize of the splat is bigger than the element size of the
2392	// build_vector, then we have a case where we are checking for a splat where
2393	// multiple elements of the buildvector are folded together into a single
2394	// logical element of the splat (e.g. "vsplish 1" to splat {0,1}8).*
2395	unsigned EltSize = `16`/N->getNumOperands();
2396	if (EltSize < ByteSize) {
2397	unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2398	SDValue UniquedVals[`4`];
2399	assert(Multiple > `1` && Multiple <= `4` && "How can this happen?");
2400
2401	// See if all of the elements in the buildvector agree across.
2402	for (unsigned i = `0`, e = N->getNumOperands(); i != e; ++i) {
2403	if (N->getOperand(Num: i).isUndef()) continue;
2404	// If the element isn't a constant, bail fully out.
2405	if (!isa<ConstantSDNode>(Val: N->getOperand(Num: i))) return SDValue ();
2406
2407	if (!UniquedVals[i&(Multiple-`1`)].getNode())
2408	UniquedVals[i&(Multiple-`1`)] = N->getOperand(Num: i);
2409	else if (UniquedVals[i&(Multiple-`1`)] != N->getOperand(Num: i))
2410	return SDValue (); // no match.
2411	}
2412
2413	// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2414	// either constant or undef values that are identical for each chunk. See
2415	// if these chunks can form into a larger vspltis.*
2416
2417	// Check to see if all of the leading entries are either 0 or -1. If
2418	// neither, then this won't fit into the immediate field.
2419	bool LeadingZero = true;
2420	bool LeadingOnes = true;
2421	for (unsigned i = `0`; i != Multiple-`1`; ++i) {
2422	if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2423
2424	LeadingZero &= isNullConstant(V: UniquedVals[i]);
2425	LeadingOnes &= isAllOnesConstant(V: UniquedVals[i]);
2426	}
2427	// Finally, check the least significant entry.
2428	if (LeadingZero) {
2429	if (!UniquedVals[Multiple-`1`].getNode())
2430	return DAG.getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32); // 0,0,0,undef
2431	int Val = UniquedVals[Multiple - `1`]->getAsZExtVal();
2432	if (Val < `16`) // 0,0,0,4 -> vspltisw(4)
2433	return DAG.getTargetConstant(Val, DL: SDLoc (N), VT: MVT::i32);
2434	}
2435	if (LeadingOnes) {
2436	if (!UniquedVals[Multiple-`1`].getNode())
2437	return DAG.getTargetConstant(Val: ~`0U`, DL: SDLoc (N), VT: MVT::i32); // -1,-1,-1,undef
2438	int Val =cast<ConstantSDNode>(Val&: UniquedVals[Multiple-`1`])->getSExtValue();
2439	if (Val >= -`16`) // -1,-1,-1,-2 -> vspltisw(-2)
2440	return DAG.getTargetConstant(Val, DL: SDLoc (N), VT: MVT::i32);
2441	}
2442
2443	return SDValue ();
2444	}
2445
2446	// Check to see if this buildvec has a single non-undef value in its elements.
2447	for (unsigned i = `0`, e = N->getNumOperands(); i != e; ++i) {
2448	if (N->getOperand(Num: i).isUndef()) continue;
2449	if (!OpVal.getNode())
2450	OpVal = N->getOperand(Num: i);
2451	else if (OpVal != N->getOperand(Num: i))
2452	return SDValue ();
2453	}
2454
2455	if (!OpVal.getNode()) return SDValue (); // All UNDEF: use implicit def.
2456
2457	unsigned ValSizeInBytes = EltSize;
2458	uint64_t Value = `0`;
2459	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: OpVal)) {
2460	Value = CN->getZExtValue();
2461	} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: OpVal)) {
2462	assert(CN->getValueType(`0`) == MVT::f32 && "Only one legal FP vector type!");
2463	Value = llvm::bit_cast<uint32_t>(from: CN->getValueAPF().convertToFloat());
2464	}
2465
2466	// If the splat value is larger than the element value, then we can never do
2467	// this splat. The only case that we could fit the replicated bits into our
2468	// immediate field for would be zero, and we prefer to use vxor for it.
2469	if (ValSizeInBytes < ByteSize) return SDValue ();
2470
2471	// If the element value is larger than the splat value, check if it consists
2472	// of a repeated bit pattern of size ByteSize.
2473	if (!APInt (ValSizeInBytes * `8`, Value).isSplat(SplatSizeInBits: ByteSize * `8`))
2474	return SDValue ();
2475
2476	// Properly sign extend the value.
2477	int MaskVal = SignExtend32(X: Value, B: ByteSize * `8`);
2478
2479	// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2480	if (MaskVal == `0`) return SDValue ();
2481
2482	// Finally, if this value fits in a 5 bit sext field, return it
2483	if (SignExtend32<`5`>(X: MaskVal) == MaskVal)
2484	return DAG.getSignedTargetConstant(Val: MaskVal, DL: SDLoc (N), VT: MVT::i32);
2485	return SDValue ();
2486	}
2487
2488	//===----------------------------------------------------------------------===//
2489	// Addressing Mode Selection
2490	//===----------------------------------------------------------------------===//
2491
2492	/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2493	/// or 64-bit immediate, and if the value can be accurately represented as a
2494	/// sign extension from a 16-bit value. If so, this returns true and the
2495	/// immediate.
2496	bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2497	if (!isa<ConstantSDNode>(Val: N))
2498	return false;
2499
2500	Imm = (int16_t)N->getAsZExtVal();
2501	if (N->getValueType(ResNo: `0`) == MVT::i32)
2502	return Imm == (int32_t)N->getAsZExtVal();
2503	else
2504	return Imm == (int64_t)N->getAsZExtVal();
2505	}
2506	bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2507	return isIntS16Immediate(N: Op.getNode(), Imm);
2508	}
2509
2510	/// Used when computing address flags for selecting loads and stores.
2511	/// If we have an OR, check if the LHS and RHS are provably disjoint.
2512	/// An OR of two provably disjoint values is equivalent to an ADD.
2513	/// Most PPC load/store instructions compute the effective address as a sum,
2514	/// so doing this conversion is useful.
2515	static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2516	if (N.getOpcode() != ISD::OR)
2517	return false;
2518	KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: `0`));
2519	if (!LHSKnown.Zero.getBoolValue())
2520	return false;
2521	KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: `1`));
2522	return (~(LHSKnown.Zero \| RHSKnown.Zero) == `0`);
2523	}
2524
2525	/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2526	/// be represented as an indexed [r+r] operation.
2527	bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2528	SDValue &Index,
2529	SelectionDAG &DAG) const {
2530	for (SDNode *U : N ->users()) {
2531	if (MemSDNode *Memop = dyn_cast<MemSDNode>(Val: U)) {
2532	if (Memop->getMemoryVT() == MVT::f64) {
2533	Base = N.getOperand(i: `0`);
2534	Index = N.getOperand(i: `1`);
2535	return true;
2536	}
2537	}
2538	}
2539	return false;
2540	}
2541
2542	/// isIntS34Immediate - This method tests if value of node given can be
2543	/// accurately represented as a sign extension from a 34-bit value. If so,
2544	/// this returns true and the immediate.
2545	bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2546	if (!isa<ConstantSDNode>(Val: N))
2547	return false;
2548
2549	Imm = cast<ConstantSDNode>(Val: N)->getSExtValue();
2550	return isInt<`34`>(x: Imm);
2551	}
2552	bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2553	return isIntS34Immediate(N: Op.getNode(), Imm);
2554	}
2555
2556	/// SelectAddressRegReg - Given the specified addressed, check to see if it
2557	/// can be represented as an indexed [r+r] operation. Returns false if it
2558	/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2559	/// non-zero and N can be represented by a base register plus a signed 16-bit
2560	/// displacement, make a more precise judgement by checking (displacement % \p
2561	/// EncodingAlignment).
2562	bool PPCTargetLowering::SelectAddressRegReg(
2563	SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2564	MaybeAlign EncodingAlignment) const {
2565	// If we have a PC Relative target flag don't select as [reg+reg]. It will be
2566	// a [pc+imm].
2567	if (SelectAddressPCRel(N, Base))
2568	return false;
2569
2570	int16_t Imm = `0`;
2571	if (N.getOpcode() == ISD::ADD) {
2572	// Is there any SPE load/store (f64), which can't handle 16bit offset?
2573	// SPE load/store can only handle 8-bit offsets.
2574	if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2575	return true;
2576	if (isIntS16Immediate(Op: N.getOperand(i: `1`), Imm) &&
2577	(!EncodingAlignment \|\| isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
2578	return false; // r+i
2579	if (N.getOperand(i: `1`).getOpcode() == PPCISD::Lo)
2580	return false; // r+i
2581
2582	Base = N.getOperand(i: `0`);
2583	Index = N.getOperand(i: `1`);
2584	return true;
2585	} else if (N.getOpcode() == ISD::OR) {
2586	if (isIntS16Immediate(Op: N.getOperand(i: `1`), Imm) &&
2587	(!EncodingAlignment \|\| isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
2588	return false; // r+i can fold it if we can.
2589
2590	// If this is an or of disjoint bitfields, we can codegen this as an add
2591	// (for better address arithmetic) if the LHS and RHS of the OR are provably
2592	// disjoint.
2593	KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: `0`));
2594
2595	if (LHSKnown.Zero.getBoolValue()) {
2596	KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: `1`));
2597	// If all of the bits are known zero on the LHS or RHS, the add won't
2598	// carry.
2599	if (~(LHSKnown.Zero \| RHSKnown.Zero) == `0`) {
2600	Base = N.getOperand(i: `0`);
2601	Index = N.getOperand(i: `1`);
2602	return true;
2603	}
2604	}
2605	}
2606
2607	return false;
2608	}
2609
2610	// If we happen to be doing an i64 load or store into a stack slot that has
2611	// less than a 4-byte alignment, then the frame-index elimination may need to
2612	// use an indexed load or store instruction (because the offset may not be a
2613	// multiple of 4). The extra register needed to hold the offset comes from the
2614	// register scavenger, and it is possible that the scavenger will need to use
2615	// an emergency spill slot. As a result, we need to make sure that a spill slot
2616	// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2617	// stack slot.
2618	static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2619	// FIXME: This does not handle the LWA case.
2620	if (VT != MVT::i64)
2621	return;
2622
2623	// NOTE: We'll exclude negative FIs here, which come from argument
2624	// lowering, because there are no known test cases triggering this problem
2625	// using packed structures (or similar). We can remove this exclusion if
2626	// we find such a test case. The reason why this is so test-case driven is
2627	// because this entire 'fixup' is only to prevent crashes (from the
2628	// register scavenger) on not-really-valid inputs. For example, if we have:
2629	// %a = alloca i1
2630	// %b = bitcast i1* %a to i64*
2631	// store i64 a, i64 b*
2632	// then the store should really be marked as 'align 1', but is not. If it
2633	// were marked as 'align 1' then the indexed form would have been
2634	// instruction-selected initially, and the problem this 'fixup' is preventing
2635	// won't happen regardless.
2636	if (FrameIdx < `0`)
2637	return;
2638
2639	MachineFunction &MF = DAG.getMachineFunction();
2640	MachineFrameInfo &MFI = MF.getFrameInfo();
2641
2642	if (MFI.getObjectAlign(ObjectIdx: FrameIdx) >= Align (`4`))
2643	return;
2644
2645	PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2646	FuncInfo->setHasNonRISpills();
2647	}
2648
2649	/// Returns true if the address N can be represented by a base register plus
2650	/// a signed 16-bit displacement [r+imm], and if it is not better
2651	/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2652	/// displacements that are multiples of that value.
2653	bool PPCTargetLowering::SelectAddressRegImm(
2654	SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2655	MaybeAlign EncodingAlignment) const {
2656	// FIXME dl should come from parent load or store, not from address
2657	SDLoc dl(N);
2658
2659	// If we have a PC Relative target flag don't select as [reg+imm]. It will be
2660	// a [pc+imm].
2661	if (SelectAddressPCRel(N, Base))
2662	return false;
2663
2664	// If this can be more profitably realized as r+r, fail.
2665	if (SelectAddressRegReg(N, Base&: Disp, Index&: Base, DAG, EncodingAlignment))
2666	return false;
2667
2668	if (N.getOpcode() == ISD::ADD) {
2669	int16_t imm = `0`;
2670	if (isIntS16Immediate(Op: N.getOperand(i: `1`), Imm&: imm) &&
2671	(!EncodingAlignment \|\| isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
2672	Disp = DAG.getSignedTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
2673	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: `0`))) {
2674	Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2675	fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
2676	} else {
2677	Base = N.getOperand(i: `0`);
2678	}
2679	return true; // [r+i]
2680	} else if (N.getOperand(i: `1`).getOpcode() == PPCISD::Lo) {
2681	// Match LOAD (ADD (X, Lo(G))).
2682	assert(!N.getOperand(`1`).getConstantOperandVal(`1`) &&
2683	"Cannot handle constant offsets yet!");
2684	Disp = N.getOperand(i: `1`).getOperand(i: `0`); // The global address.
2685	assert(Disp.getOpcode() == ISD::TargetGlobalAddress \|\|
2686	Disp.getOpcode() == ISD::TargetGlobalTLSAddress \|\|
2687	Disp.getOpcode() == ISD::TargetConstantPool \|\|
2688	Disp.getOpcode() == ISD::TargetJumpTable);
2689	Base = N.getOperand(i: `0`);
2690	return true; // [&g+r]
2691	}
2692	} else if (N.getOpcode() == ISD::OR) {
2693	int16_t imm = `0`;
2694	if (isIntS16Immediate(Op: N.getOperand(i: `1`), Imm&: imm) &&
2695	(!EncodingAlignment \|\| isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
2696	// If this is an or of disjoint bitfields, we can codegen this as an add
2697	// (for better address arithmetic) if the LHS and RHS of the OR are
2698	// provably disjoint.
2699	KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: `0`));
2700
2701	if ((LHSKnown.Zero.getZExtValue()\|~(uint64_t)imm) == ~`0ULL`) {
2702	// If all of the bits are known zero on the LHS or RHS, the add won't
2703	// carry.
2704	if (FrameIndexSDNode *FI =
2705	dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: `0`))) {
2706	Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2707	fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
2708	} else {
2709	Base = N.getOperand(i: `0`);
2710	}
2711	Disp = DAG.getTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
2712	return true;
2713	}
2714	}
2715	} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
2716	// Loading from a constant address.
2717
2718	// If this address fits entirely in a 16-bit sext immediate field, codegen
2719	// this as "d, 0"
2720	int16_t Imm;
2721	if (isIntS16Immediate(N: CN, Imm) &&
2722	(!EncodingAlignment \|\| isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm))) {
2723	Disp = DAG.getTargetConstant(Val: Imm, DL: dl, VT: CN->getValueType(ResNo: `0`));
2724	Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2725	VT: CN->getValueType(ResNo: `0`));
2726	return true;
2727	}
2728
2729	// Handle 32-bit sext immediates with LIS + addr mode.
2730	if ((CN->getValueType(ResNo: `0`) == MVT::i32 \|\|
2731	(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2732	(!EncodingAlignment \|\|
2733	isAligned(Lhs: *EncodingAlignment, SizeInBytes: CN->getZExtValue()))) {
2734	int Addr = (int)CN->getZExtValue();
2735
2736	// Otherwise, break this down into an LIS + disp.
2737	Disp = DAG.getTargetConstant(Val: (short)Addr, DL: dl, VT: MVT::i32);
2738
2739	Base = DAG.getTargetConstant(Val: (Addr - (signed short)Addr) >> `16`, DL: dl,
2740	VT: MVT::i32);
2741	unsigned Opc = CN->getValueType(ResNo: `0`) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2742	Base = SDValue (DAG.getMachineNode(Opcode: Opc, dl, VT: CN->getValueType(ResNo: `0`), Op1: Base), `0`);
2743	return true;
2744	}
2745	}
2746
2747	Disp = DAG.getTargetConstant(Val: `0`, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()));
2748	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
2749	Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2750	fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
2751	} else
2752	Base = N;
2753	return true; // [r+0]
2754	}
2755
2756	/// Similar to the 16-bit case but for instructions that take a 34-bit
2757	/// displacement field (prefixed loads/stores).
2758	bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2759	SDValue &Base,
2760	SelectionDAG &DAG) const {
2761	// Only on 64-bit targets.
2762	if (N.getValueType() != MVT::i64)
2763	return false;
2764
2765	SDLoc dl(N);
2766	int64_t Imm = `0`;
2767
2768	if (N.getOpcode() == ISD::ADD) {
2769	if (!isIntS34Immediate(Op: N.getOperand(i: `1`), Imm))
2770	return false;
2771	Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2772	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: `0`)))
2773	Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2774	else
2775	Base = N.getOperand(i: `0`);
2776	return true;
2777	}
2778
2779	if (N.getOpcode() == ISD::OR) {
2780	if (!isIntS34Immediate(Op: N.getOperand(i: `1`), Imm))
2781	return false;
2782	// If this is an or of disjoint bitfields, we can codegen this as an add
2783	// (for better address arithmetic) if the LHS and RHS of the OR are
2784	// provably disjoint.
2785	KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: `0`));
2786	if ((LHSKnown.Zero.getZExtValue() \| ~(uint64_t)Imm) != ~`0ULL`)
2787	return false;
2788	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: `0`)))
2789	Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2790	else
2791	Base = N.getOperand(i: `0`);
2792	Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2793	return true;
2794	}
2795
2796	if (isIntS34Immediate(Op: N, Imm)) { // If the address is a 34-bit const.
2797	Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2798	Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
2799	return true;
2800	}
2801
2802	return false;
2803	}
2804
2805	/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2806	/// represented as an indexed [r+r] operation.
2807	bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2808	SDValue &Index,
2809	SelectionDAG &DAG) const {
2810	// Check to see if we can easily represent this as an [r+r] address. This
2811	// will fail if it thinks that the address is more profitably represented as
2812	// reg+imm, e.g. where imm = 0.
2813	if (SelectAddressRegReg(N, Base, Index, DAG))
2814	return true;
2815
2816	// If the address is the result of an add, we will utilize the fact that the
2817	// address calculation includes an implicit add. However, we can reduce
2818	// register pressure if we do not materialize a constant just for use as the
2819	// index register. We only get rid of the add if it is not an add of a
2820	// value and a 16-bit signed constant and both have a single use.
2821	int16_t imm = `0`;
2822	if (N.getOpcode() == ISD::ADD &&
2823	(!isIntS16Immediate(Op: N.getOperand(i: `1`), Imm&: imm) \|\|
2824	!N.getOperand(i: `1`).hasOneUse() \|\| !N.getOperand(i: `0`).hasOneUse())) {
2825	Base = N.getOperand(i: `0`);
2826	Index = N.getOperand(i: `1`);
2827	return true;
2828	}
2829
2830	// Otherwise, do it the hard way, using R0 as the base register.
2831	Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2832	VT: N.getValueType());
2833	Index = N;
2834	return true;
2835	}
2836
2837	template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2838	Ty *PCRelCand = dyn_cast<Ty>(N);
2839	return PCRelCand && (PPCInstrInfo::hasPCRelFlag(TF: PCRelCand->getTargetFlags()));
2840	}
2841
2842	/// Returns true if this address is a PC Relative address.
2843	/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2844	/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2845	bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2846	// This is a materialize PC Relative node. Always select this as PC Relative.
2847	Base = N;
2848	if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2849	return true;
2850	if (isValidPCRelNode<ConstantPoolSDNode>(N) \|\|
2851	isValidPCRelNode<GlobalAddressSDNode>(N) \|\|
2852	isValidPCRelNode<JumpTableSDNode>(N) \|\|
2853	isValidPCRelNode<BlockAddressSDNode>(N))
2854	return true;
2855	return false;
2856	}
2857
2858	/// Returns true if we should use a direct load into vector instruction
2859	/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2860	static bool usePartialVectorLoads(SDNode N, const* PPCSubtarget& ST) {
2861
2862	// If there are any other uses other than scalar to vector, then we should
2863	// keep it as a scalar load -> direct move pattern to prevent multiple
2864	// loads.
2865	LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N);
2866	if (!LD)
2867	return false;
2868
2869	EVT MemVT = LD->getMemoryVT();
2870	if (!MemVT.isSimple())
2871	return false;
2872	switch(MemVT.getSimpleVT().SimpleTy) {
2873	case MVT::i64:
2874	break;
2875	case MVT::i32:
2876	if (!ST.hasP8Vector())
2877	return false;
2878	break;
2879	case MVT::i16:
2880	case MVT::i8:
2881	if (!ST.hasP9Vector())
2882	return false;
2883	break;
2884	default:
2885	return false;
2886	}
2887
2888	SDValue LoadedVal(N, `0`);
2889	if (!LoadedVal.hasOneUse())
2890	return false;
2891
2892	for (SDUse &Use : LD->uses())
2893	if (Use.getResNo() == `0` &&
2894	Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2895	Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2896	return false;
2897
2898	return true;
2899	}
2900
2901	/// getPreIndexedAddressParts - returns true by value, base pointer and
2902	/// offset pointer and addressing mode by reference if the node's address
2903	/// can be legally represented as pre-indexed load / store address.
2904	bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2905	SDValue &Offset,
2906	ISD::MemIndexedMode &AM,
2907	SelectionDAG &DAG) const {
2908	if (DisablePPCPreinc) return false;
2909
2910	bool isLoad = true;
2911	SDValue Ptr;
2912	EVT VT;
2913	Align Alignment;
2914	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
2915	Ptr = LD->getBasePtr();
2916	VT = LD->getMemoryVT();
2917	Alignment = LD->getAlign();
2918	} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
2919	Ptr = ST->getBasePtr();
2920	VT = ST->getMemoryVT();
2921	Alignment = ST->getAlign();
2922	isLoad = false;
2923	} else
2924	return false;
2925
2926	// Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2927	// instructions because we can fold these into a more efficient instruction
2928	// instead, (such as LXSD).
2929	if (isLoad && usePartialVectorLoads(N, ST: Subtarget)) {
2930	return false;
2931	}
2932
2933	// PowerPC doesn't have preinc load/store instructions for vectors
2934	if (VT.isVector())
2935	return false;
2936
2937	if (SelectAddressRegReg(N: Ptr, Base, Index&: Offset, DAG)) {
2938	// Common code will reject creating a pre-inc form if the base pointer
2939	// is a frame index, or if N is a store and the base pointer is either
2940	// the same as or a predecessor of the value being stored. Check for
2941	// those situations here, and try with swapped Base/Offset instead.
2942	bool Swap = false;
2943
2944	if (isa<FrameIndexSDNode>(Val: Base) \|\| isa<RegisterSDNode>(Val: Base))
2945	Swap = true;
2946	else if (!isLoad) {
2947	SDValue Val = cast<StoreSDNode>(Val: N)->getValue();
2948	if (Val == Base \|\| Base.getNode()->isPredecessorOf(N: Val.getNode()))
2949	Swap = true;
2950	}
2951
2952	if (Swap)
2953	std::swap(a&: Base, b&: Offset);
2954
2955	AM = ISD::PRE_INC;
2956	return true;
2957	}
2958
2959	// LDU/STU can only handle immediates that are a multiple of 4.
2960	if (VT != MVT::i64) {
2961	if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: std::nullopt))
2962	return false;
2963	} else {
2964	// LDU/STU need an address with at least 4-byte alignment.
2965	if (Alignment < Align (`4`))
2966	return false;
2967
2968	if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: Align (`4`)))
2969	return false;
2970	}
2971
2972	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
2973	// PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2974	// sext i32 to i64 when addr mode is r+i.
2975	if (LD->getValueType(ResNo: `0`) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2976	LD->getExtensionType() == ISD::SEXTLOAD &&
2977	isa<ConstantSDNode>(Val: Offset))
2978	return false;
2979	}
2980
2981	AM = ISD::PRE_INC;
2982	return true;
2983	}
2984
2985	//===----------------------------------------------------------------------===//
2986	// LowerOperation implementation
2987	//===----------------------------------------------------------------------===//
2988
/// Set \p HiOpFlags and \p LoOpFlags to the target MO flags used to reference
/// the high and low parts of a label; the PIC variants are selected when
/// \p IsPIC is true. Nothing is returned.
2991	static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2992	unsigned &HiOpFlags, unsigned &LoOpFlags,
2993	const GlobalValue GV = nullptr*) {
2994	HiOpFlags = PPCII::MO_HA;
2995	LoOpFlags = PPCII::MO_LO;
2996
2997	// Don't use the pic base if not in PIC relocation model.
2998	if (IsPIC) {
2999	HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3000	LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3001	}
3002	}
3003
3004	static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3005	SelectionDAG &DAG) {
3006	SDLoc DL(HiPart);
3007	EVT PtrVT = HiPart.getValueType();
3008	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: PtrVT);
3009
3010	SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL, VT: PtrVT, N1: HiPart, N2: Zero);
3011	SDValue Lo = DAG.getNode(Opcode: PPCISD::Lo, DL, VT: PtrVT, N1: LoPart, N2: Zero);
3012
3013	// With PIC, the first instruction is actually "GR+hi(&G)".
3014	if (isPIC)
3015	Hi = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
3016	N1: DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL, VT: PtrVT), N2: Hi);
3017
3018	// Generate non-pic code that has direct accesses to the constant pool.
3019	// The address of the global is just (hi(&g)+lo(&g)).
3020	return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Hi, N2: Lo);
3021	}
3022
3023	static void setUsesTOCBasePtr(MachineFunction &MF) {
3024	PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3025	FuncInfo->setUsesTOCBasePtr();
3026	}
3027
3028	static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3029	setUsesTOCBasePtr(DAG.getMachineFunction());
3030	}
3031
3032	SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3033	SDValue GA) const {
3034	EVT VT = Subtarget.getScalarIntVT();
3035	SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(Reg: PPC::X2, VT)
3036	: Subtarget.isAIXABI()
3037	? DAG.getRegister(Reg: PPC::R2, VT)
3038	: DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT);
3039	SDValue Ops[] = { GA, Reg };
3040	return DAG.getMemIntrinsicNode(
3041	Opcode: PPCISD::TOC_ENTRY, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops, MemVT: VT,
3042	PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()), Alignment: std::nullopt,
3043	Flags: MachineMemOperand::MOLoad);
3044	}
3045
3046	SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3047	SelectionDAG &DAG) const {
3048	EVT PtrVT = Op.getValueType();
3049	ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Val&: Op);
3050	const Constant *C = CP->getConstVal();
3051
3052	// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3053	// The actual address of the GlobalValue is stored in the TOC.
3054	if (Subtarget.is64BitELFABI() \|\| Subtarget.isAIXABI()) {
3055	if (Subtarget.isUsingPCRelativeCalls()) {
3056	SDLoc DL(CP);
3057	EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3058	SDValue ConstPool = DAG.getTargetConstantPool(
3059	C, VT: Ty, Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: PPCII::MO_PCREL_FLAG);
3060	return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: ConstPool);
3061	}
3062	setUsesTOCBasePtr(DAG);
3063	SDValue GA = DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: `0`);
3064	return getTOCEntry(DAG, dl: SDLoc (CP), GA);
3065	}
3066
3067	unsigned MOHiFlag, MOLoFlag;
3068	bool IsPIC = isPositionIndependent();
3069	getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3070
3071	if (IsPIC && Subtarget.isSVR4ABI()) {
3072	SDValue GA =
3073	DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: PPCII::MO_PIC_FLAG);
3074	return getTOCEntry(DAG, dl: SDLoc (CP), GA);
3075	}
3076
3077	SDValue CPIHi =
3078	DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: `0`, TargetFlags: MOHiFlag);
3079	SDValue CPILo =
3080	DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: `0`, TargetFlags: MOLoFlag);
3081	return LowerLabelRef(HiPart: CPIHi, LoPart: CPILo, isPIC: IsPIC, DAG);
3082	}
3083
3084	// For 64-bit PowerPC, prefer the more compact relative encodings.
3085	// This trades 32 bits per jump table entry for one or two instructions
3086	// on the jump site.
3087	unsigned PPCTargetLowering::getJumpTableEncoding() const {
3088	if (isJumpTableRelative())
3089	return MachineJumpTableInfo::EK_LabelDifference32;
3090
3091	return TargetLowering::getJumpTableEncoding();
3092	}
3093
3094	bool PPCTargetLowering::isJumpTableRelative() const {
3095	if (UseAbsoluteJumpTables)
3096	return false;
3097	if (Subtarget.isPPC64() \|\| Subtarget.isAIXABI())
3098	return true;
3099	return TargetLowering::isJumpTableRelative();
3100	}
3101
3102	SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3103	SelectionDAG &DAG) const {
3104	if (!Subtarget.isPPC64() \|\| Subtarget.isAIXABI())
3105	return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3106
3107	switch (getTargetMachine().getCodeModel()) {
3108	case CodeModel::Small:
3109	case CodeModel::Medium:
3110	return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3111	default:
3112	return DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: SDLoc (),
3113	VT: getPointerTy(DL: DAG.getDataLayout()));
3114	}
3115	}
3116
3117	const MCExpr *
3118	PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3119	unsigned JTI,
3120	MCContext &Ctx) const {
3121	if (!Subtarget.isPPC64() \|\| Subtarget.isAIXABI())
3122	return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3123
3124	switch (getTargetMachine().getCodeModel()) {
3125	case CodeModel::Small:
3126	case CodeModel::Medium:
3127	return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3128	default:
3129	return MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx);
3130	}
3131	}
3132
3133	SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3134	EVT PtrVT = Op.getValueType();
3135	JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op);
3136
3137	// isUsingPCRelativeCalls() returns true when PCRelative is enabled
3138	if (Subtarget.isUsingPCRelativeCalls()) {
3139	SDLoc DL(JT);
3140	EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3141	SDValue GA =
3142	DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Ty, TargetFlags: PPCII::MO_PCREL_FLAG);
3143	SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3144	return MatAddr;
3145	}
3146
3147	// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3148	// The actual address of the GlobalValue is stored in the TOC.
3149	if (Subtarget.is64BitELFABI() \|\| Subtarget.isAIXABI()) {
3150	setUsesTOCBasePtr(DAG);
3151	SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT);
3152	return getTOCEntry(DAG, dl: SDLoc (JT), GA);
3153	}
3154
3155	unsigned MOHiFlag, MOLoFlag;
3156	bool IsPIC = isPositionIndependent();
3157	getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3158
3159	if (IsPIC && Subtarget.isSVR4ABI()) {
3160	SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT,
3161	TargetFlags: PPCII::MO_PIC_FLAG);
3162	return getTOCEntry(DAG, dl: SDLoc (GA), GA);
3163	}
3164
3165	SDValue JTIHi = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOHiFlag);
3166	SDValue JTILo = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOLoFlag);
3167	return LowerLabelRef(HiPart: JTIHi, LoPart: JTILo, isPIC: IsPIC, DAG);
3168	}
3169
3170	SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3171	SelectionDAG &DAG) const {
3172	EVT PtrVT = Op.getValueType();
3173	BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Val&: Op);
3174	const BlockAddress *BA = BASDN->getBlockAddress();
3175
3176	// isUsingPCRelativeCalls() returns true when PCRelative is enabled
3177	if (Subtarget.isUsingPCRelativeCalls()) {
3178	SDLoc DL(BASDN);
3179	EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3180	SDValue GA = DAG.getTargetBlockAddress(BA, VT: Ty, Offset: BASDN->getOffset(),
3181	TargetFlags: PPCII::MO_PCREL_FLAG);
3182	SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3183	return MatAddr;
3184	}
3185
3186	// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3187	// The actual BlockAddress is stored in the TOC.
3188	if (Subtarget.is64BitELFABI() \|\| Subtarget.isAIXABI()) {
3189	setUsesTOCBasePtr(DAG);
3190	SDValue GA = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset());
3191	return getTOCEntry(DAG, dl: SDLoc (BASDN), GA);
3192	}
3193
3194	// 32-bit position-independent ELF stores the BlockAddress in the .got.
3195	if (Subtarget.is32BitELFABI() && isPositionIndependent())
3196	return getTOCEntry(
3197	DAG, dl: SDLoc (BASDN),
3198	GA: DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset()));
3199
3200	unsigned MOHiFlag, MOLoFlag;
3201	bool IsPIC = isPositionIndependent();
3202	getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3203	SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: `0`, TargetFlags: MOHiFlag);
3204	SDValue TgtBALo = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: `0`, TargetFlags: MOLoFlag);
3205	return LowerLabelRef(HiPart: TgtBAHi, LoPart: TgtBALo, isPIC: IsPIC, DAG);
3206	}
3207
3208	SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3209	SelectionDAG &DAG) const {
3210	if (Subtarget.isAIXABI())
3211	return LowerGlobalTLSAddressAIX(Op, DAG);
3212
3213	return LowerGlobalTLSAddressLinux(Op, DAG);
3214	}
3215
3216	/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3217	/// and then apply the update.
3218	static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3219	SelectionDAG &DAG,
3220	const TargetMachine &TM) {
3221	// Initialize TLS model opt setting lazily:
3222	// (1) Use initial-exec for single TLS var references within current function.
3223	// (2) Use local-dynamic for multiple TLS var references within current
3224	// function.
3225	PPCFunctionInfo *FuncInfo =
3226	DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3227	if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3228	SmallPtrSet<const GlobalValue *, `8`> TLSGV;
3229	// Iterate over all instructions within current function, collect all TLS
3230	// global variables (global variables taken as the first parameter to
3231	// Intrinsic::threadlocal_address).
3232	const Function &Func = DAG.getMachineFunction().getFunction();
3233	for (const BasicBlock &BB : Func)
3234	for (const Instruction &I : BB)
3235	if (I.getOpcode() == Instruction::Call)
3236	if (const CallInst CI = dyn_cast<const* CallInst>(Val: &I))
3237	if (Function *CF = CI->getCalledFunction())
3238	if (CF->isDeclaration() &&
3239	CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3240	if (const GlobalValue *GV =
3241	dyn_cast<GlobalValue>(Val: I.getOperand(i: `0`))) {
3242	TLSModel::Model GVModel = TM.getTLSModel(GV);
3243	if (GVModel == TLSModel::LocalDynamic)
3244	TLSGV.insert(Ptr: GV);
3245	}
3246
3247	unsigned TLSGVCnt = TLSGV.size();
3248	LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3249	if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3250	FuncInfo->setAIXFuncUseTLSIEForLD();
3251	FuncInfo->setAIXFuncTLSModelOptInitDone();
3252	}
3253
3254	if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3255	LLVM_DEBUG(
3256	dbgs() << DAG.getMachineFunction().getName()
3257	<< " function is using the TLS-IE model for TLS-LD access.\n");
3258	Model = TLSModel::InitialExec;
3259	}
3260	}
3261
3262	SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3263	SelectionDAG &DAG) const {
3264	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
3265
3266	if (DAG.getTarget().useEmulatedTLS())
3267	report_fatal_error(reason: "Emulated TLS is not yet supported on AIX");
3268
3269	SDLoc dl(GA);
3270	const GlobalValue *GV = GA->getGlobal();
3271	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3272	bool Is64Bit = Subtarget.isPPC64();
3273	TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3274
3275	// Apply update to the TLS model.
3276	if (Subtarget.hasAIXShLibTLSModelOpt())
3277	updateForAIXShLibTLSModelOpt(Model, DAG, TM: getTargetMachine());
3278
3279	// TLS variables are accessed through TOC entries.
3280	// To support this, set the DAG to use the TOC base pointer.
3281	setUsesTOCBasePtr(DAG);
3282
3283	bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3284
3285	if (IsTLSLocalExecModel \|\| Model == TLSModel::InitialExec) {
3286	bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3287	bool HasAIXSmallTLSGlobalAttr = false;
3288	SDValue VariableOffsetTGA =
3289	DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: PPCII::MO_TPREL_FLAG);
3290	SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3291	SDValue TLSReg;
3292
3293	if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Val: GV))
3294	if (GVar->hasAttribute(Kind: "aix-small-tls"))
3295	HasAIXSmallTLSGlobalAttr = true;
3296
3297	if (Is64Bit) {
3298	// For local-exec and initial-exec on AIX (64-bit), the sequence generated
3299	// involves a load of the variable offset (from the TOC), followed by an
3300	// add of the loaded variable offset to R13 (the thread pointer).
3301	// This code sequence looks like:
3302	// ld reg1,var[TC](2)
3303	// add reg2, reg1, r13 // r13 contains the thread pointer
3304	TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
3305
3306	// With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3307	// global variable attribute, produce a faster access sequence for
3308	// local-exec TLS variables where the offset from the TLS base is encoded
3309	// as an immediate operand.
3310	//
3311	// We only utilize the faster local-exec access sequence when the TLS
3312	// variable has a size within the policy limit. We treat types that are
3313	// not sized or are empty as being over the policy size limit.
3314	if ((HasAIXSmallLocalExecTLS \|\| HasAIXSmallTLSGlobalAttr) &&
3315	IsTLSLocalExecModel) {
3316	Type *GVType = GV->getValueType();
3317	if (GVType->isSized() && !GVType->isEmptyTy() &&
3318	GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3319	AIXSmallTlsPolicySizeLimit)
3320	return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA, N2: TLSReg);
3321	}
3322	} else {
3323	// For local-exec and initial-exec on AIX (32-bit), the sequence generated
3324	// involves loading the variable offset from the TOC, generating a call to
3325	// .__get_tpointer to get the thread pointer (which will be in R3), and
3326	// adding the two together:
3327	// lwz reg1,var[TC](2)
3328	// bla .__get_tpointer
3329	// add reg2, reg1, r3
3330	TLSReg = DAG.getNode(Opcode: PPCISD::GET_TPOINTER, DL: dl, VT: PtrVT);
3331
3332	// We do not implement the 32-bit version of the faster access sequence
3333	// for local-exec that is controlled by the -maix-small-local-exec-tls
3334	// option, or the "aix-small-tls" global variable attribute.
3335	if (HasAIXSmallLocalExecTLS \|\| HasAIXSmallTLSGlobalAttr)
3336	report_fatal_error(reason: "The small-local-exec TLS access sequence is "
3337	"currently only supported on AIX (64-bit mode).");
3338	}
3339	return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: VariableOffset);
3340	}
3341
3342	if (Model == TLSModel::LocalDynamic) {
3343	bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3344
3345	// We do not implement the 32-bit version of the faster access sequence
3346	// for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3347	if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3348	report_fatal_error(reason: "The small-local-dynamic TLS access sequence is "
3349	"currently only supported on AIX (64-bit mode).");
3350
3351	// For local-dynamic on AIX, we need to generate one TOC entry for each
3352	// variable offset, and a single module-handle TOC entry for the entire
3353	// file.
3354
3355	SDValue VariableOffsetTGA =
3356	DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: PPCII::MO_TLSLD_FLAG);
3357	SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3358
3359	Module *M = DAG.getMachineFunction().getFunction().getParent();
3360	GlobalVariable *TLSGV =
3361	dyn_cast_or_null<GlobalVariable>(Val: M->getOrInsertGlobal(
3362	Name: StringRef ("_$TLSML"), Ty: PointerType::getUnqual(C&: *DAG.getContext())));
3363	TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3364	assert(TLSGV && "Not able to create GV for _$TLSML.");
3365	SDValue ModuleHandleTGA =
3366	DAG.getTargetGlobalAddress(GV: TLSGV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: PPCII::MO_TLSLDM_FLAG);
3367	SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, GA: ModuleHandleTGA);
3368	SDValue ModuleHandle =
3369	DAG.getNode(Opcode: PPCISD::TLSLD_AIX, DL: dl, VT: PtrVT, Operand: ModuleHandleTOC);
3370
3371	// With the -maix-small-local-dynamic-tls option, produce a faster access
3372	// sequence for local-dynamic TLS variables where the offset from the
3373	// module-handle is encoded as an immediate operand.
3374	//
3375	// We only utilize the faster local-dynamic access sequence when the TLS
3376	// variable has a size within the policy limit. We treat types that are
3377	// not sized or are empty as being over the policy size limit.
3378	if (HasAIXSmallLocalDynamicTLS) {
3379	Type *GVType = GV->getValueType();
3380	if (GVType->isSized() && !GVType->isEmptyTy() &&
3381	GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3382	AIXSmallTlsPolicySizeLimit)
3383	return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA,
3384	N2: ModuleHandle);
3385	}
3386
3387	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: ModuleHandle, N2: VariableOffset);
3388	}
3389
3390	// If Local- or Initial-exec or Local-dynamic is not possible or specified,
3391	// all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3392	// need to generate two TOC entries, one for the variable offset, one for the
3393	// region handle. The global address for the TOC entry of the region handle is
3394	// created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3395	// entry of the variable offset is created with MO_TLSGD_FLAG.
3396	SDValue VariableOffsetTGA =
3397	DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: PPCII::MO_TLSGD_FLAG);
3398	SDValue RegionHandleTGA =
3399	DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: PPCII::MO_TLSGDM_FLAG);
3400	SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3401	SDValue RegionHandle = getTOCEntry(DAG, dl, GA: RegionHandleTGA);
3402	return DAG.getNode(Opcode: PPCISD::TLSGD_AIX, DL: dl, VT: PtrVT, N1: VariableOffset,
3403	N2: RegionHandle);
3404	}
3405
3406	SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3407	SelectionDAG &DAG) const {
3408	// FIXME: TLS addresses currently use medium model code sequences,
3409	// which is the most useful form. Eventually support for small and
3410	// large models could be added if users need it, at the cost of
3411	// additional complexity.
3412	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
3413	if (DAG.getTarget().useEmulatedTLS())
3414	return LowerToTLSEmulatedModel(GA, DAG);
3415
3416	SDLoc dl(GA);
3417	const GlobalValue *GV = GA->getGlobal();
3418	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3419	bool is64bit = Subtarget.isPPC64();
3420	const Module *M = DAG.getMachineFunction().getFunction().getParent();
3421	PICLevel::Level picLevel = M->getPICLevel();
3422
3423	const TargetMachine &TM = getTargetMachine();
3424	TLSModel::Model Model = TM.getTLSModel(GV);
3425
3426	if (Model == TLSModel::LocalExec) {
3427	if (Subtarget.isUsingPCRelativeCalls()) {
3428	SDValue TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
3429	SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`,
3430	TargetFlags: PPCII::MO_TPREL_PCREL_FLAG);
3431	SDValue MatAddr =
3432	DAG.getNode(Opcode: PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3433	return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: MatAddr);
3434	}
3435
3436	SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`,
3437	TargetFlags: PPCII::MO_TPREL_HA);
3438	SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`,
3439	TargetFlags: PPCII::MO_TPREL_LO);
3440	SDValue TLSReg = is64bit ? DAG.getRegister(Reg: PPC::X13, VT: MVT::i64)
3441	: DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);
3442
3443	SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL: dl, VT: PtrVT, N1: TGAHi, N2: TLSReg);
3444	return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: TGALo, N2: Hi);
3445	}
3446
3447	if (Model == TLSModel::InitialExec) {
3448	bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3449	SDValue TGA = DAG.getTargetGlobalAddress(
3450	GV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : `0`);
3451	SDValue TGATLS = DAG.getTargetGlobalAddress(
3452	GV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3453	SDValue TPOffset;
3454	if (IsPCRel) {
3455	SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3456	TPOffset = DAG.getLoad(VT: MVT::i64, dl, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
3457	PtrInfo: MachinePointerInfo ());
3458	} else {
3459	SDValue GOTPtr;
3460	if (is64bit) {
3461	setUsesTOCBasePtr(DAG);
3462	SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
3463	GOTPtr =
3464	DAG.getNode(Opcode: PPCISD::ADDIS_GOT_TPREL_HA, DL: dl, VT: PtrVT, N1: GOTReg, N2: TGA);
3465	} else {
3466	if (!TM.isPositionIndependent())
3467	GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_GOT, DL: dl, VT: PtrVT);
3468	else if (picLevel == PICLevel::SmallPIC)
3469	GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
3470	else
3471	GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
3472	}
3473	TPOffset = DAG.getNode(Opcode: PPCISD::LD_GOT_TPREL_L, DL: dl, VT: PtrVT, N1: TGA, N2: GOTPtr);
3474	}
3475	return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TPOffset, N2: TGATLS);
3476	}
3477
3478	if (Model == TLSModel::GeneralDynamic) {
3479	if (Subtarget.isUsingPCRelativeCalls()) {
3480	SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`,
3481	TargetFlags: PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3482	return DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3483	}
3484
3485	SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: `0`);
3486	SDValue GOTPtr;
3487	if (is64bit) {
3488	setUsesTOCBasePtr(DAG);
3489	SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
3490	GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSGD_HA, DL: dl, VT: PtrVT,
3491	N1: GOTReg, N2: TGA);
3492	} else {
3493	if (picLevel == PICLevel::SmallPIC)
3494	GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
3495	else
3496	GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
3497	}
3498	return DAG.getNode(Opcode: PPCISD::ADDI_TLSGD_L_ADDR, DL: dl, VT: PtrVT,
3499	N1: GOTPtr, N2: TGA, N3: TGA);
3500	}
3501
3502	if (Model == TLSModel::LocalDynamic) {
3503	if (Subtarget.isUsingPCRelativeCalls()) {
3504	SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`,
3505	TargetFlags: PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3506	SDValue MatPCRel =
3507	DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3508	return DAG.getNode(Opcode: PPCISD::PADDI_DTPREL, DL: dl, VT: PtrVT, N1: MatPCRel, N2: TGA);
3509	}
3510
3511	SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: `0`);
3512	SDValue GOTPtr;
3513	if (is64bit) {
3514	setUsesTOCBasePtr(DAG);
3515	SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
3516	GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSLD_HA, DL: dl, VT: PtrVT,
3517	N1: GOTReg, N2: TGA);
3518	} else {
3519	if (picLevel == PICLevel::SmallPIC)
3520	GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
3521	else
3522	GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
3523	}
3524	SDValue TLSAddr = DAG.getNode(Opcode: PPCISD::ADDI_TLSLD_L_ADDR, DL: dl,
3525	VT: PtrVT, N1: GOTPtr, N2: TGA, N3: TGA);
3526	SDValue DtvOffsetHi = DAG.getNode(Opcode: PPCISD::ADDIS_DTPREL_HA, DL: dl,
3527	VT: PtrVT, N1: TLSAddr, N2: TGA);
3528	return DAG.getNode(Opcode: PPCISD::ADDI_DTPREL_L, DL: dl, VT: PtrVT, N1: DtvOffsetHi, N2: TGA);
3529	}
3530
3531	llvm_unreachable("Unknown TLS model!");
3532	}
3533
3534	SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3535	SelectionDAG &DAG) const {
3536	EVT PtrVT = Op.getValueType();
3537	GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Val&: Op);
3538	SDLoc DL(GSDN);
3539	const GlobalValue *GV = GSDN->getGlobal();
3540
3541	// 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3542	// The actual address of the GlobalValue is stored in the TOC.
3543	if (Subtarget.is64BitELFABI() \|\| Subtarget.isAIXABI()) {
3544	if (Subtarget.isUsingPCRelativeCalls()) {
3545	EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3546	if (isAccessedAsGotIndirect(N: Op)) {
3547	SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
3548	TargetFlags: PPCII::MO_GOT_PCREL_FLAG);
3549	SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3550	SDValue Load = DAG.getLoad(VT: MVT::i64, dl: DL, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
3551	PtrInfo: MachinePointerInfo ());
3552	return Load;
3553	} else {
3554	SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
3555	TargetFlags: PPCII::MO_PCREL_FLAG);
3556	return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3557	}
3558	}
3559	setUsesTOCBasePtr(DAG);
3560	SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset());
3561	return getTOCEntry(DAG, dl: DL, GA);
3562	}
3563
3564	unsigned MOHiFlag, MOLoFlag;
3565	bool IsPIC = isPositionIndependent();
3566	getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag, GV);
3567
3568	if (IsPIC && Subtarget.isSVR4ABI()) {
3569	SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT,
3570	offset: GSDN->getOffset(),
3571	TargetFlags: PPCII::MO_PIC_FLAG);
3572	return getTOCEntry(DAG, dl: DL, GA);
3573	}
3574
3575	SDValue GAHi =
3576	DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOHiFlag);
3577	SDValue GALo =
3578	DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOLoFlag);
3579
3580	return LowerLabelRef(HiPart: GAHi, LoPart: GALo, isPIC: IsPIC, DAG);
3581	}
3582
3583	SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3584	bool IsStrict = Op ->isStrictFPOpcode();
3585	ISD::CondCode CC =
3586	cast<CondCodeSDNode>(Val: Op.getOperand(i: IsStrict ? `3` : `2`))->get();
3587	SDValue LHS = Op.getOperand(i: IsStrict ? `1` : `0`);
3588	SDValue RHS = Op.getOperand(i: IsStrict ? `2` : `1`);
3589	SDValue Chain = IsStrict ? Op.getOperand(i: `0`) : SDValue ();
3590	EVT LHSVT = LHS.getValueType();
3591	SDLoc dl(Op);
3592
3593	// Soften the setcc with libcall if it is fp128.
3594	if (LHSVT == MVT::f128) {
3595	assert(!Subtarget.hasP9Vector() &&
3596	"SETCC for f128 is already legal under Power9!");
3597	softenSetCCOperands(DAG, VT: LHSVT, NewLHS&: LHS, NewRHS&: RHS, CCCode&: CC, DL: dl, OldLHS: LHS, OldRHS: RHS, Chain,
3598	IsSignaling: Op ->getOpcode() == ISD::STRICT_FSETCCS);
3599	if (RHS.getNode())
3600	LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS,
3601	N3: DAG.getCondCode(Cond: CC));
3602	if (IsStrict)
3603	return DAG.getMergeValues(Ops: {LHS, Chain}, dl);
3604	return LHS;
3605	}
3606
3607	assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3608
3609	if (Op.getValueType() == MVT::v2i64) {
3610	// When the operands themselves are v2i64 values, we need to do something
3611	// special because VSX has no underlying comparison operations for these.
3612	if (LHS.getValueType() == MVT::v2i64) {
3613	// Equality can be handled by casting to the legal type for Altivec
3614	// comparisons, everything else needs to be expanded.
3615	if (CC != ISD::SETEQ && CC != ISD::SETNE)
3616	return SDValue ();
3617	SDValue SetCC32 = DAG.getSetCC(
3618	DL: dl, VT: MVT::v4i32, LHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: LHS),
3619	RHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: RHS), Cond: CC);
3620	int ShuffV[] = {`1`, `0`, `3`, `2`};
3621	SDValue Shuff =
3622	DAG.getVectorShuffle(VT: MVT::v4i32, dl, N1: SetCC32, N2: SetCC32, Mask: ShuffV);
3623	return DAG.getBitcast(VT: MVT::v2i64,
3624	V: DAG.getNode(Opcode: CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3625	DL: dl, VT: MVT::v4i32, N1: Shuff, N2: SetCC32));
3626	}
3627
3628	// We handle most of these in the usual way.
3629	return Op;
3630	}
3631
3632	// If we're comparing for equality to zero, expose the fact that this is
3633	// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3634	// fold the new nodes.
3635	if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3636	return V;
3637
3638	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
3639	// Leave comparisons against 0 and -1 alone for now, since they're usually
3640	// optimized. FIXME: revisit this when we can custom lower all setcc
3641	// optimizations.
3642	if (C->isAllOnes() \|\| C->isZero())
3643	return SDValue ();
3644	}
3645
3646	// If we have an integer seteq/setne, turn it into a compare against zero
3647	// by xor'ing the rhs with the lhs, which is faster than setting a
3648	// condition register, reading it back out, and masking the correct bit. The
3649	// normal approach here uses sub to do this instead of xor. Using xor exposes
3650	// the result to other bit-twiddling opportunities.
3651	if (LHSVT.isInteger() && (CC == ISD::SETEQ \|\| CC == ISD::SETNE)) {
3652	EVT VT = Op.getValueType();
3653	SDValue Sub = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: LHSVT, N1: LHS, N2: RHS);
3654	return DAG.getSetCC(DL: dl, VT, LHS: Sub, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: LHSVT), Cond: CC);
3655	}
3656	return SDValue ();
3657	}
3658
3659	SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3660	SDNode *Node = Op.getNode();
3661	EVT VT = Node->getValueType(ResNo: `0`);
3662	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3663	SDValue InChain = Node->getOperand(Num: `0`);
3664	SDValue VAListPtr = Node->getOperand(Num: `1`);
3665	const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: `2`))->getValue();
3666	SDLoc dl(Node);
3667
3668	assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3669
3670	// gpr_index
3671	SDValue GprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
3672	Ptr: VAListPtr, PtrInfo: MachinePointerInfo (SV), MemVT: MVT::i8);
3673	InChain = GprIndex.getValue(R: `1`);
3674
3675	if (VT == MVT::i64) {
3676	// Check if GprIndex is even
3677	SDValue GprAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: GprIndex,
3678	N2: DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i32));
3679	SDValue CC64 = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: GprAnd,
3680	RHS: DAG.getConstant(Val: `0`, DL: dl, VT: MVT::i32), Cond: ISD::SETNE);
3681	SDValue GprIndexPlusOne = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: GprIndex,
3682	N2: DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i32));
3683	// Align GprIndex to be even if it isn't
3684	GprIndex = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC64, N2: GprIndexPlusOne,
3685	N3: GprIndex);
3686	}
3687
3688	// fpr index is 1 byte after gpr
3689	SDValue FprPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
3690	N2: DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i32));
3691
3692	// fpr
3693	SDValue FprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
3694	Ptr: FprPtr, PtrInfo: MachinePointerInfo (SV), MemVT: MVT::i8);
3695	InChain = FprIndex.getValue(R: `1`);
3696
3697	SDValue RegSaveAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
3698	N2: DAG.getConstant(Val: `8`, DL: dl, VT: MVT::i32));
3699
3700	SDValue OverflowAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
3701	N2: DAG.getConstant(Val: `4`, DL: dl, VT: MVT::i32));
3702
3703	// areas
3704	SDValue OverflowArea =
3705	DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: OverflowAreaPtr, PtrInfo: MachinePointerInfo ());
3706	InChain = OverflowArea.getValue(R: `1`);
3707
3708	SDValue RegSaveArea =
3709	DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: RegSaveAreaPtr, PtrInfo: MachinePointerInfo ());
3710	InChain = RegSaveArea.getValue(R: `1`);
3711
3712	// select overflow_area if index > 8
3713	SDValue CC = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: VT.isInteger() ? GprIndex : FprIndex,
3714	RHS: DAG.getConstant(Val: `8`, DL: dl, VT: MVT::i32), Cond: ISD::SETLT);
3715
3716	// adjustment constant gpr_index 4/8*
3717	SDValue RegConstant = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32,
3718	N1: VT.isInteger() ? GprIndex : FprIndex,
3719	N2: DAG.getConstant(Val: VT.isInteger() ? `4` : `8`, DL: dl,
3720	VT: MVT::i32));
3721
3722	// OurReg = RegSaveArea + RegConstant
3723	SDValue OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: RegSaveArea,
3724	N2: RegConstant);
3725
3726	// Floating types are 32 bytes into RegSaveArea
3727	if (VT.isFloatingPoint())
3728	OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OurReg,
3729	N2: DAG.getConstant(Val: `32`, DL: dl, VT: MVT::i32));
3730
3731	// increase {f,g}pr_index by 1 (or 2 if VT is i64)
3732	SDValue IndexPlus1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32,
3733	N1: VT.isInteger() ? GprIndex : FprIndex,
3734	N2: DAG.getConstant(Val: VT == MVT::i64 ? `2` : `1`, DL: dl,
3735	VT: MVT::i32));
3736
3737	InChain = DAG.getTruncStore(Chain: InChain, dl, Val: IndexPlus1,
3738	Ptr: VT.isInteger() ? VAListPtr : FprPtr,
3739	PtrInfo: MachinePointerInfo (SV), SVT: MVT::i8);
3740
3741	// determine if we should load from reg_save_area or overflow_area
3742	SDValue Result = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: PtrVT, N1: CC, N2: OurReg, N3: OverflowArea);
3743
3744	// increase overflow_area by 4/8 if gpr/fpr > 8
3745	SDValue OverflowAreaPlusN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OverflowArea,
3746	N2: DAG.getConstant(Val: VT.isInteger() ? `4` : `8`,
3747	DL: dl, VT: MVT::i32));
3748
3749	OverflowArea = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC, N2: OverflowArea,
3750	N3: OverflowAreaPlusN);
3751
3752	InChain = DAG.getTruncStore(Chain: InChain, dl, Val: OverflowArea, Ptr: OverflowAreaPtr,
3753	PtrInfo: MachinePointerInfo (), SVT: MVT::i32);
3754
3755	return DAG.getLoad(VT, dl, Chain: InChain, Ptr: Result, PtrInfo: MachinePointerInfo ());
3756	}
3757
3758	SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3759	assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3760
3761	// We have to copy the entire va_list struct:
3762	// 2sizeof(char) + 2 Byte alignment + 2sizeof(char) = 12 Byte*
3763	return DAG.getMemcpy(Chain: Op.getOperand(i: `0`), dl: Op, Dst: Op.getOperand(i: `1`), Src: Op.getOperand(i: `2`),
3764	Size: DAG.getConstant(Val: `12`, DL: SDLoc (Op), VT: MVT::i32), Alignment: Align (`8`),
3765	isVol: false, AlwaysInline: true, /CI=/nullptr, OverrideTailCall: std::nullopt,
3766	DstPtrInfo: MachinePointerInfo (), SrcPtrInfo: MachinePointerInfo ());
3767	}
3768
3769	SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3770	SelectionDAG &DAG) const {
3771	return Op.getOperand(i: `0`);
3772	}
3773
3774	SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3775	MachineFunction &MF = DAG.getMachineFunction();
3776	PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3777
3778	assert((Op.getOpcode() == ISD::INLINEASM \|\|
3779	Op.getOpcode() == ISD::INLINEASM_BR) &&
3780	"Expecting Inline ASM node.");
3781
3782	// If an LR store is already known to be required then there is not point in
3783	// checking this ASM as well.
3784	if (MFI.isLRStoreRequired())
3785	return Op;
3786
3787	// Inline ASM nodes have an optional last operand that is an incoming Flag of
3788	// type MVT::Glue. We want to ignore this last operand if that is the case.
3789	unsigned NumOps = Op.getNumOperands();
3790	if (Op.getOperand(i: NumOps - `1`).getValueType() == MVT::Glue)
3791	--NumOps;
3792
3793	// Check all operands that may contain the LR.
3794	for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3795	const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3796	unsigned NumVals = Flags.getNumOperandRegisters();
3797	++i; // Skip the ID value.
3798
3799	switch (Flags.getKind()) {
3800	default:
3801	llvm_unreachable("Bad flags!");
3802	case InlineAsm::Kind::RegUse:
3803	case InlineAsm::Kind::Imm:
3804	case InlineAsm::Kind::Mem:
3805	i += NumVals;
3806	break;
3807	case InlineAsm::Kind::Clobber:
3808	case InlineAsm::Kind::RegDef:
3809	case InlineAsm::Kind::RegDefEarlyClobber: {
3810	for (; NumVals; --NumVals, ++i) {
3811	Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i))->getReg();
3812	if (Reg != PPC::LR && Reg != PPC::LR8)
3813	continue;
3814	MFI.setLRStoreRequired();
3815	return Op;
3816	}
3817	break;
3818	}
3819	}
3820	}
3821
3822	return Op;
3823	}
3824
3825	SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3826	SelectionDAG &DAG) const {
3827	SDValue Chain = Op.getOperand(i: `0`);
3828	SDValue Trmp = Op.getOperand(i: `1`); // trampoline
3829	SDValue FPtr = Op.getOperand(i: `2`); // nested function
3830	SDValue Nest = Op.getOperand(i: `3`); // 'nest' parameter value
3831	SDLoc dl(Op);
3832
3833	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3834
3835	if (Subtarget.isAIXABI()) {
3836	// On AIX we create a trampoline descriptor by combining the
3837	// entry point and TOC from the global descriptor (FPtr) with the
3838	// nest argument as the environment pointer.
3839	uint64_t PointerSize = Subtarget.isPPC64() ? `8` : `4`;
3840	MaybeAlign PointerAlign(PointerSize);
3841	auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3842	? (MachineMemOperand::MODereferenceable \|
3843	MachineMemOperand::MOInvariant)
3844	: MachineMemOperand::MONone;
3845
3846	uint64_t TOCPointerOffset = `1` * PointerSize;
3847	uint64_t EnvPointerOffset = `2` * PointerSize;
3848	SDValue SDTOCPtrOffset = DAG.getConstant(Val: TOCPointerOffset, DL: dl, VT: PtrVT);
3849	SDValue SDEnvPtrOffset = DAG.getConstant(Val: EnvPointerOffset, DL: dl, VT: PtrVT);
3850
3851	const Value *TrampolineAddr =
3852	cast<SrcValueSDNode>(Val: Op.getOperand(i: `4`))->getValue();
3853	const Function *Func =
3854	cast<Function>(Val: cast<SrcValueSDNode>(Val: Op.getOperand(i: `5`))->getValue());
3855
3856	SDValue OutChains[`3`];
3857
3858	// Copy the entry point address from the global descriptor to the
3859	// trampoline buffer.
3860	SDValue LoadEntryPoint =
3861	DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: FPtr, PtrInfo: MachinePointerInfo (Func, `0`),
3862	Alignment: PointerAlign, MMOFlags);
3863	SDValue EPLoadChain = LoadEntryPoint.getValue(R: `1`);
3864	OutChains[`0`] = DAG.getStore(Chain: EPLoadChain, dl, Val: LoadEntryPoint, Ptr: Trmp,
3865	PtrInfo: MachinePointerInfo (TrampolineAddr, `0`));
3866
3867	// Copy the TOC pointer from the global descriptor to the trampoline
3868	// buffer.
3869	SDValue TOCFromDescriptorPtr =
3870	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FPtr, N2: SDTOCPtrOffset);
3871	SDValue TOCReg = DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: TOCFromDescriptorPtr,
3872	PtrInfo: MachinePointerInfo (Func, TOCPointerOffset),
3873	Alignment: PointerAlign, MMOFlags);
3874	SDValue TrampolineTOCPointer =
3875	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Trmp, N2: SDTOCPtrOffset);
3876	SDValue TOCLoadChain = TOCReg.getValue(R: `1`);
3877	OutChains[`1`] =
3878	DAG.getStore(Chain: TOCLoadChain, dl, Val: TOCReg, Ptr: TrampolineTOCPointer,
3879	PtrInfo: MachinePointerInfo (TrampolineAddr, TOCPointerOffset));
3880
3881	// Store the nest argument into the environment pointer in the trampoline
3882	// buffer.
3883	SDValue EnvPointer = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Trmp, N2: SDEnvPtrOffset);
3884	OutChains[`2`] =
3885	DAG.getStore(Chain, dl, Val: Nest, Ptr: EnvPointer,
3886	PtrInfo: MachinePointerInfo (TrampolineAddr, EnvPointerOffset));
3887
3888	SDValue TokenFactor =
3889	DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OutChains);
3890	return TokenFactor;
3891	}
3892
3893	bool isPPC64 = (PtrVT == MVT::i64);
3894	Type IntPtrTy = DAG.getDataLayout().getIntPtrType(C&: DAG.getContext());
3895
3896	TargetLowering::ArgListTy Args;
3897	Args.emplace_back(args&: Trmp, args&: IntPtrTy);
3898	// TrampSize == (isPPC64 ? 48 : 40);
3899	Args.emplace_back(
3900	args: DAG.getConstant(Val: isPPC64 ? `48` : `40`, DL: dl, VT: Subtarget.getScalarIntVT()),
3901	args&: IntPtrTy);
3902	Args.emplace_back(args&: FPtr, args&: IntPtrTy);
3903	Args.emplace_back(args&: Nest, args&: IntPtrTy);
3904
3905	// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3906	TargetLowering::CallLoweringInfo CLI(DAG);
3907	CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3908	CC: CallingConv::C, ResultType: Type::getVoidTy(C&: *DAG.getContext()),
3909	Target: DAG.getExternalSymbol(Sym: "__trampoline_setup", VT: PtrVT), ArgsList: std::move(Args));
3910
3911	std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3912	return CallResult.second;
3913	}
3914
3915	SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3916	MachineFunction &MF = DAG.getMachineFunction();
3917	PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3918	EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
3919
3920	SDLoc dl(Op);
3921
3922	if (Subtarget.isPPC64() \|\| Subtarget.isAIXABI()) {
3923	// vastart just stores the address of the VarArgsFrameIndex slot into the
3924	// memory location argument.
3925	SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
3926	const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `2`))->getValue();
3927	return DAG.getStore(Chain: Op.getOperand(i: `0`), dl, Val: FR, Ptr: Op.getOperand(i: `1`),
3928	PtrInfo: MachinePointerInfo (SV));
3929	}
3930
3931	// For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3932	// We suppose the given va_list is already allocated.
3933	//
3934	// typedef struct {
3935	// char gpr; / index into the array of 8 GPRs*
3936	// stored in the register save area*
3937	// gpr=0 corresponds to r3,*
3938	// gpr=1 to r4, etc.*
3939	// /*
3940	// char fpr; / index into the array of 8 FPRs*
3941	// stored in the register save area*
3942	// fpr=0 corresponds to f1,*
3943	// fpr=1 to f2, etc.*
3944	// /*
3945	// char overflow_arg_area;*
3946	// / location on stack that holds*
3947	// the next overflow argument*
3948	// /*
3949	// char reg_save_area;*
3950	// / where r3:r10 and f1:f8 (if saved)*
3951	// are stored*
3952	// /*
3953	// } va_list[1];
3954
3955	SDValue ArgGPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumGPR(), DL: dl, VT: MVT::i32);
3956	SDValue ArgFPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumFPR(), DL: dl, VT: MVT::i32);
3957	SDValue StackOffsetFI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsStackOffset(),
3958	VT: PtrVT);
3959	SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
3960	VT: PtrVT);
3961
3962	uint64_t FrameOffset = PtrVT.getSizeInBits()/`8`;
3963	SDValue ConstFrameOffset = DAG.getConstant(Val: FrameOffset, DL: dl, VT: PtrVT);
3964
3965	uint64_t StackOffset = PtrVT.getSizeInBits()/`8` - `1`;
3966	SDValue ConstStackOffset = DAG.getConstant(Val: StackOffset, DL: dl, VT: PtrVT);
3967
3968	uint64_t FPROffset = `1`;
3969	SDValue ConstFPROffset = DAG.getConstant(Val: FPROffset, DL: dl, VT: PtrVT);
3970
3971	const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `2`))->getValue();
3972
3973	// Store first byte : number of int regs
3974	SDValue firstStore =
3975	DAG.getTruncStore(Chain: Op.getOperand(i: `0`), dl, Val: ArgGPR, Ptr: Op.getOperand(i: `1`),
3976	PtrInfo: MachinePointerInfo (SV), SVT: MVT::i8);
3977	uint64_t nextOffset = FPROffset;
3978	SDValue nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Op.getOperand(i: `1`),
3979	N2: ConstFPROffset);
3980
3981	// Store second byte : number of float regs
3982	SDValue secondStore =
3983	DAG.getTruncStore(Chain: firstStore, dl, Val: ArgFPR, Ptr: nextPtr,
3984	PtrInfo: MachinePointerInfo (SV, nextOffset), SVT: MVT::i8);
3985	nextOffset += StackOffset;
3986	nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstStackOffset);
3987
3988	// Store second word : arguments given on stack
3989	SDValue thirdStore = DAG.getStore(Chain: secondStore, dl, Val: StackOffsetFI, Ptr: nextPtr,
3990	PtrInfo: MachinePointerInfo (SV, nextOffset));
3991	nextOffset += FrameOffset;
3992	nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstFrameOffset);
3993
3994	// Store third word : arguments given in registers
3995	return DAG.getStore(Chain: thirdStore, dl, Val: FR, Ptr: nextPtr,
3996	PtrInfo: MachinePointerInfo (SV, nextOffset));
3997	}
3998
3999	/// FPR - The set of FP registers that should be allocated for arguments
4000	/// on Darwin and AIX.
4001	static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4002	PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4003	PPC::F11, PPC::F12, PPC::F13};
4004
4005	/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4006	/// the stack.
4007	static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4008	unsigned PtrByteSize) {
4009	unsigned ArgSize = ArgVT.getStoreSize();
4010	if (Flags.isByVal())
4011	ArgSize = Flags.getByValSize();
4012
4013	// Round up to multiples of the pointer size, except for array members,
4014	// which are always packed.
4015	if (!Flags.isInConsecutiveRegs())
4016	ArgSize = ((ArgSize + PtrByteSize - `1`)/PtrByteSize) * PtrByteSize;
4017
4018	return ArgSize;
4019	}
4020
4021	/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4022	/// on the stack.
4023	static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4024	ISD::ArgFlagsTy Flags,
4025	unsigned PtrByteSize) {
4026	Align Alignment(PtrByteSize);
4027
4028	// Altivec parameters are padded to a 16 byte boundary.
4029	if (ArgVT == MVT::v4f32 \|\| ArgVT == MVT::v4i32 \|\|
4030	ArgVT == MVT::v8i16 \|\| ArgVT == MVT::v16i8 \|\|
4031	ArgVT == MVT::v2f64 \|\| ArgVT == MVT::v2i64 \|\|
4032	ArgVT == MVT::v1i128 \|\| ArgVT == MVT::f128)
4033	Alignment = Align (`16`);
4034
4035	// ByVal parameters are aligned as requested.
4036	if (Flags.isByVal()) {
4037	auto BVAlign = Flags.getNonZeroByValAlign();
4038	if (BVAlign > PtrByteSize) {
4039	if (BVAlign.value() % PtrByteSize != `0`)
4040	llvm_unreachable(
4041	"ByVal alignment is not a multiple of the pointer size");
4042
4043	Alignment = BVAlign;
4044	}
4045	}
4046
4047	// Array members are always packed to their original alignment.
4048	if (Flags.isInConsecutiveRegs()) {
4049	// If the array member was split into multiple registers, the first
4050	// needs to be aligned to the size of the full type. (Except for
4051	// ppcf128, which is only aligned as its f64 components.)
4052	if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4053	Alignment = Align (OrigVT.getStoreSize());
4054	else
4055	Alignment = Align (ArgVT.getStoreSize());
4056	}
4057
4058	return Alignment;
4059	}
4060
4061	/// CalculateStackSlotUsed - Return whether this argument will use its
4062	/// stack slot (instead of being passed in registers). ArgOffset,
4063	/// AvailableFPRs, and AvailableVRs must hold the current argument
4064	/// position, and will be updated to account for this argument.
4065	static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4066	unsigned PtrByteSize, unsigned LinkageSize,
4067	unsigned ParamAreaSize, unsigned &ArgOffset,
4068	unsigned &AvailableFPRs,
4069	unsigned &AvailableVRs) {
4070	bool UseMemory = false;
4071
4072	// Respect alignment of argument on the stack.
4073	Align Alignment =
4074	CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4075	ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
4076	// If there's no space left in the argument save area, we must
4077	// use memory (this check also catches zero-sized arguments).
4078	if (ArgOffset >= LinkageSize + ParamAreaSize)
4079	UseMemory = true;
4080
4081	// Allocate argument on the stack.
4082	ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4083	if (Flags.isInConsecutiveRegsLast())
4084	ArgOffset = ((ArgOffset + PtrByteSize - `1`)/PtrByteSize) * PtrByteSize;
4085	// If we overran the argument save area, we must use memory
4086	// (this check catches arguments passed partially in memory)
4087	if (ArgOffset > LinkageSize + ParamAreaSize)
4088	UseMemory = true;
4089
4090	// However, if the argument is actually passed in an FPR or a VR,
4091	// we don't use memory after all.
4092	if (!Flags.isByVal()) {
4093	if (ArgVT == MVT::f32 \|\| ArgVT == MVT::f64)
4094	if (AvailableFPRs > `0`) {
4095	--AvailableFPRs;
4096	return false;
4097	}
4098	if (ArgVT == MVT::v4f32 \|\| ArgVT == MVT::v4i32 \|\|
4099	ArgVT == MVT::v8i16 \|\| ArgVT == MVT::v16i8 \|\|
4100	ArgVT == MVT::v2f64 \|\| ArgVT == MVT::v2i64 \|\|
4101	ArgVT == MVT::v1i128 \|\| ArgVT == MVT::f128)
4102	if (AvailableVRs > `0`) {
4103	--AvailableVRs;
4104	return false;
4105	}
4106	}
4107
4108	return UseMemory;
4109	}
4110
4111	/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4112	/// ensure minimum alignment required for target.
4113	static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4114	unsigned NumBytes) {
4115	return alignTo(Size: NumBytes, A: Lowering->getStackAlign());
4116	}
4117
4118	SDValue PPCTargetLowering::LowerFormalArguments(
4119	SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4120	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4121	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4122	if (Subtarget.isAIXABI())
4123	return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4124	InVals);
4125	if (Subtarget.is64BitELFABI())
4126	return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4127	InVals);
4128	assert(Subtarget.is32BitELFABI());
4129	return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4130	InVals);
4131	}
4132
4133	SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4134	SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4135	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4136	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4137
4138	// 32-bit SVR4 ABI Stack Frame Layout:
4139	// +-----------------------------------+
4140	// +--> \| Back chain \|
4141	// \| +-----------------------------------+
4142	// \| \| Floating-point register save area \|
4143	// \| +-----------------------------------+
4144	// \| \| General register save area \|
4145	// \| +-----------------------------------+
4146	// \| \| CR save word \|
4147	// \| +-----------------------------------+
4148	// \| \| VRSAVE save word \|
4149	// \| +-----------------------------------+
4150	// \| \| Alignment padding \|
4151	// \| +-----------------------------------+
4152	// \| \| Vector register save area \|
4153	// \| +-----------------------------------+
4154	// \| \| Local variable space \|
4155	// \| +-----------------------------------+
4156	// \| \| Parameter list area \|
4157	// \| +-----------------------------------+
4158	// \| \| LR save word \|
4159	// \| +-----------------------------------+
4160	// SP--> +--- \| Back chain \|
4161	// +-----------------------------------+
4162	//
4163	// Specifications:
4164	// System V Application Binary Interface PowerPC Processor Supplement
4165	// AltiVec Technology Programming Interface Manual
4166
4167	MachineFunction &MF = DAG.getMachineFunction();
4168	MachineFrameInfo &MFI = MF.getFrameInfo();
4169	PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4170
4171	EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
4172	// Potential tail calls could cause overwriting of argument stack slots.
4173	bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4174	(CallConv == CallingConv::Fast));
4175	const Align PtrAlign(`4`);
4176
4177	// Assign locations to all of the incoming arguments.
4178	SmallVector<CCValAssign, `16`> ArgLocs;
4179	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4180	*DAG.getContext());
4181
4182	// Reserve space for the linkage area on the stack.
4183	unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4184	CCInfo.AllocateStack(Size: LinkageSize, Alignment: PtrAlign);
4185	CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4);
4186
4187	for (unsigned i = `0`, e = ArgLocs.size(); i != e; ++i) {
4188	CCValAssign &VA = ArgLocs [i];
4189
4190	// Arguments stored in registers.
4191	if (VA.isRegLoc()) {
4192	const TargetRegisterClass *RC;
4193	EVT ValVT = VA.getValVT();
4194
4195	switch (ValVT.getSimpleVT().SimpleTy) {
4196	default:
4197	llvm_unreachable("ValVT not supported by formal arguments Lowering");
4198	case MVT::i1:
4199	case MVT::i32:
4200	RC = &PPC::GPRCRegClass;
4201	break;
4202	case MVT::f32:
4203	if (Subtarget.hasP8Vector())
4204	RC = &PPC::VSSRCRegClass;
4205	else if (Subtarget.hasSPE())
4206	RC = &PPC::GPRCRegClass;
4207	else
4208	RC = &PPC::F4RCRegClass;
4209	break;
4210	case MVT::f64:
4211	if (Subtarget.hasVSX())
4212	RC = &PPC::VSFRCRegClass;
4213	else if (Subtarget.hasSPE())
4214	// SPE passes doubles in GPR pairs.
4215	RC = &PPC::GPRCRegClass;
4216	else
4217	RC = &PPC::F8RCRegClass;
4218	break;
4219	case MVT::v16i8:
4220	case MVT::v8i16:
4221	case MVT::v4i32:
4222	RC = &PPC::VRRCRegClass;
4223	break;
4224	case MVT::v4f32:
4225	RC = &PPC::VRRCRegClass;
4226	break;
4227	case MVT::v2f64:
4228	case MVT::v2i64:
4229	RC = &PPC::VRRCRegClass;
4230	break;
4231	}
4232
4233	SDValue ArgValue;
4234	// Transform the arguments stored in physical registers into
4235	// virtual ones.
4236	if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4237	assert(i + `1` < e && "No second half of double precision argument");
4238	Register RegLo = MF.addLiveIn(PReg: VA.getLocReg(), RC);
4239	Register RegHi = MF.addLiveIn(PReg: ArgLocs [++i].getLocReg(), RC);
4240	SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, Reg: RegLo, VT: MVT::i32);
4241	SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, Reg: RegHi, VT: MVT::i32);
4242	if (!Subtarget.isLittleEndian())
4243	std::swap (a&: ArgValueLo, b&: ArgValueHi);
4244	ArgValue = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: ArgValueLo,
4245	N2: ArgValueHi);
4246	} else {
4247	Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
4248	ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4249	VT: ValVT == MVT::i1 ? MVT::i32 : ValVT);
4250	if (ValVT == MVT::i1)
4251	ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: ArgValue);
4252	}
4253
4254	InVals.push_back(Elt: ArgValue);
4255	} else {
4256	// Argument stored in memory.
4257	assert(VA.isMemLoc());
4258
4259	// Get the extended size of the argument type in stack
4260	unsigned ArgSize = VA.getLocVT().getStoreSize();
4261	// Get the actual size of the argument type
4262	unsigned ObjSize = VA.getValVT().getStoreSize();
4263	unsigned ArgOffset = VA.getLocMemOffset();
4264	// Stack objects in PPC32 are right justified.
4265	ArgOffset += ArgSize - ObjSize;
4266	int FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: isImmutable);
4267
4268	// Create load nodes to retrieve arguments from the stack.
4269	SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4270	InVals.push_back(
4271	Elt: DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo ()));
4272	}
4273	}
4274
4275	// Assign locations to all of the incoming aggregate by value arguments.
4276	// Aggregates passed by value are stored in the local variable space of the
4277	// caller's stack frame, right above the parameter list area.
4278	SmallVector<CCValAssign, `16`> ByValArgLocs;
4279	CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4280	ByValArgLocs, *DAG.getContext());
4281
4282	// Reserve stack space for the allocations in CCInfo.
4283	CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);
4284
4285	CCByValInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4_ByVal);
4286
4287	// Area that is at least reserved in the caller of this function.
4288	unsigned MinReservedArea = CCByValInfo.getStackSize();
4289	MinReservedArea = std::max(a: MinReservedArea, b: LinkageSize);
4290
4291	// Set the size that is at least reserved in caller of this function. Tail
4292	// call optimized function's reserved stack space needs to be aligned so that
4293	// taking the difference between two stack areas will result in an aligned
4294	// stack.
4295	MinReservedArea =
4296	EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
4297	FuncInfo->setMinReservedArea(MinReservedArea);
4298
4299	SmallVector<SDValue, `8`> MemOps;
4300
4301	// If the function takes variable number of arguments, make a frame index for
4302	// the start of the first vararg value... for expansion of llvm.va_start.
4303	if (isVarArg) {
4304	static const MCPhysReg GPArgRegs[] = {
4305	PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4306	PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4307	};
4308	const unsigned NumGPArgRegs = std::size(GPArgRegs);
4309
4310	static const MCPhysReg FPArgRegs[] = {
4311	PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4312	PPC::F8
4313	};
4314	unsigned NumFPArgRegs = std::size(FPArgRegs);
4315
4316	if (useSoftFloat() \|\| hasSPE())
4317	NumFPArgRegs = `0`;
4318
4319	FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(Regs: GPArgRegs));
4320	FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(Regs: FPArgRegs));
4321
4322	// Make room for NumGPArgRegs and NumFPArgRegs.
4323	int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/`8` +
4324	NumFPArgRegs * MVT (MVT::f64).getSizeInBits()/`8`;
4325
4326	FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4327	Size: PtrVT.getSizeInBits() / `8`, SPOffset: CCInfo.getStackSize(), IsImmutable: true));
4328
4329	FuncInfo->setVarArgsFrameIndex(
4330	MFI.CreateStackObject(Size: Depth, Alignment: Align (`8`), isSpillSlot: false));
4331	SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
4332
4333	// The fixed integer arguments of a variadic function are stored to the
4334	// VarArgsFrameIndex on the stack so that they may be loaded by
4335	// dereferencing the result of va_next.
4336	for (MCPhysReg GPArgReg : GPArgRegs) {
4337	// Get an existing live-in vreg, or add a new one.
4338	Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: GPArgReg);
4339	if (!VReg)
4340	VReg = MF.addLiveIn(PReg: GPArgReg, RC: &PPC::GPRCRegClass);
4341
4342	SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4343	SDValue Store =
4344	DAG.getStore(Chain: Val.getValue(R: `1`), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo ());
4345	MemOps.push_back(Elt: Store);
4346	// Increment the address by four for the next argument to store
4347	SDValue PtrOff = DAG.getConstant(Val: PtrVT.getSizeInBits()/`8`, DL: dl, VT: PtrVT);
4348	FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
4349	}
4350
4351	// FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4352	// is set.
4353	// The double arguments are stored to the VarArgsFrameIndex
4354	// on the stack.
4355	for (unsigned FPRIndex = `0`; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4356	// Get an existing live-in vreg, or add a new one.
4357	Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: FPArgRegs[FPRIndex]);
4358	if (!VReg)
4359	VReg = MF.addLiveIn(PReg: FPArgRegs[FPRIndex], RC: &PPC::F8RCRegClass);
4360
4361	SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::f64);
4362	SDValue Store =
4363	DAG.getStore(Chain: Val.getValue(R: `1`), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo ());
4364	MemOps.push_back(Elt: Store);
4365	// Increment the address by eight for the next argument to store
4366	SDValue PtrOff = DAG.getConstant(Val: MVT (MVT::f64).getSizeInBits()/`8`, DL: dl,
4367	VT: PtrVT);
4368	FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
4369	}
4370	}
4371
4372	if (!MemOps.empty())
4373	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
4374
4375	return Chain;
4376	}
4377
4378	// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4379	// value to MVT::i64 and then truncate to the correct register size.
4380	SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4381	EVT ObjectVT, SelectionDAG &DAG,
4382	SDValue ArgVal,
4383	const SDLoc &dl) const {
4384	if (Flags.isSExt())
4385	ArgVal = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: MVT::i64, N1: ArgVal,
4386	N2: DAG.getValueType(ObjectVT));
4387	else if (Flags.isZExt())
4388	ArgVal = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: MVT::i64, N1: ArgVal,
4389	N2: DAG.getValueType(ObjectVT));
4390
4391	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ObjectVT, Operand: ArgVal);
4392	}
4393
4394	SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4395	SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4396	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4397	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4398	// TODO: add description of PPC stack frame format, or at least some docs.
4399	//
4400	bool isELFv2ABI = Subtarget.isELFv2ABI();
4401	bool isLittleEndian = Subtarget.isLittleEndian();
4402	MachineFunction &MF = DAG.getMachineFunction();
4403	MachineFrameInfo &MFI = MF.getFrameInfo();
4404	PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4405
4406	assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4407	"fastcc not supported on varargs functions");
4408
4409	EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
4410	// Potential tail calls could cause overwriting of argument stack slots.
4411	bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4412	(CallConv == CallingConv::Fast));
4413	unsigned PtrByteSize = `8`;
4414	unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4415
4416	static const MCPhysReg GPR[] = {
4417	PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4418	PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4419	};
4420	static const MCPhysReg VR[] = {
4421	PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4422	PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4423	};
4424
4425	const unsigned Num_GPR_Regs = std::size(GPR);
4426	const unsigned Num_FPR_Regs = useSoftFloat() ? `0` : `13`;
4427	const unsigned Num_VR_Regs = std::size(VR);
4428
4429	// Do a first pass over the arguments to determine whether the ABI
4430	// guarantees that our caller has allocated the parameter save area
4431	// on its stack frame. In the ELFv1 ABI, this is always the case;
4432	// in the ELFv2 ABI, it is true if this is a vararg function or if
4433	// any parameter is located in a stack slot.
4434
4435	bool HasParameterArea = !isELFv2ABI \|\| isVarArg;
4436	unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4437	unsigned NumBytes = LinkageSize;
4438	unsigned AvailableFPRs = Num_FPR_Regs;
4439	unsigned AvailableVRs = Num_VR_Regs;
4440	for (const ISD::InputArg &In : Ins) {
4441	if (In.Flags.isNest())
4442	continue;
4443
4444	if (CalculateStackSlotUsed(ArgVT: In.VT, OrigVT: In.ArgVT, Flags: In.Flags, PtrByteSize,
4445	LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
4446	AvailableFPRs, AvailableVRs))
4447	HasParameterArea = true;
4448	}
4449
4450	// Add DAG nodes to load the arguments or copy them out of registers. On
4451	// entry to a function on PPC, the arguments start after the linkage area,
4452	// although the first ones are often in registers.
4453
4454	unsigned ArgOffset = LinkageSize;
4455	unsigned GPR_idx = `0`, FPR_idx = `0`, VR_idx = `0`;
4456	SmallVector<SDValue, `8`> MemOps;
4457	Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4458	unsigned CurArgIdx = `0`;
4459	for (unsigned ArgNo = `0`, e = Ins.size(); ArgNo != e; ++ArgNo) {
4460	SDValue ArgVal;
4461	bool needsLoad = false;
4462	EVT ObjectVT = Ins [ArgNo].VT;
4463	EVT OrigVT = Ins [ArgNo].ArgVT;
4464	unsigned ObjSize = ObjectVT.getStoreSize();
4465	unsigned ArgSize = ObjSize;
4466	ISD::ArgFlagsTy Flags = Ins [ArgNo].Flags;
4467	if (Ins [ArgNo].isOrigArg()) {
4468	std::advance(i&: FuncArg, n: Ins [ArgNo].getOrigArgIndex() - CurArgIdx);
4469	CurArgIdx = Ins [ArgNo].getOrigArgIndex();
4470	}
4471	// We re-align the argument offset for each argument, except when using the
4472	// fast calling convention, when we need to make sure we do that only when
4473	// we'll actually use a stack slot.
4474	unsigned CurArgOffset;
4475	Align Alignment;
4476	auto ComputeArgOffset = [&]() {
4477	/ Respect alignment of argument on the stack. /
4478	Alignment =
4479	CalculateStackSlotAlignment(ArgVT: ObjectVT, OrigVT, Flags, PtrByteSize);
4480	ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
4481	CurArgOffset = ArgOffset;
4482	};
4483
4484	if (CallConv != CallingConv::Fast) {
4485	ComputeArgOffset ();
4486
4487	/ Compute GPR index associated with argument offset. /
4488	GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4489	GPR_idx = std::min(a: GPR_idx, b: Num_GPR_Regs);
4490	}
4491
4492	// FIXME the codegen can be much improved in some cases.
4493	// We do not have to keep everything in memory.
4494	if (Flags.isByVal()) {
4495	assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4496
4497	if (CallConv == CallingConv::Fast)
4498	ComputeArgOffset ();
4499
4500	// ObjSize is the true size, ArgSize rounded up to multiple of registers.
4501	ObjSize = Flags.getByValSize();
4502	ArgSize = ((ObjSize + PtrByteSize - `1`)/PtrByteSize) * PtrByteSize;
4503	// Empty aggregate parameters do not take up registers. Examples:
4504	// struct { } a;
4505	// union { } b;
4506	// int c[0];
4507	// etc. However, we have to provide a place-holder in InVals, so
4508	// pretend we have an 8-byte item at the current address for that
4509	// purpose.
4510	if (!ObjSize) {
4511	int FI = MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: ArgOffset, IsImmutable: true);
4512	SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4513	InVals.push_back(Elt: FIN);
4514	continue;
4515	}
4516
4517	// Create a stack object covering all stack doublewords occupied
4518	// by the argument. If the argument is (fully or partially) on
4519	// the stack, or if the argument is fully in registers but the
4520	// caller has allocated the parameter save anyway, we can refer
4521	// directly to the caller's stack frame. Otherwise, create a
4522	// local copy in our own frame.
4523	int FI;
4524	if (HasParameterArea \|\|
4525	ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4526	FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: false, isAliased: true);
4527	else
4528	FI = MFI.CreateStackObject(Size: ArgSize, Alignment, isSpillSlot: false);
4529	SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4530
4531	// Handle aggregates smaller than 8 bytes.
4532	if (ObjSize < PtrByteSize) {
4533	// The value of the object is its address, which differs from the
4534	// address of the enclosing doubleword on big-endian systems.
4535	SDValue Arg = FIN;
4536	if (!isLittleEndian) {
4537	SDValue ArgOff = DAG.getConstant(Val: PtrByteSize - ObjSize, DL: dl, VT: PtrVT);
4538	Arg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ArgOff.getValueType(), N1: Arg, N2: ArgOff);
4539	}
4540	InVals.push_back(Elt: Arg);
4541
4542	if (GPR_idx != Num_GPR_Regs) {
4543	Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
4544	FuncInfo->addLiveInAttr(VReg, Flags);
4545	SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4546	EVT ObjType = EVT::getIntegerVT(Context&: DAG.getContext(), BitWidth: ObjSize `8`);
4547	SDValue Store =
4548	DAG.getTruncStore(Chain: Val.getValue(R: `1`), dl, Val, Ptr: Arg,
4549	PtrInfo: MachinePointerInfo (&*FuncArg), SVT: ObjType);
4550	MemOps.push_back(Elt: Store);
4551	}
4552	// Whether we copied from a register or not, advance the offset
4553	// into the parameter save area by a full doubleword.
4554	ArgOffset += PtrByteSize;
4555	continue;
4556	}
4557
4558	// The value of the object is its address, which is the address of
4559	// its first stack doubleword.
4560	InVals.push_back(Elt: FIN);
4561
4562	// Store whatever pieces of the object are in registers to memory.
4563	for (unsigned j = `0`; j < ArgSize; j += PtrByteSize) {
4564	if (GPR_idx == Num_GPR_Regs)
4565	break;
4566
4567	Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
4568	FuncInfo->addLiveInAttr(VReg, Flags);
4569	SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4570	SDValue Addr = FIN;
4571	if (j) {
4572	SDValue Off = DAG.getConstant(Val: j, DL: dl, VT: PtrVT);
4573	Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Off.getValueType(), N1: Addr, N2: Off);
4574	}
4575	unsigned StoreSizeInBits = std::min(a: PtrByteSize, b: (ObjSize - j)) * `8`;
4576	EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreSizeInBits);
4577	SDValue Store =
4578	DAG.getTruncStore(Chain: Val.getValue(R: `1`), dl, Val, Ptr: Addr,
4579	PtrInfo: MachinePointerInfo (&*FuncArg, j), SVT: ObjType);
4580	MemOps.push_back(Elt: Store);
4581	++GPR_idx;
4582	}
4583	ArgOffset += ArgSize;
4584	continue;
4585	}
4586
4587	switch (ObjectVT.getSimpleVT().SimpleTy) {
4588	default: llvm_unreachable("Unhandled argument type!");
4589	case MVT::i1:
4590	case MVT::i32:
4591	case MVT::i64:
4592	if (Flags.isNest()) {
4593	// The 'nest' parameter, if any, is passed in R11.
4594	Register VReg = MF.addLiveIn(PReg: PPC::X11, RC: &PPC::G8RCRegClass);
4595	ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);
4596
4597	if (ObjectVT == MVT::i32 \|\| ObjectVT == MVT::i1)
4598	ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4599
4600	break;
4601	}
4602
4603	// These can be scalar arguments or elements of an integer array type
4604	// passed directly. Clang may use those instead of "byval" aggregate
4605	// types to avoid forcing arguments to memory unnecessarily.
4606	if (GPR_idx != Num_GPR_Regs) {
4607	Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
4608	FuncInfo->addLiveInAttr(VReg, Flags);
4609	ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);
4610
4611	if (ObjectVT == MVT::i32 \|\| ObjectVT == MVT::i1)
4612	// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4613	// value to MVT::i64 and then truncate to the correct register size.
4614	ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4615	} else {
4616	if (CallConv == CallingConv::Fast)
4617	ComputeArgOffset ();
4618
4619	needsLoad = true;
4620	ArgSize = PtrByteSize;
4621	}
4622	if (CallConv != CallingConv::Fast \|\| needsLoad)
4623	ArgOffset += `8`;
4624	break;
4625
4626	case MVT::f32:
4627	case MVT::f64:
4628	// These can be scalar arguments or elements of a float array type
4629	// passed directly. The latter are used to implement ELFv2 homogenous
4630	// float aggregates.
4631	if (FPR_idx != Num_FPR_Regs) {
4632	unsigned VReg;
4633
4634	if (ObjectVT == MVT::f32)
4635	VReg = MF.addLiveIn(PReg: FPR[FPR_idx],
4636	RC: Subtarget.hasP8Vector()
4637	? &PPC::VSSRCRegClass
4638	: &PPC::F4RCRegClass);
4639	else
4640	VReg = MF.addLiveIn(PReg: FPR[FPR_idx], RC: Subtarget.hasVSX()
4641	? &PPC::VSFRCRegClass
4642	: &PPC::F8RCRegClass);
4643
4644	ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
4645	++FPR_idx;
4646	} else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4647	// FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4648	// once we support fp <-> gpr moves.
4649
4650	// This can only ever happen in the presence of f32 array types,
4651	// since otherwise we never run out of FPRs before running out
4652	// of GPRs.
4653	Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
4654	FuncInfo->addLiveInAttr(VReg, Flags);
4655	ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);
4656
4657	if (ObjectVT == MVT::f32) {
4658	if ((ArgOffset % PtrByteSize) == (isLittleEndian ? `4` : `0`))
4659	ArgVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgVal,
4660	N2: DAG.getConstant(Val: `32`, DL: dl, VT: MVT::i32));
4661	ArgVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: ArgVal);
4662	}
4663
4664	ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ObjectVT, Operand: ArgVal);
4665	} else {
4666	if (CallConv == CallingConv::Fast)
4667	ComputeArgOffset ();
4668
4669	needsLoad = true;
4670	}
4671
4672	// When passing an array of floats, the array occupies consecutive
4673	// space in the argument area; only round up to the next doubleword
4674	// at the end of the array. Otherwise, each float takes 8 bytes.
4675	if (CallConv != CallingConv::Fast \|\| needsLoad) {
4676	ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4677	ArgOffset += ArgSize;
4678	if (Flags.isInConsecutiveRegsLast())
4679	ArgOffset = ((ArgOffset + PtrByteSize - `1`)/PtrByteSize) * PtrByteSize;
4680	}
4681	break;
4682	case MVT::v4f32:
4683	case MVT::v4i32:
4684	case MVT::v8i16:
4685	case MVT::v16i8:
4686	case MVT::v2f64:
4687	case MVT::v2i64:
4688	case MVT::v1i128:
4689	case MVT::f128:
4690	// These can be scalar arguments or elements of a vector array type
4691	// passed directly. The latter are used to implement ELFv2 homogenous
4692	// vector aggregates.
4693	if (VR_idx != Num_VR_Regs) {
4694	Register VReg = MF.addLiveIn(PReg: VR[VR_idx], RC: &PPC::VRRCRegClass);
4695	ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
4696	++VR_idx;
4697	} else {
4698	if (CallConv == CallingConv::Fast)
4699	ComputeArgOffset ();
4700	needsLoad = true;
4701	}
4702	if (CallConv != CallingConv::Fast \|\| needsLoad)
4703	ArgOffset += `16`;
4704	break;
4705	}
4706
4707	// We need to load the argument to a virtual register if we determined
4708	// above that we ran out of physical registers of the appropriate type.
4709	if (needsLoad) {
4710	if (ObjSize < ArgSize && !isLittleEndian)
4711	CurArgOffset += ArgSize - ObjSize;
4712	int FI = MFI.CreateFixedObject(Size: ObjSize, SPOffset: CurArgOffset, IsImmutable: isImmutable);
4713	SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4714	ArgVal = DAG.getLoad(VT: ObjectVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo ());
4715	}
4716
4717	InVals.push_back(Elt: ArgVal);
4718	}
4719
4720	// Area that is at least reserved in the caller of this function.
4721	unsigned MinReservedArea;
4722	if (HasParameterArea)
4723	MinReservedArea = std::max(a: ArgOffset, b: LinkageSize + `8` * PtrByteSize);
4724	else
4725	MinReservedArea = LinkageSize;
4726
4727	// Set the size that is at least reserved in caller of this function. Tail
4728	// call optimized functions' reserved stack space needs to be aligned so that
4729	// taking the difference between two stack areas will result in an aligned
4730	// stack.
4731	MinReservedArea =
4732	EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
4733	FuncInfo->setMinReservedArea(MinReservedArea);
4734
4735	// If the function takes variable number of arguments, make a frame index for
4736	// the start of the first vararg value... for expansion of llvm.va_start.
4737	// On ELFv2ABI spec, it writes:
4738	// C programs that are intended to be portable* across different compilers*
4739	// and architectures must use the header file <stdarg.h> to deal with variable
4740	// argument lists.
4741	if (isVarArg && MFI.hasVAStart()) {
4742	int Depth = ArgOffset;
4743
4744	FuncInfo->setVarArgsFrameIndex(
4745	MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: Depth, IsImmutable: true));
4746	SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
4747
4748	// If this function is vararg, store any remaining integer argument regs
4749	// to their spots on the stack so that they may be loaded by dereferencing
4750	// the result of va_next.
4751	for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4752	GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4753	Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
4754	SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4755	SDValue Store =
4756	DAG.getStore(Chain: Val.getValue(R: `1`), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo ());
4757	MemOps.push_back(Elt: Store);
4758	// Increment the address by four for the next argument to store
4759	SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
4760	FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
4761	}
4762	}
4763
4764	if (!MemOps.empty())
4765	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
4766
4767	return Chain;
4768	}
4769
4770	/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4771	/// adjusted to accommodate the arguments for the tailcall.
4772	static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4773	unsigned ParamSize) {
4774
4775	if (!isTailCall) return `0`;
4776
4777	PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4778	unsigned CallerMinReservedArea = FI->getMinReservedArea();
4779	int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4780	// Remember only if the new adjustment is bigger.
4781	if (SPDiff < FI->getTailCallSPDelta())
4782	FI->setTailCallSPDelta(SPDiff);
4783
4784	return SPDiff;
4785	}
4786
4787	static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4788
4789	static bool callsShareTOCBase(const Function *Caller,
4790	const GlobalValue *CalleeGV,
4791	const TargetMachine &TM) {
4792	// It does not make sense to call callsShareTOCBase() with a caller that
4793	// is PC Relative since PC Relative callers do not have a TOC.
4794	#ifndef NDEBUG
4795	const PPCSubtarget STICaller = &TM.getSubtarget<PPCSubtarget>(Caller);
4796	assert(!STICaller->isUsingPCRelativeCalls() &&
4797	"PC Relative callers do not have a TOC and cannot share a TOC Base");
4798	#endif
4799
4800	// Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4801	// don't have enough information to determine if the caller and callee share
4802	// the same TOC base, so we have to pessimistically assume they don't for
4803	// correctness.
4804	if (!CalleeGV)
4805	return false;
4806
4807	// If the callee is preemptable, then the static linker will use a plt-stub
4808	// which saves the toc to the stack, and needs a nop after the call
4809	// instruction to convert to a toc-restore.
4810	if (!TM.shouldAssumeDSOLocal(GV: CalleeGV))
4811	return false;
4812
4813	// Functions with PC Relative enabled may clobber the TOC in the same DSO.
4814	// We may need a TOC restore in the situation where the caller requires a
4815	// valid TOC but the callee is PC Relative and does not.
4816	const Function *F = dyn_cast<Function>(Val: CalleeGV);
4817	const GlobalAlias *Alias = dyn_cast<GlobalAlias>(Val: CalleeGV);
4818
4819	// If we have an Alias we can try to get the function from there.
4820	if (Alias) {
4821	const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4822	F = dyn_cast<Function>(Val: GlobalObj);
4823	}
4824
4825	// If we still have no valid function pointer we do not have enough
4826	// information to determine if the callee uses PC Relative calls so we must
4827	// assume that it does.
4828	if (!F)
4829	return false;
4830
4831	// If the callee uses PC Relative we cannot guarantee that the callee won't
4832	// clobber the TOC of the caller and so we must assume that the two
4833	// functions do not share a TOC base.
4834	const PPCSubtarget STICallee = &TM.getSubtarget<PPCSubtarget>(F: F);
4835	if (STICallee->isUsingPCRelativeCalls())
4836	return false;
4837
4838	// If the GV is not a strong definition then we need to assume it can be
4839	// replaced by another function at link time. The function that replaces
4840	// it may not share the same TOC as the caller since the callee may be
4841	// replaced by a PC Relative version of the same function.
4842	if (!CalleeGV->isStrongDefinitionForLinker())
4843	return false;
4844
4845	// The medium and large code models are expected to provide a sufficiently
4846	// large TOC to provide all data addressing needs of a module with a
4847	// single TOC.
4848	if (CodeModel::Medium == TM.getCodeModel() \|\|
4849	CodeModel::Large == TM.getCodeModel())
4850	return true;
4851
4852	// Any explicitly-specified sections and section prefixes must also match.
4853	// Also, if we're using -ffunction-sections, then each function is always in
4854	// a different section (the same is true for COMDAT functions).
4855	if (TM.getFunctionSections() \|\| CalleeGV->hasComdat() \|\|
4856	Caller->hasComdat() \|\| CalleeGV->getSection() != Caller->getSection())
4857	return false;
4858	if (const auto *F = dyn_cast<Function>(Val: CalleeGV)) {
4859	if (F->getSectionPrefix() != Caller->getSectionPrefix())
4860	return false;
4861	}
4862
4863	return true;
4864	}
4865
4866	static bool
4867	needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4868	const SmallVectorImpl<ISD::OutputArg> &Outs) {
4869	assert(Subtarget.is64BitELFABI());
4870
4871	const unsigned PtrByteSize = `8`;
4872	const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4873
4874	static const MCPhysReg GPR[] = {
4875	PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4876	PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4877	};
4878	static const MCPhysReg VR[] = {
4879	PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4880	PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4881	};
4882
4883	const unsigned NumGPRs = std::size(GPR);
4884	const unsigned NumFPRs = `13`;
4885	const unsigned NumVRs = std::size(VR);
4886	const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4887
4888	unsigned NumBytes = LinkageSize;
4889	unsigned AvailableFPRs = NumFPRs;
4890	unsigned AvailableVRs = NumVRs;
4891
4892	for (const ISD::OutputArg& Param : Outs) {
4893	if (Param.Flags.isNest()) continue;
4894
4895	if (CalculateStackSlotUsed(ArgVT: Param.VT, OrigVT: Param.ArgVT, Flags: Param.Flags, PtrByteSize,
4896	LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
4897	AvailableFPRs, AvailableVRs))
4898	return true;
4899	}
4900	return false;
4901	}
4902
4903	static bool hasSameArgumentList(const Function CallerFn, const* CallBase &CB) {
4904	if (CB.arg_size() != CallerFn->arg_size())
4905	return false;
4906
4907	auto CalleeArgIter = CB.arg_begin();
4908	auto CalleeArgEnd = CB.arg_end();
4909	Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4910
4911	for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4912	const Value* CalleeArg = *CalleeArgIter;
4913	const Value* CallerArg = &(*CallerArgIter);
4914	if (CalleeArg == CallerArg)
4915	continue;
4916
4917	// e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4918	// tail call @callee([4 x i64] undef, [4 x i64] %b)
4919	// }
4920	// 1st argument of callee is undef and has the same type as caller.
4921	if (CalleeArg->getType() == CallerArg->getType() &&
4922	isa<UndefValue>(Val: CalleeArg))
4923	continue;
4924
4925	return false;
4926	}
4927
4928	return true;
4929	}
4930
4931	// Returns true if TCO is possible between the callers and callees
4932	// calling conventions.
4933	static bool
4934	areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4935	CallingConv::ID CalleeCC) {
4936	// Tail calls are possible with fastcc and ccc.
4937	auto isTailCallableCC = [] (CallingConv::ID CC){
4938	return CC == CallingConv::C \|\| CC == CallingConv::Fast;
4939	};
4940	if (!isTailCallableCC (CallerCC) \|\| !isTailCallableCC (CalleeCC))
4941	return false;
4942
4943	// We can safely tail call both fastcc and ccc callees from a c calling
4944	// convention caller. If the caller is fastcc, we may have less stack space
4945	// than a non-fastcc caller with the same signature so disable tail-calls in
4946	// that case.
4947	return CallerCC == CallingConv::C \|\| CallerCC == CalleeCC;
4948	}
4949
4950	bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4951	const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4952	CallingConv::ID CallerCC, const CallBase CB, bool* isVarArg,
4953	const SmallVectorImpl<ISD::OutputArg> &Outs,
4954	const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4955	bool isCalleeExternalSymbol) const {
4956	bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4957
4958	if (DisableSCO && !TailCallOpt) return false;
4959
4960	// Variadic argument functions are not supported.
4961	if (isVarArg) return false;
4962
4963	// Check that the calling conventions are compatible for tco.
4964	if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4965	return false;
4966
4967	// Caller contains any byval parameter is not supported.
4968	if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4969	return false;
4970
4971	// Callee contains any byval parameter is not supported, too.
4972	// Note: This is a quick work around, because in some cases, e.g.
4973	// caller's stack size > callee's stack size, we are still able to apply
4974	// sibling call optimization. For example, gcc is able to do SCO for caller1
4975	// in the following example, but not for caller2.
4976	// struct test {
4977	// long int a;
4978	// char ary[56];
4979	// } gTest;
4980	// __attribute__((noinline)) int callee(struct test v, struct test b) {*
4981	// b->a = v.a;
4982	// return 0;
4983	// }
4984	// void caller1(struct test a, struct test c, struct test b) {*
4985	// callee(gTest, b); }
4986	// void caller2(struct test b) { callee(gTest, b); }*
4987	if (any_of(Range: Outs, P: [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4988	return false;
4989
4990	// If callee and caller use different calling conventions, we cannot pass
4991	// parameters on stack since offsets for the parameter area may be different.
4992	if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
4993	return false;
4994
4995	// All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4996	// the caller and callee share the same TOC for TCO/SCO. If the caller and
4997	// callee potentially have different TOC bases then we cannot tail call since
4998	// we need to restore the TOC pointer after the call.
4999	// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5000	// We cannot guarantee this for indirect calls or calls to external functions.
5001	// When PC-Relative addressing is used, the concept of the TOC is no longer
5002	// applicable so this check is not required.
5003	// Check first for indirect calls.
5004	if (!Subtarget.isUsingPCRelativeCalls() &&
5005	!isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5006	return false;
5007
5008	// Check if we share the TOC base.
5009	if (!Subtarget.isUsingPCRelativeCalls() &&
5010	!callsShareTOCBase(Caller: CallerFunc, CalleeGV, TM: getTargetMachine()))
5011	return false;
5012
5013	// TCO allows altering callee ABI, so we don't have to check further.
5014	if (CalleeCC == CallingConv::Fast && TailCallOpt)
5015	return true;
5016
5017	if (DisableSCO) return false;
5018
5019	// If callee use the same argument list that caller is using, then we can
5020	// apply SCO on this case. If it is not, then we need to check if callee needs
5021	// stack for passing arguments.
5022	// PC Relative tail calls may not have a CallBase.
5023	// If there is no CallBase we cannot verify if we have the same argument
5024	// list so assume that we don't have the same argument list.
5025	if (CB && !hasSameArgumentList(CallerFn: CallerFunc, CB: *CB) &&
5026	needStackSlotPassParameters(Subtarget, Outs))
5027	return false;
5028	else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5029	return false;
5030
5031	return true;
5032	}
5033
5034	/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5035	/// for tail call optimization. Targets which want to do tail call
5036	/// optimization should implement this function.
5037	bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5038	const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5039	CallingConv::ID CallerCC, bool isVarArg,
5040	const SmallVectorImpl<ISD::InputArg> &Ins) const {
5041	if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5042	return false;
5043
5044	// Variable argument functions are not supported.
5045	if (isVarArg)
5046	return false;
5047
5048	if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5049	// Functions containing by val parameters are not supported.
5050	if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5051	return false;
5052
5053	// Non-PIC/GOT tail calls are supported.
5054	if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5055	return true;
5056
5057	// At the moment we can only do local tail calls (in same module, hidden
5058	// or protected) if we are generating PIC.
5059	if (CalleeGV)
5060	return CalleeGV->hasHiddenVisibility() \|\|
5061	CalleeGV->hasProtectedVisibility();
5062	}
5063
5064	return false;
5065	}
5066
5067	/// isCallCompatibleAddress - Return the immediate to use if the specified
5068	/// 32-bit value is representable in the immediate field of a BxA instruction.
5069	static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5070	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Op);
5071	if (!C) return nullptr;
5072
5073	int Addr = C->getZExtValue();
5074	if ((Addr & `3`) != `0` \|\| // Low 2 bits are implicitly zero.
5075	SignExtend32<`26`>(X: Addr) != Addr)
5076	return nullptr; // Top 6 bits have to be sext of immediate.
5077
5078	return DAG
5079	.getSignedConstant(
5080	Val: (int)C->getZExtValue() >> `2`, DL: SDLoc (Op),
5081	VT: DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()))
5082	.getNode();
5083	}
5084
5085	namespace {
5086
5087	struct TailCallArgumentInfo {
5088	SDValue Arg;
5089	SDValue FrameIdxOp;
5090	int FrameIdx = `0`;
5091
5092	TailCallArgumentInfo() = default;
5093	};
5094
5095	} // end anonymous namespace
5096
5097	/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5098	static void StoreTailCallArgumentsToStackSlot(
5099	SelectionDAG &DAG, SDValue Chain,
5100	const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5101	SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5102	for (unsigned i = `0`, e = TailCallArgs.size(); i != e; ++i) {
5103	SDValue Arg = TailCallArgs [i].Arg;
5104	SDValue FIN = TailCallArgs [i].FrameIdxOp;
5105	int FI = TailCallArgs [i].FrameIdx;
5106	// Store relative to framepointer.
5107	MemOpChains.push_back(Elt: DAG.getStore(
5108	Chain, dl, Val: Arg, Ptr: FIN,
5109	PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)));
5110	}
5111	}
5112
5113	/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5114	/// the appropriate stack slot for the tail call optimized function call.
5115	static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5116	SDValue OldRetAddr, SDValue OldFP,
5117	int SPDiff, const SDLoc &dl) {
5118	if (SPDiff) {
5119	// Calculate the new stack slot for the return address.
5120	MachineFunction &MF = DAG.getMachineFunction();
5121	const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5122	const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5123	int SlotSize = Subtarget.isPPC64() ? `8` : `4`;
5124	int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5125	int NewRetAddr = MF.getFrameInfo().CreateFixedObject(Size: SlotSize,
5126	SPOffset: NewRetAddrLoc, IsImmutable: true);
5127	SDValue NewRetAddrFrIdx =
5128	DAG.getFrameIndex(FI: NewRetAddr, VT: Subtarget.getScalarIntVT());
5129	Chain = DAG.getStore(Chain, dl, Val: OldRetAddr, Ptr: NewRetAddrFrIdx,
5130	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: NewRetAddr));
5131	}
5132	return Chain;
5133	}
5134
5135	/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5136	/// the position of the argument.
5137	static void CalculateTailCallArgDest(
5138	SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5139	int SPDiff, unsigned ArgOffset,
5140	SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5141	int Offset = ArgOffset + SPDiff;
5142	uint32_t OpSize = (Arg.getValueSizeInBits() + `7`) / `8`;
5143	int FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
5144	EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5145	SDValue FIN = DAG.getFrameIndex(FI, VT);
5146	TailCallArgumentInfo Info;
5147	Info.Arg = Arg;
5148	Info.FrameIdxOp = FIN;
5149	Info.FrameIdx = FI;
5150	TailCallArguments.push_back(Elt: Info);
5151	}
5152
5153	/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5154	/// stack slot. Returns the chain as result and the loaded frame pointers in
5155	/// LROpOut/FPOpout. Used when tail calling.
5156	SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5157	SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5158	SDValue &FPOpOut, const SDLoc &dl) const {
5159	if (SPDiff) {
5160	// Load the LR and FP stack slot for later adjusting.
5161	LROpOut = getReturnAddrFrameIndex(DAG);
5162	LROpOut = DAG.getLoad(VT: Subtarget.getScalarIntVT(), dl, Chain, Ptr: LROpOut,
5163	PtrInfo: MachinePointerInfo ());
5164	Chain = SDValue (LROpOut.getNode(), `1`);
5165	}
5166	return Chain;
5167	}
5168
5169	/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5170	/// by "Src" to address "Dst" of size "Size". Alignment information is
5171	/// specified by the specific parameter attribute. The copy will be passed as
5172	/// a byval function parameter.
5173	/// Sometimes what we are copying is the end of a larger object, the part that
5174	/// does not fit in registers.
5175	static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5176	SDValue Chain, ISD::ArgFlagsTy Flags,
5177	SelectionDAG &DAG, const SDLoc &dl) {
5178	SDValue SizeNode = DAG.getConstant(Val: Flags.getByValSize(), DL: dl, VT: MVT::i32);
5179	return DAG.getMemcpy(
5180	Chain, dl, Dst, Src, Size: SizeNode, Alignment: Flags.getNonZeroByValAlign(), isVol: false, AlwaysInline: false,
5181	/CI=/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo (), SrcPtrInfo: MachinePointerInfo ());
5182	}
5183
5184	/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5185	/// tail calls.
5186	static void LowerMemOpCallTo(
5187	SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5188	SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5189	bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5190	SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5191	EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
5192	if (!isTailCall) {
5193	if (isVector) {
5194	SDValue StackPtr;
5195	if (isPPC64)
5196	StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
5197	else
5198	StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
5199	PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr,
5200	N2: DAG.getConstant(Val: ArgOffset, DL: dl, VT: PtrVT));
5201	}
5202	MemOpChains.push_back(
5203	Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo ()));
5204	// Calculate and remember argument location.
5205	} else
5206	CalculateTailCallArgDest(DAG, MF, IsPPC64: isPPC64, Arg, SPDiff, ArgOffset,
5207	TailCallArguments);
5208	}
5209
5210	static void
5211	PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5212	const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5213	SDValue FPOp,
5214	SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5215	// Emit a sequence of copyto/copyfrom virtual registers for arguments that
5216	// might overwrite each other in case of tail call optimization.
5217	SmallVector<SDValue, `8`> MemOpChains2;
5218	// Do not flag preceding copytoreg stuff together with the following stuff.
5219	InGlue = SDValue ();
5220	StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArgs: TailCallArguments,
5221	MemOpChains&: MemOpChains2, dl);
5222	if (!MemOpChains2.empty())
5223	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains2);
5224
5225	// Store the return address to the appropriate stack slot.
5226	Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, OldRetAddr: LROp, OldFP: FPOp, SPDiff, dl);
5227
5228	// Emit callseq_end just before tailcall node.
5229	Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: `0`, Glue: InGlue, DL: dl);
5230	InGlue = Chain.getValue(R: `1`);
5231	}
5232
5233	// Is this global address that of a function that can be called by name? (as
5234	// opposed to something that must hold a descriptor for an indirect call).
5235	static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5236	if (GV) {
5237	if (GV->isThreadLocal())
5238	return false;
5239
5240	return GV->getValueType()->isFunctionTy();
5241	}
5242
5243	return false;
5244	}
5245
5246	SDValue PPCTargetLowering::LowerCallResult(
5247	SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5248	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5249	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5250	SmallVector<CCValAssign, `16`> RVLocs;
5251	CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5252	*DAG.getContext());
5253
5254	CCRetInfo.AnalyzeCallResult(
5255	Ins, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5256	? RetCC_PPC_Cold
5257	: RetCC_PPC);
5258
5259	// Copy all of the result registers out of their specified physreg.
5260	for (unsigned i = `0`, e = RVLocs.size(); i != e; ++i) {
5261	CCValAssign &VA = RVLocs [i];
5262	assert(VA.isRegLoc() && "Can only return in registers!");
5263
5264	SDValue Val;
5265
5266	if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5267	SDValue Lo = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
5268	Glue: InGlue);
5269	Chain = Lo.getValue(R: `1`);
5270	InGlue = Lo.getValue(R: `2`);
5271	VA = RVLocs [++i]; // skip ahead to next loc
5272	SDValue Hi = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
5273	Glue: InGlue);
5274	Chain = Hi.getValue(R: `1`);
5275	InGlue = Hi.getValue(R: `2`);
5276	if (!Subtarget.isLittleEndian())
5277	std::swap (a&: Lo, b&: Hi);
5278	Val = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
5279	} else {
5280	Val = DAG.getCopyFromReg(Chain, dl,
5281	Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue: InGlue);
5282	Chain = Val.getValue(R: `1`);
5283	InGlue = Val.getValue(R: `2`);
5284	}
5285
5286	switch (VA.getLocInfo()) {
5287	default: llvm_unreachable("Unknown loc info!");
5288	case CCValAssign::Full: break;
5289	case CCValAssign::AExt:
5290	Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
5291	break;
5292	case CCValAssign::ZExt:
5293	Val = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: VA.getLocVT(), N1: Val,
5294	N2: DAG.getValueType(VA.getValVT()));
5295	Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
5296	break;
5297	case CCValAssign::SExt:
5298	Val = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: VA.getLocVT(), N1: Val,
5299	N2: DAG.getValueType(VA.getValVT()));
5300	Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
5301	break;
5302	}
5303
5304	InVals.push_back(Elt: Val);
5305	}
5306
5307	return Chain;
5308	}
5309
5310	static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5311	const PPCSubtarget &Subtarget, bool isPatchPoint) {
5312	auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5313	const GlobalValue GV = G ? G->getGlobal() : nullptr*;
5314
5315	// PatchPoint calls are not indirect.
5316	if (isPatchPoint)
5317	return false;
5318
5319	if (isFunctionGlobalAddress(GV) \|\| isa<ExternalSymbolSDNode>(Val: Callee))
5320	return false;
5321
5322	// Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5323	// becuase the immediate function pointer points to a descriptor instead of
5324	// a function entry point. The ELFv2 ABI cannot use a BLA because the function
5325	// pointer immediate points to the global entry point, while the BLA would
5326	// need to jump to the local entry point (see rL211174).
5327	if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5328	isBLACompatibleAddress(Op: Callee, DAG))
5329	return false;
5330
5331	return true;
5332	}
5333
5334	// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5335	static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5336	return Subtarget.isAIXABI() \|\|
5337	(Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5338	}
5339
5340	static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5341	const Function &Caller, const SDValue &Callee,
5342	const PPCSubtarget &Subtarget,
5343	const TargetMachine &TM,
5344	bool IsStrictFPCall = false) {
5345	if (CFlags.IsTailCall)
5346	return PPCISD::TC_RETURN;
5347
5348	unsigned RetOpc = `0`;
5349	// This is a call through a function pointer.
5350	if (CFlags.IsIndirect) {
5351	// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5352	// indirect calls. The save of the caller's TOC pointer to the stack will be
5353	// inserted into the DAG as part of call lowering. The restore of the TOC
5354	// pointer is modeled by using a pseudo instruction for the call opcode that
5355	// represents the 2 instruction sequence of an indirect branch and link,
5356	// immediately followed by a load of the TOC pointer from the stack save
5357	// slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5358	// as it is not saved or used.
5359	RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5360	: PPCISD::BCTRL;
5361	} else if (Subtarget.isUsingPCRelativeCalls()) {
5362	assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5363	RetOpc = PPCISD::CALL_NOTOC;
5364	} else if (Subtarget.isAIXABI() \|\| Subtarget.is64BitELFABI()) {
5365	// The ABIs that maintain a TOC pointer accross calls need to have a nop
5366	// immediately following the call instruction if the caller and callee may
5367	// have different TOC bases. At link time if the linker determines the calls
5368	// may not share a TOC base, the call is redirected to a trampoline inserted
5369	// by the linker. The trampoline will (among other things) save the callers
5370	// TOC pointer at an ABI designated offset in the linkage area and the
5371	// linker will rewrite the nop to be a load of the TOC pointer from the
5372	// linkage area into gpr2.
5373	auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5374	const GlobalValue GV = G ? G->getGlobal() : nullptr*;
5375	RetOpc =
5376	callsShareTOCBase(Caller: &Caller, CalleeGV: GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5377	} else
5378	RetOpc = PPCISD::CALL;
5379	if (IsStrictFPCall) {
5380	switch (RetOpc) {
5381	default:
5382	llvm_unreachable("Unknown call opcode");
5383	case PPCISD::BCTRL_LOAD_TOC:
5384	RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5385	break;
5386	case PPCISD::BCTRL:
5387	RetOpc = PPCISD::BCTRL_RM;
5388	break;
5389	case PPCISD::CALL_NOTOC:
5390	RetOpc = PPCISD::CALL_NOTOC_RM;
5391	break;
5392	case PPCISD::CALL:
5393	RetOpc = PPCISD::CALL_RM;
5394	break;
5395	case PPCISD::CALL_NOP:
5396	RetOpc = PPCISD::CALL_NOP_RM;
5397	break;
5398	}
5399	}
5400	return RetOpc;
5401	}
5402
5403	static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5404	const SDLoc &dl, const PPCSubtarget &Subtarget) {
5405	if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5406	if (SDNode *Dest = isBLACompatibleAddress(Op: Callee, DAG))
5407	return SDValue (Dest, `0`);
5408
5409	// Returns true if the callee is local, and false otherwise.
5410	auto isLocalCallee = [&]() {
5411	const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5412	const GlobalValue GV = G ? G->getGlobal() : nullptr*;
5413
5414	return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5415	!isa_and_nonnull<GlobalIFunc>(Val: GV);
5416	};
5417
5418	// The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5419	// a static relocation model causes some versions of GNU LD (2.17.50, at
5420	// least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5421	// built with secure-PLT.
5422	bool UsePlt =
5423	Subtarget.is32BitELFABI() && !isLocalCallee () &&
5424	Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5425
5426	const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5427	const TargetMachine &TM = Subtarget.getTargetMachine();
5428	const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5429	auto *S =
5430	static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(Func: GV, TM));
5431
5432	MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
5433	return DAG.getMCSymbol(Sym: S, VT: PtrVT);
5434	};
5435
5436	auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5437	const GlobalValue GV = G ? G->getGlobal() : nullptr*;
5438	if (isFunctionGlobalAddress(GV)) {
5439	const GlobalValue *GV = cast<GlobalAddressSDNode>(Val: Callee)->getGlobal();
5440
5441	if (Subtarget.isAIXABI()) {
5442	return getAIXFuncEntryPointSymbolSDNode (GV);
5443	}
5444	return DAG.getTargetGlobalAddress(GV, DL: dl, VT: Callee.getValueType(), offset: `0`,
5445	TargetFlags: UsePlt ? PPCII::MO_PLT : `0`);
5446	}
5447
5448	if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val: Callee)) {
5449	const char *SymName = S->getSymbol();
5450	if (Subtarget.isAIXABI()) {
5451	// If there exists a user-declared function whose name is the same as the
5452	// ExternalSymbol's, then we pick up the user-declared version.
5453	const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5454	if (const Function *F =
5455	dyn_cast_or_null<Function>(Val: Mod->getNamedValue(Name: SymName)))
5456	return getAIXFuncEntryPointSymbolSDNode (F);
5457
5458	// On AIX, direct function calls reference the symbol for the function's
5459	// entry point, which is named by prepending a "." before the function's
5460	// C-linkage name. A Qualname is returned here because an external
5461	// function entry point is a csect with XTY_ER property.
5462	const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5463	auto &Context = DAG.getMachineFunction().getContext();
5464	MCSectionXCOFF *Sec = Context.getXCOFFSection(
5465	Section: (Twine (".") + Twine (SymName)).str(), K: SectionKind::getMetadata(),
5466	CsectProp: XCOFF::CsectProperties (XCOFF::XMC_PR, XCOFF::XTY_ER));
5467	return Sec->getQualNameSymbol();
5468	};
5469
5470	SymName = getExternalFunctionEntryPointSymbol (SymName)->getName().data();
5471	}
5472	return DAG.getTargetExternalSymbol(Sym: SymName, VT: Callee.getValueType(),
5473	TargetFlags: UsePlt ? PPCII::MO_PLT : `0`);
5474	}
5475
5476	// No transformation needed.
5477	assert(Callee.getNode() && "What no callee?");
5478	return Callee;
5479	}
5480
5481	static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5482	assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5483	"Expected a CALLSEQ_STARTSDNode.");
5484
5485	// The last operand is the chain, except when the node has glue. If the node
5486	// has glue, then the last operand is the glue, and the chain is the second
5487	// last operand.
5488	SDValue LastValue = CallSeqStart.getValue(R: CallSeqStart ->getNumValues() - `1`);
5489	if (LastValue.getValueType() != MVT::Glue)
5490	return LastValue;
5491
5492	return CallSeqStart.getValue(R: CallSeqStart ->getNumValues() - `2`);
5493	}
5494
5495	// Creates the node that moves a functions address into the count register
5496	// to prepare for an indirect call instruction.
5497	static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5498	SDValue &Glue, SDValue &Chain,
5499	const SDLoc &dl) {
5500	SDValue MTCTROps[] = {Chain, Callee, Glue};
5501	EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5502	Chain = DAG.getNode(Opcode: PPCISD::MTCTR, DL: dl, ResultTys: ReturnTypes,
5503	Ops: ArrayRef(MTCTROps, Glue.getNode() ? `3` : `2`));
5504	// The glue is the second value produced.
5505	Glue = Chain.getValue(R: `1`);
5506	}
5507
5508	static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5509	SDValue &Glue, SDValue &Chain,
5510	SDValue CallSeqStart,
5511	const CallBase CB, const* SDLoc &dl,
5512	bool hasNest,
5513	const PPCSubtarget &Subtarget) {
5514	// Function pointers in the 64-bit SVR4 ABI do not point to the function
5515	// entry point, but to the function descriptor (the function entry point
5516	// address is part of the function descriptor though).
5517	// The function descriptor is a three doubleword structure with the
5518	// following fields: function entry point, TOC base address and
5519	// environment pointer.
5520	// Thus for a call through a function pointer, the following actions need
5521	// to be performed:
5522	// 1. Save the TOC of the caller in the TOC save area of its stack
5523	// frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5524	// 2. Load the address of the function entry point from the function
5525	// descriptor.
5526	// 3. Load the TOC of the callee from the function descriptor into r2.
5527	// 4. Load the environment pointer from the function descriptor into
5528	// r11.
5529	// 5. Branch to the function entry point address.
5530	// 6. On return of the callee, the TOC of the caller needs to be
5531	// restored (this is done in FinishCall()).
5532	//
5533	// The loads are scheduled at the beginning of the call sequence, and the
5534	// register copies are flagged together to ensure that no other
5535	// operations can be scheduled in between. E.g. without flagging the
5536	// copies together, a TOC access in the caller could be scheduled between
5537	// the assignment of the callee TOC and the branch to the callee, which leads
5538	// to incorrect code.
5539
5540	// Start by loading the function address from the descriptor.
5541	SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5542	auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5543	? (MachineMemOperand::MODereferenceable \|
5544	MachineMemOperand::MOInvariant)
5545	: MachineMemOperand::MONone;
5546
5547	MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5548
5549	// Registers used in building the DAG.
5550	const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5551	const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5552
5553	// Offsets of descriptor members.
5554	const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5555	const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5556
5557	const MVT RegVT = Subtarget.getScalarIntVT();
5558	const Align Alignment = Subtarget.isPPC64() ? Align (`8`) : Align (`4`);
5559
5560	// One load for the functions entry point address.
5561	SDValue LoadFuncPtr = DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: Callee, PtrInfo: MPI,
5562	Alignment, MMOFlags);
5563
5564	// One for loading the TOC anchor for the module that contains the called
5565	// function.
5566	SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCAnchorOffset, DL: dl);
5567	SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: TOCOff);
5568	SDValue TOCPtr =
5569	DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddTOC,
5570	PtrInfo: MPI.getWithOffset(O: TOCAnchorOffset), Alignment, MMOFlags);
5571
5572	// One for loading the environment pointer.
5573	SDValue PtrOff = DAG.getIntPtrConstant(Val: EnvPtrOffset, DL: dl);
5574	SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: PtrOff);
5575	SDValue LoadEnvPtr =
5576	DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddPtr,
5577	PtrInfo: MPI.getWithOffset(O: EnvPtrOffset), Alignment, MMOFlags);
5578
5579
5580	// Then copy the newly loaded TOC anchor to the TOC pointer.
5581	SDValue TOCVal = DAG.getCopyToReg(Chain, dl, Reg: TOCReg, N: TOCPtr, Glue);
5582	Chain = TOCVal.getValue(R: `0`);
5583	Glue = TOCVal.getValue(R: `1`);
5584
5585	// If the function call has an explicit 'nest' parameter, it takes the
5586	// place of the environment pointer.
5587	assert((!hasNest \|\| !Subtarget.isAIXABI()) &&
5588	"Nest parameter is not supported on AIX.");
5589	if (!hasNest) {
5590	SDValue EnvVal = DAG.getCopyToReg(Chain, dl, Reg: EnvPtrReg, N: LoadEnvPtr, Glue);
5591	Chain = EnvVal.getValue(R: `0`);
5592	Glue = EnvVal.getValue(R: `1`);
5593	}
5594
5595	// The rest of the indirect call sequence is the same as the non-descriptor
5596	// DAG.
5597	prepareIndirectCall(DAG, Callee&: LoadFuncPtr, Glue, Chain, dl);
5598	}
5599
5600	static void
5601	buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5602	PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5603	SelectionDAG &DAG,
5604	SmallVector<std::pair<unsigned, SDValue>, `8`> &RegsToPass,
5605	SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5606	const PPCSubtarget &Subtarget) {
5607	const bool IsPPC64 = Subtarget.isPPC64();
5608	// MVT for a general purpose register.
5609	const MVT RegVT = Subtarget.getScalarIntVT();
5610
5611	// First operand is always the chain.
5612	Ops.push_back(Elt: Chain);
5613
5614	// If it's a direct call pass the callee as the second operand.
5615	if (!CFlags.IsIndirect)
5616	Ops.push_back(Elt: Callee);
5617	else {
5618	assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5619
5620	// For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5621	// on the stack (this would have been done in `LowerCall_64SVR4` or
5622	// `LowerCall_AIX`). The call instruction is a pseudo instruction that
5623	// represents both the indirect branch and a load that restores the TOC
5624	// pointer from the linkage area. The operand for the TOC restore is an add
5625	// of the TOC save offset to the stack pointer. This must be the second
5626	// operand: after the chain input but before any other variadic arguments.
5627	// For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5628	// saved or used.
5629	if (isTOCSaveRestoreRequired(Subtarget)) {
5630	const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5631
5632	SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: RegVT);
5633	unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5634	SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
5635	SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: StackPtr, N2: TOCOff);
5636	Ops.push_back(Elt: AddTOC);
5637	}
5638
5639	// Add the register used for the environment pointer.
5640	if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5641	Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getEnvironmentPointerRegister(),
5642	VT: RegVT));
5643
5644
5645	// Add CTR register as callee so a bctr can be emitted later.
5646	if (CFlags.IsTailCall)
5647	Ops.push_back(Elt: DAG.getRegister(Reg: IsPPC64 ? PPC::CTR8 : PPC::CTR, VT: RegVT));
5648	}
5649
5650	// If this is a tail call add stack pointer delta.
5651	if (CFlags.IsTailCall)
5652	Ops.push_back(Elt: DAG.getConstant(Val: SPDiff, DL: dl, VT: MVT::i32));
5653
5654	// Add argument registers to the end of the list so that they are known live
5655	// into the call.
5656	for (const auto &[Reg, N] : RegsToPass)
5657	Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType()));
5658
5659	// We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5660	// no way to mark dependencies as implicit here.
5661	// We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5662	if ((Subtarget.is64BitELFABI() \|\| Subtarget.isAIXABI()) &&
5663	!CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5664	Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getTOCPointerRegister(), VT: RegVT));
5665
5666	// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5667	if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5668	Ops.push_back(Elt: DAG.getRegister(Reg: PPC::CR1EQ, VT: MVT::i32));
5669
5670	// Add a register mask operand representing the call-preserved registers.
5671	const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5672	const uint32_t *Mask =
5673	TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CFlags.CallConv);
5674	assert(Mask && "Missing call preserved mask for calling convention");
5675	Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
5676
5677	// If the glue is valid, it is the last operand.
5678	if (Glue.getNode())
5679	Ops.push_back(Elt: Glue);
5680	}
5681
5682	SDValue PPCTargetLowering::FinishCall(
5683	CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5684	SmallVector<std::pair<unsigned, SDValue>, `8`> &RegsToPass, SDValue Glue,
5685	SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5686	unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5687	SmallVectorImpl<SDValue> &InVals, const CallBase CB) const* {
5688
5689	if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) \|\|
5690	Subtarget.isAIXABI())
5691	setUsesTOCBasePtr(DAG);
5692
5693	unsigned CallOpc =
5694	getCallOpcode(CFlags, Caller: DAG.getMachineFunction().getFunction(), Callee,
5695	Subtarget, TM: DAG.getTarget(), IsStrictFPCall: CB ? CB->isStrictFP() : false);
5696
5697	if (!CFlags.IsIndirect)
5698	Callee = transformCallee(Callee, DAG, dl, Subtarget);
5699	else if (Subtarget.usesFunctionDescriptors())
5700	prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5701	dl, hasNest: CFlags.HasNest, Subtarget);
5702	else
5703	prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5704
5705	// Build the operand list for the call instruction.
5706	SmallVector<SDValue, `8`> Ops;
5707	buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5708	SPDiff, Subtarget);
5709
5710	// Emit tail call.
5711	if (CFlags.IsTailCall) {
5712	// Indirect tail call when using PC Relative calls do not have the same
5713	// constraints.
5714	assert(((Callee.getOpcode() == ISD::Register &&
5715	cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) \|\|
5716	Callee.getOpcode() == ISD::TargetExternalSymbol \|\|
5717	Callee.getOpcode() == ISD::TargetGlobalAddress \|\|
5718	isa<ConstantSDNode>(Callee) \|\|
5719	(CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5720	"Expecting a global address, external symbol, absolute value, "
5721	"register or an indirect tail call when PC Relative calls are "
5722	"used.");
5723	// PC Relative calls also use TC_RETURN as the way to mark tail calls.
5724	assert(CallOpc == PPCISD::TC_RETURN &&
5725	"Unexpected call opcode for a tail call.");
5726	DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5727	SDValue Ret = DAG.getNode(Opcode: CallOpc, DL: dl, VT: MVT::Other, Ops);
5728	DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CFlags.NoMerge);
5729	return Ret;
5730	}
5731
5732	std::array<EVT, `2`> ReturnTypes = {._M_elems: {MVT::Other, MVT::Glue}};
5733	Chain = DAG.getNode(Opcode: CallOpc, DL: dl, ResultTys: ReturnTypes, Ops);
5734	DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CFlags.NoMerge);
5735	Glue = Chain.getValue(R: `1`);
5736
5737	// When performing tail call optimization the callee pops its arguments off
5738	// the stack. Account for this here so these bytes can be pushed back on in
5739	// PPCFrameLowering::eliminateCallFramePseudoInstr.
5740	int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5741	getTargetMachine().Options.GuaranteedTailCallOpt)
5742	? NumBytes
5743	: `0`;
5744
5745	Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: BytesCalleePops, Glue, DL: dl);
5746	Glue = Chain.getValue(R: `1`);
5747
5748	return LowerCallResult(Chain, InGlue: Glue, CallConv: CFlags.CallConv, isVarArg: CFlags.IsVarArg, Ins, dl,
5749	DAG, InVals);
5750	}
5751
5752	bool PPCTargetLowering::supportsTailCallFor(const CallBase CB) const* {
5753	CallingConv::ID CalleeCC = CB->getCallingConv();
5754	const Function *CallerFunc = CB->getCaller();
5755	CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5756	const Function *CalleeFunc = CB->getCalledFunction();
5757	if (!CalleeFunc)
5758	return false;
5759	const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(Val: CalleeFunc);
5760
5761	SmallVector<ISD::OutputArg, `2`> Outs;
5762	SmallVector<ISD::InputArg, `2`> Ins;
5763
5764	GetReturnInfo(CC: CalleeCC, ReturnType: CalleeFunc->getReturnType(),
5765	attr: CalleeFunc->getAttributes(), Outs, TLI: *this,
5766	DL: CalleeFunc->getDataLayout());
5767
5768	return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5769	isVarArg: CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5770	isCalleeExternalSymbol: false /isCalleeExternalSymbol/);
5771	}
5772
5773	bool PPCTargetLowering::isEligibleForTCO(
5774	const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5775	CallingConv::ID CallerCC, const CallBase CB, bool* isVarArg,
5776	const SmallVectorImpl<ISD::OutputArg> &Outs,
5777	const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5778	bool isCalleeExternalSymbol) const {
5779	if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5780	return false;
5781
5782	if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5783	return IsEligibleForTailCallOptimization_64SVR4(
5784	CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5785	isCalleeExternalSymbol);
5786	else
5787	return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5788	isVarArg, Ins);
5789	}
5790
5791	SDValue
5792	PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5793	SmallVectorImpl<SDValue> &InVals) const {
5794	SelectionDAG &DAG = CLI.DAG;
5795	SDLoc &dl = CLI.DL;
5796	SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5797	SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5798	SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5799	SDValue Chain = CLI.Chain;
5800	SDValue Callee = CLI.Callee;
5801	bool &isTailCall = CLI.IsTailCall;
5802	CallingConv::ID CallConv = CLI.CallConv;
5803	bool isVarArg = CLI.IsVarArg;
5804	bool isPatchPoint = CLI.IsPatchPoint;
5805	const CallBase *CB = CLI.CB;
5806
5807	if (isTailCall) {
5808	MachineFunction &MF = DAG.getMachineFunction();
5809	CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5810	auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
5811	const GlobalValue GV = G ? G->getGlobal() : nullptr*;
5812	bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Val: Callee);
5813
5814	isTailCall =
5815	isEligibleForTCO(CalleeGV: GV, CalleeCC: CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5816	CallerFunc: &(MF.getFunction()), isCalleeExternalSymbol: IsCalleeExternalSymbol);
5817	if (isTailCall) {
5818	++NumTailCalls;
5819	if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5820	++NumSiblingCalls;
5821
5822	// PC Relative calls no longer guarantee that the callee is a Global
5823	// Address Node. The callee could be an indirect tail call in which
5824	// case the SDValue for the callee could be a load (to load the address
5825	// of a function pointer) or it may be a register copy (to move the
5826	// address of the callee from a function parameter into a virtual
5827	// register). It may also be an ExternalSymbolSDNode (ex memcopy).
5828	assert((Subtarget.isUsingPCRelativeCalls() \|\|
5829	isa<GlobalAddressSDNode>(Callee)) &&
5830	"Callee should be an llvm::Function object.");
5831
5832	LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5833	<< "\nTCO callee: ");
5834	LLVM_DEBUG(Callee.dump());
5835	}
5836	}
5837
5838	if (!isTailCall && CB && CB->isMustTailCall())
5839	report_fatal_error(reason: "failed to perform tail call elimination on a call "
5840	"site marked musttail");
5841
5842	// When long calls (i.e. indirect calls) are always used, calls are always
5843	// made via function pointer. If we have a function name, first translate it
5844	// into a pointer.
5845	if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Val: Callee) &&
5846	!isTailCall)
5847	Callee = LowerGlobalAddress(Op: Callee, DAG);
5848
5849	CallFlags CFlags(
5850	CallConv, isTailCall, isVarArg, isPatchPoint,
5851	isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5852	// hasNest
5853	Subtarget.is64BitELFABI() &&
5854	any_of(Range&: Outs, P: [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5855	CLI.NoMerge);
5856
5857	if (Subtarget.isAIXABI())
5858	return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5859	InVals, CB);
5860
5861	assert(Subtarget.isSVR4ABI());
5862	if (Subtarget.isPPC64())
5863	return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5864	InVals, CB);
5865	return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5866	InVals, CB);
5867	}
5868
5869	SDValue PPCTargetLowering::LowerCall_32SVR4(
5870	SDValue Chain, SDValue Callee, CallFlags CFlags,
5871	const SmallVectorImpl<ISD::OutputArg> &Outs,
5872	const SmallVectorImpl<SDValue> &OutVals,
5873	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5874	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5875	const CallBase CB) const* {
5876	// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5877	// of the 32-bit SVR4 ABI stack frame layout.
5878
5879	const CallingConv::ID CallConv = CFlags.CallConv;
5880	const bool IsVarArg = CFlags.IsVarArg;
5881	const bool IsTailCall = CFlags.IsTailCall;
5882
5883	assert((CallConv == CallingConv::C \|\|
5884	CallConv == CallingConv::Cold \|\|
5885	CallConv == CallingConv::Fast) && "Unknown calling convention!");
5886
5887	const Align PtrAlign(`4`);
5888
5889	MachineFunction &MF = DAG.getMachineFunction();
5890
5891	// Mark this function as potentially containing a function that contains a
5892	// tail call. As a consequence the frame pointer will be used for dynamicalloc
5893	// and restoring the callers stack pointer in this functions epilog. This is
5894	// done because by tail calling the called function might overwrite the value
5895	// in this function's (MF) stack pointer stack slot 0(SP).
5896	if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5897	CallConv == CallingConv::Fast)
5898	MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5899
5900	// Count how many bytes are to be pushed on the stack, including the linkage
5901	// area, parameter list area and the part of the local variable space which
5902	// contains copies of aggregates which are passed by value.
5903
5904	// Assign locations to all of the outgoing arguments.
5905	SmallVector<CCValAssign, `16`> ArgLocs;
5906	CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5907
5908	// Reserve space for the linkage area on the stack.
5909	CCInfo.AllocateStack(Size: Subtarget.getFrameLowering()->getLinkageSize(),
5910	Alignment: PtrAlign);
5911
5912	if (IsVarArg) {
5913	// Handle fixed and variable vector arguments differently.
5914	// Fixed vector arguments go into registers as long as registers are
5915	// available. Variable vector arguments always go into memory.
5916	unsigned NumArgs = Outs.size();
5917
5918	for (unsigned i = `0`; i != NumArgs; ++i) {
5919	MVT ArgVT = Outs [i].VT;
5920	ISD::ArgFlagsTy ArgFlags = Outs [i].Flags;
5921	bool Result;
5922
5923	if (!ArgFlags.isVarArg()) {
5924	Result = CC_PPC32_SVR4(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full, ArgFlags,
5925	OrigTy: Outs [i].OrigTy, State&: CCInfo);
5926	} else {
5927	Result = CC_PPC32_SVR4_VarArg(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full,
5928	ArgFlags, OrigTy: Outs [i].OrigTy, State&: CCInfo);
5929	}
5930
5931	if (Result) {
5932	#ifndef NDEBUG
5933	errs() << "Call operand #" << i << " has unhandled type "
5934	<< ArgVT << "\n";
5935	#endif
5936	llvm_unreachable(nullptr);
5937	}
5938	}
5939	} else {
5940	// All arguments are treated the same.
5941	CCInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4);
5942	}
5943
5944	// Assign locations to all of the outgoing aggregate by value arguments.
5945	SmallVector<CCValAssign, `16`> ByValArgLocs;
5946	CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5947
5948	// Reserve stack space for the allocations in CCInfo.
5949	CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);
5950
5951	CCByValInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4_ByVal);
5952
5953	// Size of the linkage area, parameter list area and the part of the local
5954	// space variable where copies of aggregates which are passed by value are
5955	// stored.
5956	unsigned NumBytes = CCByValInfo.getStackSize();
5957
5958	// Calculate by how many bytes the stack has to be adjusted in case of tail
5959	// call optimization.
5960	int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: IsTailCall, ParamSize: NumBytes);
5961
5962	// Adjust the stack pointer for the new arguments...
5963	// These operations are automatically eliminated by the prolog/epilog pass
5964	Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: `0`, DL: dl);
5965	SDValue CallSeqStart = Chain;
5966
5967	// Load the return address and frame pointer so it can be moved somewhere else
5968	// later.
5969	SDValue LROp, FPOp;
5970	Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);
5971
5972	// Set up a copy of the stack pointer for use loading and storing any
5973	// arguments that may not fit in the registers available for argument
5974	// passing.
5975	SDValue StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
5976
5977	SmallVector<std::pair<unsigned, SDValue>, `8`> RegsToPass;
5978	SmallVector<TailCallArgumentInfo, `8`> TailCallArguments;
5979	SmallVector<SDValue, `8`> MemOpChains;
5980
5981	bool seenFloatArg = false;
5982	// Walk the register/memloc assignments, inserting copies/loads.
5983	// i - Tracks the index into the list of registers allocated for the call
5984	// RealArgIdx - Tracks the index into the list of actual function arguments
5985	// j - Tracks the index into the list of byval arguments
5986	for (unsigned i = `0`, RealArgIdx = `0`, j = `0`, e = ArgLocs.size();
5987	i != e;
5988	++i, ++RealArgIdx) {
5989	CCValAssign &VA = ArgLocs [i];
5990	SDValue Arg = OutVals [RealArgIdx];
5991	ISD::ArgFlagsTy Flags = Outs [RealArgIdx].Flags;
5992
5993	if (Flags.isByVal()) {
5994	// Argument is an aggregate which is passed by value, thus we need to
5995	// create a copy of it in the local variable space of the current stack
5996	// frame (which is the stack frame of the caller) and pass the address of
5997	// this copy to the callee.
5998	assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5999	CCValAssign &ByValVA = ByValArgLocs [j++];
6000	assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6001
6002	// Memory reserved in the local variable space of the callers stack frame.
6003	unsigned LocMemOffset = ByValVA.getLocMemOffset();
6004
6005	SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
6006	PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
6007	N1: StackPtr, N2: PtrOff);
6008
6009	// Create a copy of the argument in the local area of the current
6010	// stack frame.
6011	SDValue MemcpyCall =
6012	CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
6013	Chain: CallSeqStart.getNode()->getOperand(Num: `0`),
6014	Flags, DAG, dl);
6015
6016	// This must go outside the CALLSEQ_START..END.
6017	SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: NumBytes, OutSize: `0`,
6018	DL: SDLoc (MemcpyCall));
6019	DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
6020	To: NewCallSeqStart.getNode());
6021	Chain = CallSeqStart = NewCallSeqStart;
6022
6023	// Pass the address of the aggregate copy on the stack either in a
6024	// physical register or in the parameter list area of the current stack
6025	// frame to the callee.
6026	Arg = PtrOff;
6027	}
6028
6029	// When useCRBits() is true, there can be i1 arguments.
6030	// It is because getRegisterType(MVT::i1) => MVT::i1,
6031	// and for other integer types getRegisterType() => MVT::i32.
6032	// Extend i1 and ensure callee will get i32.
6033	if (Arg.getValueType() == MVT::i1)
6034	Arg = DAG.getNode(Opcode: Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6035	DL: dl, VT: MVT::i32, Operand: Arg);
6036
6037	if (VA.isRegLoc()) {
6038	seenFloatArg \|= VA.getLocVT().isFloatingPoint();
6039	// Put argument in a physical register.
6040	if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6041	bool IsLE = Subtarget.isLittleEndian();
6042	SDValue SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
6043	N2: DAG.getIntPtrConstant(Val: IsLE ? `0` : `1`, DL: dl));
6044	RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y: SVal.getValue(R: `0`)));
6045	SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
6046	N2: DAG.getIntPtrConstant(Val: IsLE ? `1` : `0`, DL: dl));
6047	RegsToPass.push_back(Elt: std::make_pair(x: ArgLocs [++i].getLocReg(),
6048	y: SVal.getValue(R: `0`)));
6049	} else
6050	RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
6051	} else {
6052	// Put argument in the parameter list area of the current stack frame.
6053	assert(VA.isMemLoc());
6054	unsigned LocMemOffset = VA.getLocMemOffset();
6055
6056	if (!IsTailCall) {
6057	SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
6058	PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
6059	N1: StackPtr, N2: PtrOff);
6060
6061	MemOpChains.push_back(
6062	Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo ()));
6063	} else {
6064	// Calculate and remember argument location.
6065	CalculateTailCallArgDest(DAG, MF, IsPPC64: false, Arg, SPDiff, ArgOffset: LocMemOffset,
6066	TailCallArguments);
6067	}
6068	}
6069	}
6070
6071	if (!MemOpChains.empty())
6072	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
6073
6074	// Build a sequence of copy-to-reg nodes chained together with token chain
6075	// and flag operands which copy the outgoing args into the appropriate regs.
6076	SDValue InGlue;
6077	for (const auto &[Reg, N] : RegsToPass) {
6078	Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
6079	InGlue = Chain.getValue(R: `1`);
6080	}
6081
6082	// Set CR bit 6 to true if this is a vararg call with floating args passed in
6083	// registers.
6084	if (IsVarArg) {
6085	SDVTList VTs = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
6086	SDValue Ops[] = { Chain, InGlue };
6087
6088	Chain = DAG.getNode(Opcode: seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, DL: dl,
6089	VTList: VTs, Ops: ArrayRef(Ops, InGlue.getNode() ? `2` : `1`));
6090
6091	InGlue = Chain.getValue(R: `1`);
6092	}
6093
6094	if (IsTailCall)
6095	PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6096	TailCallArguments);
6097
6098	return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
6099	Callee, SPDiff, NumBytes, Ins, InVals, CB);
6100	}
6101
6102	// Copy an argument into memory, being careful to do this outside the
6103	// call sequence for the call to which the argument belongs.
6104	SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6105	SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6106	SelectionDAG &DAG, const SDLoc &dl) const {
6107	SDValue MemcpyCall = CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
6108	Chain: CallSeqStart.getNode()->getOperand(Num: `0`),
6109	Flags, DAG, dl);
6110	// The MEMCPY must go outside the CALLSEQ_START..END.
6111	int64_t FrameSize = CallSeqStart.getConstantOperandVal(i: `1`);
6112	SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: FrameSize, OutSize: `0`,
6113	DL: SDLoc (MemcpyCall));
6114	DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
6115	To: NewCallSeqStart.getNode());
6116	return NewCallSeqStart;
6117	}
6118
6119	SDValue PPCTargetLowering::LowerCall_64SVR4(
6120	SDValue Chain, SDValue Callee, CallFlags CFlags,
6121	const SmallVectorImpl<ISD::OutputArg> &Outs,
6122	const SmallVectorImpl<SDValue> &OutVals,
6123	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6124	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6125	const CallBase CB) const* {
6126	bool isELFv2ABI = Subtarget.isELFv2ABI();
6127	bool isLittleEndian = Subtarget.isLittleEndian();
6128	unsigned NumOps = Outs.size();
6129	bool IsSibCall = false;
6130	bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6131
6132	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
6133	unsigned PtrByteSize = `8`;
6134
6135	MachineFunction &MF = DAG.getMachineFunction();
6136
6137	if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6138	IsSibCall = true;
6139
6140	// Mark this function as potentially containing a function that contains a
6141	// tail call. As a consequence the frame pointer will be used for dynamicalloc
6142	// and restoring the callers stack pointer in this functions epilog. This is
6143	// done because by tail calling the called function might overwrite the value
6144	// in this function's (MF) stack pointer stack slot 0(SP).
6145	if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6146	MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6147
6148	assert(!(IsFastCall && CFlags.IsVarArg) &&
6149	"fastcc not supported on varargs functions");
6150
6151	// Count how many bytes are to be pushed on the stack, including the linkage
6152	// area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6153	// reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6154	// area is 32 bytes reserved space for [SP][CR][LR][TOC].
6155	unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6156	unsigned NumBytes = LinkageSize;
6157	unsigned GPR_idx = `0`, FPR_idx = `0`, VR_idx = `0`;
6158
6159	static const MCPhysReg GPR[] = {
6160	PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6161	PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6162	};
6163	static const MCPhysReg VR[] = {
6164	PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6165	PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6166	};
6167
6168	const unsigned NumGPRs = std::size(GPR);
6169	const unsigned NumFPRs = useSoftFloat() ? `0` : `13`;
6170	const unsigned NumVRs = std::size(VR);
6171
6172	// On ELFv2, we can avoid allocating the parameter area if all the arguments
6173	// can be passed to the callee in registers.
6174	// For the fast calling convention, there is another check below.
6175	// Note: We should keep consistent with LowerFormalArguments_64SVR4()
6176	bool HasParameterArea = !isELFv2ABI \|\| CFlags.IsVarArg \|\| IsFastCall;
6177	if (!HasParameterArea) {
6178	unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6179	unsigned AvailableFPRs = NumFPRs;
6180	unsigned AvailableVRs = NumVRs;
6181	unsigned NumBytesTmp = NumBytes;
6182	for (unsigned i = `0`; i != NumOps; ++i) {
6183	if (Outs [i].Flags.isNest()) continue;
6184	if (CalculateStackSlotUsed(ArgVT: Outs [i].VT, OrigVT: Outs [i].ArgVT, Flags: Outs [i].Flags,
6185	PtrByteSize, LinkageSize, ParamAreaSize,
6186	ArgOffset&: NumBytesTmp, AvailableFPRs, AvailableVRs))
6187	HasParameterArea = true;
6188	}
6189	}
6190
6191	// When using the fast calling convention, we don't provide backing for
6192	// arguments that will be in registers.
6193	unsigned NumGPRsUsed = `0`, NumFPRsUsed = `0`, NumVRsUsed = `0`;
6194
6195	// Avoid allocating parameter area for fastcc functions if all the arguments
6196	// can be passed in the registers.
6197	if (IsFastCall)
6198	HasParameterArea = false;
6199
6200	// Add up all the space actually used.
6201	for (unsigned i = `0`; i != NumOps; ++i) {
6202	ISD::ArgFlagsTy Flags = Outs [i].Flags;
6203	EVT ArgVT = Outs [i].VT;
6204	EVT OrigVT = Outs [i].ArgVT;
6205
6206	if (Flags.isNest())
6207	continue;
6208
6209	if (IsFastCall) {
6210	if (Flags.isByVal()) {
6211	NumGPRsUsed += (Flags.getByValSize()+`7`)/`8`;
6212	if (NumGPRsUsed > NumGPRs)
6213	HasParameterArea = true;
6214	} else {
6215	switch (ArgVT.getSimpleVT().SimpleTy) {
6216	default: llvm_unreachable("Unexpected ValueType for argument!");
6217	case MVT::i1:
6218	case MVT::i32:
6219	case MVT::i64:
6220	if (++NumGPRsUsed <= NumGPRs)
6221	continue;
6222	break;
6223	case MVT::v4i32:
6224	case MVT::v8i16:
6225	case MVT::v16i8:
6226	case MVT::v2f64:
6227	case MVT::v2i64:
6228	case MVT::v1i128:
6229	case MVT::f128:
6230	if (++NumVRsUsed <= NumVRs)
6231	continue;
6232	break;
6233	case MVT::v4f32:
6234	if (++NumVRsUsed <= NumVRs)
6235	continue;
6236	break;
6237	case MVT::f32:
6238	case MVT::f64:
6239	if (++NumFPRsUsed <= NumFPRs)
6240	continue;
6241	break;
6242	}
6243	HasParameterArea = true;
6244	}
6245	}
6246
6247	/ Respect alignment of argument on the stack. /
6248	auto Alignement =
6249	CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6250	NumBytes = alignTo(Size: NumBytes, A: Alignement);
6251
6252	NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6253	if (Flags.isInConsecutiveRegsLast())
6254	NumBytes = ((NumBytes + PtrByteSize - `1`)/PtrByteSize) * PtrByteSize;
6255	}
6256
6257	unsigned NumBytesActuallyUsed = NumBytes;
6258
6259	// In the old ELFv1 ABI,
6260	// the prolog code of the callee may store up to 8 GPR argument registers to
6261	// the stack, allowing va_start to index over them in memory if its varargs.
6262	// Because we cannot tell if this is needed on the caller side, we have to
6263	// conservatively assume that it is needed. As such, make sure we have at
6264	// least enough stack space for the caller to store the 8 GPRs.
6265	// In the ELFv2 ABI, we allocate the parameter area iff a callee
6266	// really requires memory operands, e.g. a vararg function.
6267	if (HasParameterArea)
6268	NumBytes = std::max(a: NumBytes, b: LinkageSize + `8` * PtrByteSize);
6269	else
6270	NumBytes = LinkageSize;
6271
6272	// Tail call needs the stack to be aligned.
6273	if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6274	NumBytes = EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes);
6275
6276	int SPDiff = `0`;
6277
6278	// Calculate by how many bytes the stack has to be adjusted in case of tail
6279	// call optimization.
6280	if (!IsSibCall)
6281	SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: CFlags.IsTailCall, ParamSize: NumBytes);
6282
6283	// To protect arguments on the stack from being clobbered in a tail call,
6284	// force all the loads to happen before doing any other lowering.
6285	if (CFlags.IsTailCall)
6286	Chain = DAG.getStackArgumentTokenFactor(Chain);
6287
6288	// Adjust the stack pointer for the new arguments...
6289	// These operations are automatically eliminated by the prolog/epilog pass
6290	if (!IsSibCall)
6291	Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: `0`, DL: dl);
6292	SDValue CallSeqStart = Chain;
6293
6294	// Load the return address and frame pointer so it can be move somewhere else
6295	// later.
6296	SDValue LROp, FPOp;
6297	Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);
6298
6299	// Set up a copy of the stack pointer for use loading and storing any
6300	// arguments that may not fit in the registers available for argument
6301	// passing.
6302	SDValue StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
6303
6304	// Figure out which arguments are going to go in registers, and which in
6305	// memory. Also, if this is a vararg function, floating point operations
6306	// must be stored to our stack, and loaded into integer regs as well, if
6307	// any integer regs are available for argument passing.
6308	unsigned ArgOffset = LinkageSize;
6309
6310	SmallVector<std::pair<unsigned, SDValue>, `8`> RegsToPass;
6311	SmallVector<TailCallArgumentInfo, `8`> TailCallArguments;
6312
6313	SmallVector<SDValue, `8`> MemOpChains;
6314	for (unsigned i = `0`; i != NumOps; ++i) {
6315	SDValue Arg = OutVals [i];
6316	ISD::ArgFlagsTy Flags = Outs [i].Flags;
6317	EVT ArgVT = Outs [i].VT;
6318	EVT OrigVT = Outs [i].ArgVT;
6319
6320	// PtrOff will be used to store the current argument to the stack if a
6321	// register cannot be found for it.
6322	SDValue PtrOff;
6323
6324	// We re-align the argument offset for each argument, except when using the
6325	// fast calling convention, when we need to make sure we do that only when
6326	// we'll actually use a stack slot.
6327	auto ComputePtrOff = [&]() {
6328	/* Respect alignment of argument on the stack. */
6329	auto Alignment =
6330	CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6331	ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
6332
6333	PtrOff = DAG.getConstant(Val: ArgOffset, DL: dl, VT: StackPtr.getValueType());
6334
6335	PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6336	};
6337
6338	if (!IsFastCall) {
6339	ComputePtrOff ();
6340
6341	/* Compute GPR index associated with argument offset. */
6342	GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6343	GPR_idx = std::min(a: GPR_idx, b: NumGPRs);
6344	}
6345
6346	// Promote integers to 64-bit values.
6347	if (Arg.getValueType() == MVT::i32 \|\| Arg.getValueType() == MVT::i1) {
6348	// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6349	unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6350	Arg = DAG.getNode(Opcode: ExtOp, DL: dl, VT: MVT::i64, Operand: Arg);
6351	}
6352
6353	// FIXME memcpy is used way more than necessary. Correctness first.
6354	// Note: "by value" is code for passing a structure by value, not
6355	// basic types.
6356	if (Flags.isByVal()) {
6357	// Note: Size includes alignment padding, so
6358	// struct x { short a; char b; }
6359	// will have Size = 4. With #pragma pack(1), it will have Size = 3.
6360	// These are the proper values we need for right-justifying the
6361	// aggregate in a parameter register.
6362	unsigned Size = Flags.getByValSize();
6363
6364	// An empty aggregate parameter takes up no storage and no
6365	// registers.
6366	if (Size == `0`)
6367	continue;
6368
6369	if (IsFastCall)
6370	ComputePtrOff ();
6371
6372	// All aggregates smaller than 8 bytes must be passed right-justified.
6373	if (Size==`1` \|\| Size==`2` \|\| Size==`4`) {
6374	EVT VT = (Size==`1`) ? MVT::i8 : ((Size==`2`) ? MVT::i16 : MVT::i32);
6375	if (GPR_idx != NumGPRs) {
6376	SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: Arg,
6377	PtrInfo: MachinePointerInfo (), MemVT: VT);
6378	MemOpChains.push_back(Elt: Load.getValue(R: `1`));
6379	RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6380
6381	ArgOffset += PtrByteSize;
6382	continue;
6383	}
6384	}
6385
6386	if (GPR_idx == NumGPRs && Size < `8`) {
6387	SDValue AddPtr = PtrOff;
6388	if (!isLittleEndian) {
6389	SDValue Const = DAG.getConstant(Val: PtrByteSize - Size, DL: dl,
6390	VT: PtrOff.getValueType());
6391	AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6392	}
6393	Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6394	CallSeqStart,
6395	Flags, DAG, dl);
6396	ArgOffset += PtrByteSize;
6397	continue;
6398	}
6399	// Copy the object to parameter save area if it can not be entirely passed
6400	// by registers.
6401	// FIXME: we only need to copy the parts which need to be passed in
6402	// parameter save area. For the parts passed by registers, we don't need
6403	// to copy them to the stack although we need to allocate space for them
6404	// in parameter save area.
6405	if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6406	Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6407	CallSeqStart,
6408	Flags, DAG, dl);
6409
6410	// When a register is available, pass a small aggregate right-justified.
6411	if (Size < `8` && GPR_idx != NumGPRs) {
6412	// The easiest way to get this right-justified in a register
6413	// is to copy the structure into the rightmost portion of a
6414	// local variable slot, then load the whole slot into the
6415	// register.
6416	// FIXME: The memcpy seems to produce pretty awful code for
6417	// small aggregates, particularly for packed ones.
6418	// FIXME: It would be preferable to use the slot in the
6419	// parameter save area instead of a new local variable.
6420	SDValue AddPtr = PtrOff;
6421	if (!isLittleEndian) {
6422	SDValue Const = DAG.getConstant(Val: `8` - Size, DL: dl, VT: PtrOff.getValueType());
6423	AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6424	}
6425	Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6426	CallSeqStart,
6427	Flags, DAG, dl);
6428
6429	// Load the slot into the register.
6430	SDValue Load =
6431	DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: PtrOff, PtrInfo: MachinePointerInfo ());
6432	MemOpChains.push_back(Elt: Load.getValue(R: `1`));
6433	RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6434
6435	// Done with this argument.
6436	ArgOffset += PtrByteSize;
6437	continue;
6438	}
6439
6440	// For aggregates larger than PtrByteSize, copy the pieces of the
6441	// object that fit into registers from the parameter save area.
6442	for (unsigned j=`0`; j<Size; j+=PtrByteSize) {
6443	SDValue Const = DAG.getConstant(Val: j, DL: dl, VT: PtrOff.getValueType());
6444	SDValue AddArg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Arg, N2: Const);
6445	if (GPR_idx != NumGPRs) {
6446	unsigned LoadSizeInBits = std::min(a: PtrByteSize, b: (Size - j)) * `8`;
6447	EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadSizeInBits);
6448	SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: AddArg,
6449	PtrInfo: MachinePointerInfo (), MemVT: ObjType);
6450
6451	MemOpChains.push_back(Elt: Load.getValue(R: `1`));
6452	RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6453	ArgOffset += PtrByteSize;
6454	} else {
6455	ArgOffset += ((Size - j + PtrByteSize-`1`)/PtrByteSize)*PtrByteSize;
6456	break;
6457	}
6458	}
6459	continue;
6460	}
6461
6462	switch (Arg.getSimpleValueType().SimpleTy) {
6463	default: llvm_unreachable("Unexpected ValueType for argument!");
6464	case MVT::i1:
6465	case MVT::i32:
6466	case MVT::i64:
6467	if (Flags.isNest()) {
6468	// The 'nest' parameter, if any, is passed in R11.
6469	RegsToPass.push_back(Elt: std::make_pair(x: PPC::X11, y&: Arg));
6470	break;
6471	}
6472
6473	// These can be scalar arguments or elements of an integer array type
6474	// passed directly. Clang may use those instead of "byval" aggregate
6475	// types to avoid forcing arguments to memory unnecessarily.
6476	if (GPR_idx != NumGPRs) {
6477	RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Arg));
6478	} else {
6479	if (IsFastCall)
6480	ComputePtrOff ();
6481
6482	assert(HasParameterArea &&
6483	"Parameter area must exist to pass an argument in memory.");
6484	LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6485	isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6486	TailCallArguments, dl);
6487	if (IsFastCall)
6488	ArgOffset += PtrByteSize;
6489	}
6490	if (!IsFastCall)
6491	ArgOffset += PtrByteSize;
6492	break;
6493	case MVT::f32:
6494	case MVT::f64: {
6495	// These can be scalar arguments or elements of a float array type
6496	// passed directly. The latter are used to implement ELFv2 homogenous
6497	// float aggregates.
6498
6499	// Named arguments go into FPRs first, and once they overflow, the
6500	// remaining arguments go into GPRs and then the parameter save area.
6501	// Unnamed arguments for vararg functions always go to GPRs and
6502	// then the parameter save area. For now, put all arguments to vararg
6503	// routines always in both locations (FPR *and* GPR or stack slot).
6504	bool NeedGPROrStack = CFlags.IsVarArg \|\| FPR_idx == NumFPRs;
6505	bool NeededLoad = false;
6506
6507	// First load the argument into the next available FPR.
6508	if (FPR_idx != NumFPRs)
6509	RegsToPass.push_back(Elt: std::make_pair(x: FPR[FPR_idx++], y&: Arg));
6510
6511	// Next, load the argument into GPR or stack slot if needed.
6512	if (!NeedGPROrStack)
6513	;
6514	else if (GPR_idx != NumGPRs && !IsFastCall) {
6515	// FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6516	// once we support fp <-> gpr moves.
6517
6518	// In the non-vararg case, this can only ever happen in the
6519	// presence of f32 array types, since otherwise we never run
6520	// out of FPRs before running out of GPRs.
6521	SDValue ArgVal;
6522
6523	// Double values are always passed in a single GPR.
6524	if (Arg.getValueType() != MVT::f32) {
6525	ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: Arg);
6526
6527	// Non-array float values are extended and passed in a GPR.
6528	} else if (!Flags.isInConsecutiveRegs()) {
6529	ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6530	ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6531
6532	// If we have an array of floats, we collect every odd element
6533	// together with its predecessor into one GPR.
6534	} else if (ArgOffset % PtrByteSize != `0`) {
6535	SDValue Lo, Hi;
6536	Lo = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: OutVals [i - `1`]);
6537	Hi = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6538	if (!isLittleEndian)
6539	std::swap(a&: Lo, b&: Hi);
6540	ArgVal = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
6541
6542	// The final element, if even, goes into the first half of a GPR.
6543	} else if (Flags.isInConsecutiveRegsLast()) {
6544	ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6545	ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6546	if (!isLittleEndian)
6547	ArgVal = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: ArgVal,
6548	N2: DAG.getConstant(Val: `32`, DL: dl, VT: MVT::i32));
6549
6550	// Non-final even elements are skipped; they will be handled
6551	// together with the subsequent argument on the next go-around.
6552	} else
6553	ArgVal = SDValue ();
6554
6555	if (ArgVal.getNode())
6556	RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: ArgVal));
6557	} else {
6558	if (IsFastCall)
6559	ComputePtrOff ();
6560
6561	// Single-precision floating-point values are mapped to the
6562	// second (rightmost) word of the stack doubleword.
6563	if (Arg.getValueType() == MVT::f32 &&
6564	!isLittleEndian && !Flags.isInConsecutiveRegs()) {
6565	SDValue ConstFour = DAG.getConstant(Val: `4`, DL: dl, VT: PtrOff.getValueType());
6566	PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: ConstFour);
6567	}
6568
6569	assert(HasParameterArea &&
6570	"Parameter area must exist to pass an argument in memory.");
6571	LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6572	isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6573	TailCallArguments, dl);
6574
6575	NeededLoad = true;
6576	}
6577	// When passing an array of floats, the array occupies consecutive
6578	// space in the argument area; only round up to the next doubleword
6579	// at the end of the array. Otherwise, each float takes 8 bytes.
6580	if (!IsFastCall \|\| NeededLoad) {
6581	ArgOffset += (Arg.getValueType() == MVT::f32 &&
6582	Flags.isInConsecutiveRegs()) ? `4` : `8`;
6583	if (Flags.isInConsecutiveRegsLast())
6584	ArgOffset = ((ArgOffset + PtrByteSize - `1`)/PtrByteSize) * PtrByteSize;
6585	}
6586	break;
6587	}
6588	case MVT::v4f32:
6589	case MVT::v4i32:
6590	case MVT::v8i16:
6591	case MVT::v16i8:
6592	case MVT::v2f64:
6593	case MVT::v2i64:
6594	case MVT::v1i128:
6595	case MVT::f128:
6596	// These can be scalar arguments or elements of a vector array type
6597	// passed directly. The latter are used to implement ELFv2 homogenous
6598	// vector aggregates.
6599
6600	// For a varargs call, named arguments go into VRs or on the stack as
6601	// usual; unnamed arguments always go to the stack or the corresponding
6602	// GPRs when within range. For now, we always put the value in both
6603	// locations (or even all three).
6604	if (CFlags.IsVarArg) {
6605	assert(HasParameterArea &&
6606	"Parameter area must exist if we have a varargs call.");
6607	// We could elide this store in the case where the object fits
6608	// entirely in R registers. Maybe later.
6609	SDValue Store =
6610	DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo ());
6611	MemOpChains.push_back(Elt: Store);
6612	if (VR_idx != NumVRs) {
6613	SDValue Load =
6614	DAG.getLoad(VT: MVT::v4f32, dl, Chain: Store, Ptr: PtrOff, PtrInfo: MachinePointerInfo ());
6615	MemOpChains.push_back(Elt: Load.getValue(R: `1`));
6616	RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Load));
6617	}
6618	ArgOffset += `16`;
6619	for (unsigned i=`0`; i<`16`; i+=PtrByteSize) {
6620	if (GPR_idx == NumGPRs)
6621	break;
6622	SDValue Ix = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
6623	N2: DAG.getConstant(Val: i, DL: dl, VT: PtrVT));
6624	SDValue Load =
6625	DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Ix, PtrInfo: MachinePointerInfo ());
6626	MemOpChains.push_back(Elt: Load.getValue(R: `1`));
6627	RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6628	}
6629	break;
6630	}
6631
6632	// Non-varargs Altivec params go into VRs or on the stack.
6633	if (VR_idx != NumVRs) {
6634	RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Arg));
6635	} else {
6636	if (IsFastCall)
6637	ComputePtrOff ();
6638
6639	assert(HasParameterArea &&
6640	"Parameter area must exist to pass an argument in memory.");
6641	LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6642	isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: true, MemOpChains,
6643	TailCallArguments, dl);
6644	if (IsFastCall)
6645	ArgOffset += `16`;
6646	}
6647
6648	if (!IsFastCall)
6649	ArgOffset += `16`;
6650	break;
6651	}
6652	}
6653
6654	assert((!HasParameterArea \|\| NumBytesActuallyUsed == ArgOffset) &&
6655	"mismatch in size of parameter area");
6656	(void)NumBytesActuallyUsed;
6657
6658	if (!MemOpChains.empty())
6659	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
6660
6661	// Check if this is an indirect call (MTCTR/BCTRL).
6662	// See prepareDescriptorIndirectCall and buildCallOperands for more
6663	// information about calls through function pointers in the 64-bit SVR4 ABI.
6664	if (CFlags.IsIndirect) {
6665	// For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6666	// caller in the TOC save area.
6667	if (isTOCSaveRestoreRequired(Subtarget)) {
6668	assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6669	// Load r2 into a virtual register and store it to the TOC save area.
6670	setUsesTOCBasePtr(DAG);
6671	SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: PPC::X2, VT: MVT::i64);
6672	// TOC save area offset.
6673	unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6674	SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
6675	SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6676	Chain = DAG.getStore(Chain: Val.getValue(R: `1`), dl, Val, Ptr: AddPtr,
6677	PtrInfo: MachinePointerInfo::getStack(
6678	MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
6679	}
6680	// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6681	// This does not mean the MTCTR instruction must use R12; it's easier
6682	// to model this as an extra parameter, so do that.
6683	if (isELFv2ABI && !CFlags.IsPatchPoint)
6684	RegsToPass.push_back(Elt: std::make_pair(x: (unsigned)PPC::X12, y&: Callee));
6685	}
6686
6687	// Build a sequence of copy-to-reg nodes chained together with token chain
6688	// and flag operands which copy the outgoing args into the appropriate regs.
6689	SDValue InGlue;
6690	for (const auto &[Reg, N] : RegsToPass) {
6691	Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
6692	InGlue = Chain.getValue(R: `1`);
6693	}
6694
6695	if (CFlags.IsTailCall && !IsSibCall)
6696	PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6697	TailCallArguments);
6698
6699	return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
6700	Callee, SPDiff, NumBytes, Ins, InVals, CB);
6701	}
6702
6703	// Returns true when the shadow of a general purpose argument register
6704	// in the parameter save area is aligned to at least 'RequiredAlign'.
6705	static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6706	assert(RequiredAlign.value() <= `16` &&
6707	"Required alignment greater than stack alignment.");
6708	switch (Reg) {
6709	default:
6710	report_fatal_error(reason: "called on invalid register.");
6711	case PPC::R5:
6712	case PPC::R9:
6713	case PPC::X3:
6714	case PPC::X5:
6715	case PPC::X7:
6716	case PPC::X9:
6717	// These registers are 16 byte aligned which is the most strict aligment
6718	// we can support.
6719	return true;
6720	case PPC::R3:
6721	case PPC::R7:
6722	case PPC::X4:
6723	case PPC::X6:
6724	case PPC::X8:
6725	case PPC::X10:
6726	// The shadow of these registers in the PSA is 8 byte aligned.
6727	return RequiredAlign <= `8`;
6728	case PPC::R4:
6729	case PPC::R6:
6730	case PPC::R8:
6731	case PPC::R10:
6732	return RequiredAlign <= `4`;
6733	}
6734	}
6735
6736	static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6737	CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6738	Type *OrigTy, CCState &State) {
6739	const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6740	State.getMachineFunction().getSubtarget());
6741	const bool IsPPC64 = Subtarget.isPPC64();
6742	const unsigned PtrSize = IsPPC64 ? `8` : `4`;
6743	const Align PtrAlign(PtrSize);
6744	const Align StackAlign(`16`);
6745	const MVT RegVT = Subtarget.getScalarIntVT();
6746
6747	if (ValVT == MVT::f128)
6748	report_fatal_error(reason: "f128 is unimplemented on AIX.");
6749
6750	static const MCPhysReg GPR_32[] = {// 32-bit registers.
6751	PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6752	PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6753	static const MCPhysReg GPR_64[] = {// 64-bit registers.
6754	PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6755	PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6756
6757	static const MCPhysReg VR[] = {// Vector registers.
6758	PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6759	PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6760	PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6761
6762	const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6763
6764	if (ArgFlags.isNest()) {
6765	MCRegister EnvReg = State.AllocateReg(Reg: IsPPC64 ? PPC::X11 : PPC::R11);
6766	if (!EnvReg)
6767	report_fatal_error(reason: "More then one nest argument.");
6768	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: EnvReg, LocVT: RegVT, HTP: LocInfo));
6769	return false;
6770	}
6771
6772	if (ArgFlags.isByVal()) {
6773	const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6774	if (ByValAlign > StackAlign)
6775	report_fatal_error(reason: "Pass-by-value arguments with alignment greater than "
6776	"16 are not supported.");
6777
6778	const unsigned ByValSize = ArgFlags.getByValSize();
6779	const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6780
6781	// An empty aggregate parameter takes up no storage and no registers,
6782	// but needs a MemLoc for a stack slot for the formal arguments side.
6783	if (ByValSize == `0`) {
6784	State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6785	Offset: State.getStackSize(), LocVT: RegVT, HTP: LocInfo));
6786	return false;
6787	}
6788
6789	// Shadow allocate any registers that are not properly aligned.
6790	unsigned NextReg = State.getFirstUnallocated(Regs: GPRs);
6791	while (NextReg != GPRs.size() &&
6792	!isGPRShadowAligned(Reg: GPRs [NextReg], RequiredAlign: ObjAlign)) {
6793	// Shadow allocate next registers since its aligment is not strict enough.
6794	MCRegister Reg = State.AllocateReg(Regs: GPRs);
6795	// Allocate the stack space shadowed by said register.
6796	State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6797	assert(Reg && "Alocating register unexpectedly failed.");
6798	(void)Reg;
6799	NextReg = State.getFirstUnallocated(Regs: GPRs);
6800	}
6801
6802	const unsigned StackSize = alignTo(Size: ByValSize, A: ObjAlign);
6803	unsigned Offset = State.AllocateStack(Size: StackSize, Alignment: ObjAlign);
6804	for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6805	if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
6806	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6807	else {
6808	State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6809	Offset, LocVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6810	HTP: LocInfo));
6811	break;
6812	}
6813	}
6814	return false;
6815	}
6816
6817	// Arguments always reserve parameter save area.
6818	switch (ValVT.SimpleTy) {
6819	default:
6820	report_fatal_error(reason: "Unhandled value type for argument.");
6821	case MVT::i64:
6822	// i64 arguments should have been split to i32 for PPC32.
6823	assert(IsPPC64 && "PPC32 should have split i64 values.");
6824	[[fallthrough]];
6825	case MVT::i1:
6826	case MVT::i32: {
6827	const unsigned Offset = State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6828	// AIX integer arguments are always passed in register width.
6829	if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6830	LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6831	: CCValAssign::LocInfo::ZExt;
6832	if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
6833	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6834	else
6835	State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT: RegVT, HTP: LocInfo));
6836
6837	return false;
6838	}
6839	case MVT::f32:
6840	case MVT::f64: {
6841	// Parameter save area (PSA) is reserved even if the float passes in fpr.
6842	const unsigned StoreSize = LocVT.getStoreSize();
6843	// Floats are always 4-byte aligned in the PSA on AIX.
6844	// This includes f64 in 64-bit mode for ABI compatibility.
6845	const unsigned Offset =
6846	State.AllocateStack(Size: IsPPC64 ? `8` : StoreSize, Alignment: Align (`4`));
6847	MCRegister FReg = State.AllocateReg(Regs: FPR);
6848	if (FReg)
6849	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: FReg, LocVT, HTP: LocInfo));
6850
6851	// Reserve and initialize GPRs or initialize the PSA as required.
6852	for (unsigned I = `0`; I < StoreSize; I += PtrSize) {
6853	if (MCRegister Reg = State.AllocateReg(Regs: GPRs)) {
6854	assert(FReg && "An FPR should be available when a GPR is reserved.");
6855	if (State.isVarArg()) {
6856	// Successfully reserved GPRs are only initialized for vararg calls.
6857	// Custom handling is required for:
6858	// f64 in PPC32 needs to be split into 2 GPRs.
6859	// f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6860	State.addLoc(
6861	V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6862	}
6863	} else {
6864	// If there are insufficient GPRs, the PSA needs to be initialized.
6865	// Initialization occurs even if an FPR was initialized for
6866	// compatibility with the AIX XL compiler. The full memory for the
6867	// argument will be initialized even if a prior word is saved in GPR.
6868	// A custom memLoc is used when the argument also passes in FPR so
6869	// that the callee handling can skip over it easily.
6870	State.addLoc(
6871	V: FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6872	HTP: LocInfo)
6873	: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6874	break;
6875	}
6876	}
6877
6878	return false;
6879	}
6880	case MVT::v4f32:
6881	case MVT::v4i32:
6882	case MVT::v8i16:
6883	case MVT::v16i8:
6884	case MVT::v2i64:
6885	case MVT::v2f64:
6886	case MVT::v1i128: {
6887	const unsigned VecSize = `16`;
6888	const Align VecAlign(VecSize);
6889
6890	if (!State.isVarArg()) {
6891	// If there are vector registers remaining we don't consume any stack
6892	// space.
6893	if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
6894	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
6895	return false;
6896	}
6897	// Vectors passed on the stack do not shadow GPRs or FPRs even though they
6898	// might be allocated in the portion of the PSA that is shadowed by the
6899	// GPRs.
6900	const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6901	State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6902	return false;
6903	}
6904
6905	unsigned NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
6906	// Burn any underaligned registers and their shadowed stack space until
6907	// we reach the required alignment.
6908	while (NextRegIndex != GPRs.size() &&
6909	!isGPRShadowAligned(Reg: GPRs [NextRegIndex], RequiredAlign: VecAlign)) {
6910	// Shadow allocate register and its stack shadow.
6911	MCRegister Reg = State.AllocateReg(Regs: GPRs);
6912	State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6913	assert(Reg && "Allocating register unexpectedly failed.");
6914	(void)Reg;
6915	NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
6916	}
6917
6918	// Vectors that are passed as fixed arguments are handled differently.
6919	// They are passed in VRs if any are available (unlike arguments passed
6920	// through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6921	// functions)
6922	if (!ArgFlags.isVarArg()) {
6923	if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
6924	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
6925	// Shadow allocate GPRs and stack space even though we pass in a VR.
6926	for (unsigned I = `0`; I != VecSize; I += PtrSize)
6927	State.AllocateReg(Regs: GPRs);
6928	State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6929	return false;
6930	}
6931	// No vector registers remain so pass on the stack.
6932	const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6933	State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6934	return false;
6935	}
6936
6937	// If all GPRS are consumed then we pass the argument fully on the stack.
6938	if (NextRegIndex == GPRs.size()) {
6939	const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6940	State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6941	return false;
6942	}
6943
6944	// Corner case for 32-bit codegen. We have 2 registers to pass the first
6945	// half of the argument, and then need to pass the remaining half on the
6946	// stack.
6947	if (GPRs [NextRegIndex] == PPC::R9) {
6948	const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6949	State.addLoc(
6950	V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6951
6952	const MCRegister FirstReg = State.AllocateReg(Reg: PPC::R9);
6953	const MCRegister SecondReg = State.AllocateReg(Reg: PPC::R10);
6954	assert(FirstReg && SecondReg &&
6955	"Allocating R9 or R10 unexpectedly failed.");
6956	State.addLoc(
6957	V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: FirstReg, LocVT: RegVT, HTP: LocInfo));
6958	State.addLoc(
6959	V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: SecondReg, LocVT: RegVT, HTP: LocInfo));
6960	return false;
6961	}
6962
6963	// We have enough GPRs to fully pass the vector argument, and we have
6964	// already consumed any underaligned registers. Start with the custom
6965	// MemLoc and then the custom RegLocs.
6966	const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6967	State.addLoc(
6968	V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6969	for (unsigned I = `0`; I != VecSize; I += PtrSize) {
6970	const MCRegister Reg = State.AllocateReg(Regs: GPRs);
6971	assert(Reg && "Failed to allocated register for vararg vector argument");
6972	State.addLoc(
6973	V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6974	}
6975	return false;
6976	}
6977	}
6978	return true;
6979	}
6980
6981	// So far, this function is only used by LowerFormalArguments_AIX()
6982	static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6983	bool IsPPC64,
6984	bool HasP8Vector,
6985	bool HasVSX) {
6986	assert((IsPPC64 \|\| SVT != MVT::i64) &&
6987	"i64 should have been split for 32-bit codegen.");
6988
6989	switch (SVT) {
6990	default:
6991	report_fatal_error(reason: "Unexpected value type for formal argument");
6992	case MVT::i1:
6993	case MVT::i32:
6994	case MVT::i64:
6995	return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6996	case MVT::f32:
6997	return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6998	case MVT::f64:
6999	return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7000	case MVT::v4f32:
7001	case MVT::v4i32:
7002	case MVT::v8i16:
7003	case MVT::v16i8:
7004	case MVT::v2i64:
7005	case MVT::v2f64:
7006	case MVT::v1i128:
7007	return &PPC::VRRCRegClass;
7008	}
7009	}
7010
7011	static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7012	SelectionDAG &DAG, SDValue ArgValue,
7013	MVT LocVT, const SDLoc &dl) {
7014	assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7015	assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7016
7017	if (Flags.isSExt())
7018	ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: LocVT, N1: ArgValue,
7019	N2: DAG.getValueType(ValVT));
7020	else if (Flags.isZExt())
7021	ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: LocVT, N1: ArgValue,
7022	N2: DAG.getValueType(ValVT));
7023
7024	return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ValVT, Operand: ArgValue);
7025	}
7026
7027	static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7028	const unsigned LASize = FL->getLinkageSize();
7029
7030	if (PPC::GPRCRegClass.contains(Reg)) {
7031	assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7032	"Reg must be a valid argument register!");
7033	return LASize + `4` * (Reg - PPC::R3);
7034	}
7035
7036	if (PPC::G8RCRegClass.contains(Reg)) {
7037	assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7038	"Reg must be a valid argument register!");
7039	return LASize + `8` * (Reg - PPC::X3);
7040	}
7041
7042	llvm_unreachable("Only general purpose registers expected.");
7043	}
7044
7045	// AIX ABI Stack Frame Layout:
7046	//
7047	//   Low Memory +--------------------------------------------+
7048	//   SP   +---> | Back chain                                 | ---+
7049	//        |     +--------------------------------------------+    |
7050	//        |     | Saved Condition Register                   |    |
7051	//        |     +--------------------------------------------+    |
7052	//        |     | Saved Linkage Register                     |    |
7053	//        |     +--------------------------------------------+    | Linkage Area
7054	//        |     | Reserved for compilers                     |    |
7055	//        |     +--------------------------------------------+    |
7056	//        |     | Reserved for binders                       |    |
7057	//        |     +--------------------------------------------+    |
7058	//        |     | Saved TOC pointer                          | ---+
7059	//        |     +--------------------------------------------+
7060	//        |     | Parameter save area                        |
7061	//        |     +--------------------------------------------+
7062	//        |     | Alloca space                               |
7063	//        |     +--------------------------------------------+
7064	//        |     | Local variable space                       |
7065	//        |     +--------------------------------------------+
7066	//        |     | Float/int conversion temporary             |
7067	//        |     +--------------------------------------------+
7068	//        |     | Save area for AltiVec registers            |
7069	//        |     +--------------------------------------------+
7070	//        |     | AltiVec alignment padding                  |
7071	//        |     +--------------------------------------------+
7072	//        |     | Save area for VRSAVE register              |
7073	//        |     +--------------------------------------------+
7074	//        |     | Save area for General Purpose registers    |
7075	//        |     +--------------------------------------------+
7076	//        |     | Save area for Floating Point registers     |
7077	//        |     +--------------------------------------------+
7078	//        +---- | Back chain                                 |
7079	// High Memory  +--------------------------------------------+
7080	//
7081	// Specifications:
7082	// AIX 7.2 Assembler Language Reference
7083	// Subroutine linkage convention
7084
7085	SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7086	SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7087	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7088	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7089
7090	assert((CallConv == CallingConv::C \|\| CallConv == CallingConv::Cold \|\|
7091	CallConv == CallingConv::Fast) &&
7092	"Unexpected calling convention!");
7093
7094	if (getTargetMachine().Options.GuaranteedTailCallOpt)
7095	report_fatal_error(reason: "Tail call support is unimplemented on AIX.");
7096
7097	if (useSoftFloat())
7098	report_fatal_error(reason: "Soft float support is unimplemented on AIX.");
7099
7100	const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7101
7102	const bool IsPPC64 = Subtarget.isPPC64();
7103	const unsigned PtrByteSize = IsPPC64 ? `8` : `4`;
7104
7105	// Assign locations to all of the incoming arguments.
7106	SmallVector<CCValAssign, `16`> ArgLocs;
7107	MachineFunction &MF = DAG.getMachineFunction();
7108	MachineFrameInfo &MFI = MF.getFrameInfo();
7109	PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7110	CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7111
7112	const EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
7113	// Reserve space for the linkage area on the stack.
7114	const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7115	CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align (PtrByteSize));
7116	uint64_t SaveStackPos = CCInfo.getStackSize();
7117	bool SaveParams = MF.getFunction().hasFnAttribute(Kind: "save-reg-params");
7118	CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_AIX);
7119
7120	SmallVector<SDValue, `8`> MemOps;
7121
7122	for (size_t I = `0`, End = ArgLocs.size(); I != End; / No increment here /) {
7123	CCValAssign &VA = ArgLocs [I++];
7124	MVT LocVT = VA.getLocVT();
7125	MVT ValVT = VA.getValVT();
7126	ISD::ArgFlagsTy Flags = Ins [VA.getValNo()].Flags;
7127
7128	EVT ArgVT = Ins [VA.getValNo()].ArgVT;
7129	bool ArgSignExt = Ins [VA.getValNo()].Flags.isSExt();
7130	// For compatibility with the AIX XL compiler, the float args in the
7131	// parameter save area are initialized even if the argument is available
7132	// in register. The caller is required to initialize both the register
7133	// and memory, however, the callee can choose to expect it in either.
7134	// The memloc is dismissed here because the argument is retrieved from
7135	// the register.
7136	if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7137	continue;
7138
7139	if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7140	const TargetRegisterClass *RegClass = getRegClassForSVT(
7141	SVT: LocVT.SimpleTy, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(), HasVSX: Subtarget.hasVSX());
7142	// On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7143	MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7144	const Register VReg = MF.addLiveIn(PReg: VA.getLocReg(), RC: RegClass);
7145	SDValue Parm = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: SaveVT);
7146	int FI = MFI.CreateFixedObject(Size: SaveVT.getStoreSize(), SPOffset: SaveStackPos, IsImmutable: true);
7147	SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7148	SDValue StoreReg = DAG.getStore(Chain, dl, Val: Parm, Ptr: FIN,
7149	PtrInfo: MachinePointerInfo (), Alignment: Align (PtrByteSize));
7150	SaveStackPos = alignTo(Value: SaveStackPos + SaveVT.getStoreSize(), Align: PtrByteSize);
7151	MemOps.push_back(Elt: StoreReg);
7152	}
7153
7154	if (SaveParams && (VA.isMemLoc() \|\| Flags.isByVal()) && !VA.needsCustom()) {
7155	unsigned StoreSize =
7156	Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7157	SaveStackPos = alignTo(Value: SaveStackPos + StoreSize, Align: PtrByteSize);
7158	}
7159
7160	auto HandleMemLoc = [&]() {
7161	const unsigned LocSize = LocVT.getStoreSize();
7162	const unsigned ValSize = ValVT.getStoreSize();
7163	assert((ValSize <= LocSize) &&
7164	"Object size is larger than size of MemLoc");
7165	int CurArgOffset = VA.getLocMemOffset();
7166	// Objects are right-justified because AIX is big-endian.
7167	if (LocSize > ValSize)
7168	CurArgOffset += LocSize - ValSize;
7169	// Potential tail calls could cause overwriting of argument stack slots.
7170	const bool IsImmutable =
7171	!(getTargetMachine().Options.GuaranteedTailCallOpt &&
7172	(CallConv == CallingConv::Fast));
7173	int FI = MFI.CreateFixedObject(Size: ValSize, SPOffset: CurArgOffset, IsImmutable);
7174	SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7175	SDValue ArgValue =
7176	DAG.getLoad(VT: ValVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo ());
7177
7178	// While the ABI specifies the argument type is (sign or zero) extended
7179	// out to register width, not all code is compliant. We truncate and
7180	// re-extend to be more forgiving of these callers when the argument type
7181	// is smaller than register width.
7182	if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7183	ValVT.isInteger() &&
7184	ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7185	// It is possible to have either real integer values
7186	// or integers that were not originally integers.
7187	// In the latter case, these could have came from structs,
7188	// and these integers would not have an extend on the parameter.
7189	// Since these types of integers do not have an extend specified
7190	// in the first place, the type of extend that we do should not matter.
7191	EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7192	? MVT::i8
7193	: ArgVT;
7194	SDValue ArgValueTrunc =
7195	DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: TruncatedArgVT, Operand: ArgValue);
7196	SDValue ArgValueExt =
7197	ArgSignExt ? DAG.getSExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT)
7198	: DAG.getZExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT);
7199	InVals.push_back(Elt: ArgValueExt);
7200	} else {
7201	InVals.push_back(Elt: ArgValue);
7202	}
7203	};
7204
7205	// Vector arguments to VaArg functions are passed both on the stack, and
7206	// in any available GPRs. Load the value from the stack and add the GPRs
7207	// as live ins.
7208	if (VA.isMemLoc() && VA.needsCustom()) {
7209	assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7210	assert(isVarArg && "Only use custom memloc for vararg.");
7211	// ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7212	// matching custom RegLocs.
7213	const unsigned OriginalValNo = VA.getValNo();
7214	(void)OriginalValNo;
7215
7216	auto HandleCustomVecRegLoc = [&]() {
7217	assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7218	"Missing custom RegLoc.");
7219	VA = ArgLocs [I++];
7220	assert(VA.getValVT().isVector() &&
7221	"Unexpected Val type for custom RegLoc.");
7222	assert(VA.getValNo() == OriginalValNo &&
7223	"ValNo mismatch between custom MemLoc and RegLoc.");
7224	MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7225	MF.addLiveIn(PReg: VA.getLocReg(),
7226	RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
7227	HasVSX: Subtarget.hasVSX()));
7228	};
7229
7230	HandleMemLoc ();
7231	// In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7232	// in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7233	// R10.
7234	HandleCustomVecRegLoc ();
7235	HandleCustomVecRegLoc ();
7236
7237	// If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7238	// we passed the vector in R5, R6, R7 and R8.
7239	if (I != End && ArgLocs [I].isRegLoc() && ArgLocs [I].needsCustom()) {
7240	assert(!IsPPC64 &&
7241	"Only 2 custom RegLocs expected for 64-bit codegen.");
7242	HandleCustomVecRegLoc ();
7243	HandleCustomVecRegLoc ();
7244	}
7245
7246	continue;
7247	}
7248
7249	if (VA.isRegLoc()) {
7250	if (VA.getValVT().isScalarInteger())
7251	FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
7252	else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7253	switch (VA.getValVT().SimpleTy) {
7254	default:
7255	report_fatal_error(reason: "Unhandled value type for argument.");
7256	case MVT::f32:
7257	FuncInfo->appendParameterType(Type: PPCFunctionInfo::ShortFloatingPoint);
7258	break;
7259	case MVT::f64:
7260	FuncInfo->appendParameterType(Type: PPCFunctionInfo::LongFloatingPoint);
7261	break;
7262	}
7263	} else if (VA.getValVT().isVector()) {
7264	switch (VA.getValVT().SimpleTy) {
7265	default:
7266	report_fatal_error(reason: "Unhandled value type for argument.");
7267	case MVT::v16i8:
7268	FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorChar);
7269	break;
7270	case MVT::v8i16:
7271	FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorShort);
7272	break;
7273	case MVT::v4i32:
7274	case MVT::v2i64:
7275	case MVT::v1i128:
7276	FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorInt);
7277	break;
7278	case MVT::v4f32:
7279	case MVT::v2f64:
7280	FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorFloat);
7281	break;
7282	}
7283	}
7284	}
7285
7286	if (Flags.isByVal() && VA.isMemLoc()) {
7287	const unsigned Size =
7288	alignTo(Value: Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7289	Align: PtrByteSize);
7290	const int FI = MF.getFrameInfo().CreateFixedObject(
7291	Size, SPOffset: VA.getLocMemOffset(), / IsImmutable / false,
7292	/ IsAliased / isAliased: true);
7293	SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7294	InVals.push_back(Elt: FIN);
7295
7296	continue;
7297	}
7298
7299	if (Flags.isByVal()) {
7300	assert(VA.isRegLoc() && "MemLocs should already be handled.");
7301
7302	const MCPhysReg ArgReg = VA.getLocReg();
7303	const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7304
7305	const unsigned StackSize = alignTo(Value: Flags.getByValSize(), Align: PtrByteSize);
7306	const int FI = MF.getFrameInfo().CreateFixedObject(
7307	Size: StackSize, SPOffset: mapArgRegToOffsetAIX(Reg: ArgReg, FL), / IsImmutable / false,
7308	/ IsAliased / isAliased: true);
7309	SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7310	InVals.push_back(Elt: FIN);
7311
7312	// Add live ins for all the RegLocs for the same ByVal.
7313	const TargetRegisterClass *RegClass =
7314	IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7315
7316	auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7317	unsigned Offset) {
7318	const Register VReg = MF.addLiveIn(PReg: PhysReg, RC: RegClass);
7319	// Since the callers side has left justified the aggregate in the
7320	// register, we can simply store the entire register into the stack
7321	// slot.
7322	SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
7323	// The store to the fixedstack object is needed becuase accessing a
7324	// field of the ByVal will use a gep and load. Ideally we will optimize
7325	// to extracting the value from the register directly, and elide the
7326	// stores when the arguments address is not taken, but that will need to
7327	// be future work.
7328	SDValue Store = DAG.getStore(
7329	Chain: CopyFrom.getValue(R: `1`), dl, Val: CopyFrom,
7330	Ptr: DAG.getObjectPtrOffset(SL: dl, Ptr: FIN, Offset: TypeSize::getFixed(ExactSize: Offset)),
7331	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset));
7332
7333	MemOps.push_back(Elt: Store);
7334	};
7335
7336	unsigned Offset = `0`;
7337	HandleRegLoc (VA.getLocReg(), Offset);
7338	Offset += PtrByteSize;
7339	for (; Offset != StackSize && ArgLocs [I].isRegLoc();
7340	Offset += PtrByteSize) {
7341	assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7342	"RegLocs should be for ByVal argument.");
7343
7344	const CCValAssign RL = ArgLocs [I++];
7345	HandleRegLoc (RL.getLocReg(), Offset);
7346	FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
7347	}
7348
7349	if (Offset != StackSize) {
7350	assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7351	"Expected MemLoc for remaining bytes.");
7352	assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7353	// Consume the MemLoc.The InVal has already been emitted, so nothing
7354	// more needs to be done.
7355	++I;
7356	}
7357
7358	continue;
7359	}
7360
7361	if (VA.isRegLoc() && !VA.needsCustom()) {
7362	MVT::SimpleValueType SVT = ValVT.SimpleTy;
7363	Register VReg =
7364	MF.addLiveIn(PReg: VA.getLocReg(),
7365	RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
7366	HasVSX: Subtarget.hasVSX()));
7367	SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
7368	if (ValVT.isScalarInteger() &&
7369	(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7370	ArgValue =
7371	truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7372	}
7373	InVals.push_back(Elt: ArgValue);
7374	continue;
7375	}
7376	if (VA.isMemLoc()) {
7377	HandleMemLoc ();
7378	continue;
7379	}
7380	}
7381
7382	// On AIX a minimum of 8 words is saved to the parameter save area.
7383	const unsigned MinParameterSaveArea = `8` * PtrByteSize;
7384	// Area that is at least reserved in the caller of this function.
7385	unsigned CallerReservedArea = std::max<unsigned>(
7386	a: CCInfo.getStackSize(), b: LinkageSize + MinParameterSaveArea);
7387
7388	// Set the size that is at least reserved in caller of this function. Tail
7389	// call optimized function's reserved stack space needs to be aligned so
7390	// that taking the difference between two stack areas will result in an
7391	// aligned stack.
7392	CallerReservedArea =
7393	EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: CallerReservedArea);
7394	FuncInfo->setMinReservedArea(CallerReservedArea);
7395
7396	if (isVarArg) {
7397	int VAListIndex = `0`;
7398	// If any of the optional arguments are passed in register then the fixed
7399	// stack object we spill into is not immutable. Create a fixed stack object
7400	// that overlaps the remainder of the parameter save area.
7401	if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7402	unsigned FixedStackSize =
7403	LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7404	VAListIndex =
7405	MFI.CreateFixedObject(Size: FixedStackSize, SPOffset: CCInfo.getStackSize(),
7406	/ IsImmutable / false, / IsAliased / isAliased: true);
7407	} else {
7408	// All the arguments passed through ellipses are on the stack. Create a
7409	// dummy fixed stack object the same size as a pointer since we don't
7410	// know the actual size.
7411	VAListIndex =
7412	MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: CCInfo.getStackSize(),
7413	/ IsImmutable / true, / IsAliased / isAliased: true);
7414	}
7415
7416	FuncInfo->setVarArgsFrameIndex(VAListIndex);
7417	SDValue FIN = DAG.getFrameIndex(FI: VAListIndex, VT: PtrVT);
7418
7419	static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7420	PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7421
7422	static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7423	PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7424	const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7425
7426	// The fixed integer arguments of a variadic function are stored to the
7427	// VarArgsFrameIndex on the stack so that they may be loaded by
7428	// dereferencing the result of va_next.
7429	for (unsigned
7430	GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7431	Offset = `0`;
7432	GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7433
7434	const Register VReg =
7435	IsPPC64 ? MF.addLiveIn(PReg: GPR_64[GPRIndex], RC: &PPC::G8RCRegClass)
7436	: MF.addLiveIn(PReg: GPR_32[GPRIndex], RC: &PPC::GPRCRegClass);
7437
7438	SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
7439	MachinePointerInfo MPI =
7440	MachinePointerInfo::getFixedStack(MF, FI: VAListIndex, Offset);
7441	SDValue Store = DAG.getStore(Chain: Val.getValue(R: `1`), dl, Val, Ptr: FIN, PtrInfo: MPI);
7442	MemOps.push_back(Elt: Store);
7443	// Increment the address for the next argument to store.
7444	SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
7445	FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
7446	}
7447	}
7448
7449	if (!MemOps.empty())
7450	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
7451
7452	return Chain;
7453	}
7454
7455	SDValue PPCTargetLowering::LowerCall_AIX(
7456	SDValue Chain, SDValue Callee, CallFlags CFlags,
7457	const SmallVectorImpl<ISD::OutputArg> &Outs,
7458	const SmallVectorImpl<SDValue> &OutVals,
7459	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7460	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7461	const CallBase CB) const* {
7462	// See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7463	// AIX ABI stack frame layout.
7464
7465	assert((CFlags.CallConv == CallingConv::C \|\|
7466	CFlags.CallConv == CallingConv::Cold \|\|
7467	CFlags.CallConv == CallingConv::Fast) &&
7468	"Unexpected calling convention!");
7469
7470	if (CFlags.IsPatchPoint)
7471	report_fatal_error(reason: "This call type is unimplemented on AIX.");
7472
7473	const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7474
7475	MachineFunction &MF = DAG.getMachineFunction();
7476	SmallVector<CCValAssign, `16`> ArgLocs;
7477	CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7478	*DAG.getContext());
7479
7480	// Reserve space for the linkage save area (LSA) on the stack.
7481	// In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7482	// [SP][CR][LR][2 x reserved][TOC].
7483	// The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7484	const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7485	const bool IsPPC64 = Subtarget.isPPC64();
7486	const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7487	const unsigned PtrByteSize = IsPPC64 ? `8` : `4`;
7488	CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align (PtrByteSize));
7489	CCInfo.AnalyzeCallOperands(Outs, Fn: CC_AIX);
7490
7491	// The prolog code of the callee may store up to 8 GPR argument registers to
7492	// the stack, allowing va_start to index over them in memory if the callee
7493	// is variadic.
7494	// Because we cannot tell if this is needed on the caller side, we have to
7495	// conservatively assume that it is needed. As such, make sure we have at
7496	// least enough stack space for the caller to store the 8 GPRs.
7497	const unsigned MinParameterSaveAreaSize = `8` * PtrByteSize;
7498	const unsigned NumBytes = std::max<unsigned>(
7499	a: LinkageSize + MinParameterSaveAreaSize, b: CCInfo.getStackSize());
7500
7501	// Adjust the stack pointer for the new arguments...
7502	// These operations are automatically eliminated by the prolog/epilog pass.
7503	Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: `0`, DL: dl);
7504	SDValue CallSeqStart = Chain;
7505
7506	SmallVector<std::pair<unsigned, SDValue>, `8`> RegsToPass;
7507	SmallVector<SDValue, `8`> MemOpChains;
7508
7509	// Set up a copy of the stack pointer for loading and storing any
7510	// arguments that may not fit in the registers available for argument
7511	// passing.
7512	const SDValue StackPtr = IsPPC64 ? DAG.getRegister(Reg: PPC::X1, VT: MVT::i64)
7513	: DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
7514
7515	for (unsigned I = `0`, E = ArgLocs.size(); I != E;) {
7516	const unsigned ValNo = ArgLocs [I].getValNo();
7517	SDValue Arg = OutVals [ValNo];
7518	ISD::ArgFlagsTy Flags = Outs [ValNo].Flags;
7519
7520	if (Flags.isByVal()) {
7521	const unsigned ByValSize = Flags.getByValSize();
7522
7523	// Nothing to do for zero-sized ByVals on the caller side.
7524	if (!ByValSize) {
7525	++I;
7526	continue;
7527	}
7528
7529	auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7530	return DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: PtrVT, Chain,
7531	Ptr: (LoadOffset != `0`)
7532	? DAG.getObjectPtrOffset(
7533	SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7534	: Arg,
7535	PtrInfo: MachinePointerInfo (), MemVT: VT);
7536	};
7537
7538	unsigned LoadOffset = `0`;
7539
7540	// Initialize registers, which are fully occupied by the by-val argument.
7541	while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs [I].isRegLoc()) {
7542	SDValue Load = GetLoad (PtrVT, LoadOffset);
7543	MemOpChains.push_back(Elt: Load.getValue(R: `1`));
7544	LoadOffset += PtrByteSize;
7545	const CCValAssign &ByValVA = ArgLocs [I++];
7546	assert(ByValVA.getValNo() == ValNo &&
7547	"Unexpected location for pass-by-value argument.");
7548	RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: Load));
7549	}
7550
7551	if (LoadOffset == ByValSize)
7552	continue;
7553
7554	// There must be one more loc to handle the remainder.
7555	assert(ArgLocs[I].getValNo() == ValNo &&
7556	"Expected additional location for by-value argument.");
7557
7558	if (ArgLocs [I].isMemLoc()) {
7559	assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7560	const CCValAssign &ByValVA = ArgLocs [I++];
7561	ISD::ArgFlagsTy MemcpyFlags = Flags;
7562	// Only memcpy the bytes that don't pass in register.
7563	MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7564	Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7565	Arg: (LoadOffset != `0`) ? DAG.getObjectPtrOffset(
7566	SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7567	: Arg,
7568	PtrOff: DAG.getObjectPtrOffset(
7569	SL: dl, Ptr: StackPtr, Offset: TypeSize::getFixed(ExactSize: ByValVA.getLocMemOffset())),
7570	CallSeqStart, Flags: MemcpyFlags, DAG, dl);
7571	continue;
7572	}
7573
7574	// Initialize the final register residue.
7575	// Any residue that occupies the final by-val arg register must be
7576	// left-justified on AIX. Loads must be a power-of-2 size and cannot be
7577	// larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7578	// 2 and 1 byte loads.
7579	const unsigned ResidueBytes = ByValSize % PtrByteSize;
7580	assert(ResidueBytes != `0` && LoadOffset + PtrByteSize > ByValSize &&
7581	"Unexpected register residue for by-value argument.");
7582	SDValue ResidueVal;
7583	for (unsigned Bytes = `0`; Bytes != ResidueBytes;) {
7584	const unsigned N = llvm::bit_floor(Value: ResidueBytes - Bytes);
7585	const MVT VT =
7586	N == `1` ? MVT::i8
7587	: ((N == `2`) ? MVT::i16 : (N == `4` ? MVT::i32 : MVT::i64));
7588	SDValue Load = GetLoad (VT, LoadOffset);
7589	MemOpChains.push_back(Elt: Load.getValue(R: `1`));
7590	LoadOffset += N;
7591	Bytes += N;
7592
7593	// By-val arguments are passed left-justfied in register.
7594	// Every load here needs to be shifted, otherwise a full register load
7595	// should have been used.
7596	assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * `8`) &&
7597	"Unexpected load emitted during handling of pass-by-value "
7598	"argument.");
7599	unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * `8`);
7600	EVT ShiftAmountTy =
7601	getShiftAmountTy(LHSTy: Load ->getValueType(ResNo: `0`), DL: DAG.getDataLayout());
7602	SDValue SHLAmt = DAG.getConstant(Val: NumSHLBits, DL: dl, VT: ShiftAmountTy);
7603	SDValue ShiftedLoad =
7604	DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: Load.getValueType(), N1: Load, N2: SHLAmt);
7605	ResidueVal = ResidueVal ? DAG.getNode(Opcode: ISD::OR, DL: dl, VT: PtrVT, N1: ResidueVal,
7606	N2: ShiftedLoad)
7607	: ShiftedLoad;
7608	}
7609
7610	const CCValAssign &ByValVA = ArgLocs [I++];
7611	RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: ResidueVal));
7612	continue;
7613	}
7614
7615	CCValAssign &VA = ArgLocs [I++];
7616	const MVT LocVT = VA.getLocVT();
7617	const MVT ValVT = VA.getValVT();
7618
7619	switch (VA.getLocInfo()) {
7620	default:
7621	report_fatal_error(reason: "Unexpected argument extension type.");
7622	case CCValAssign::Full:
7623	break;
7624	case CCValAssign::ZExt:
7625	Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7626	break;
7627	case CCValAssign::SExt:
7628	Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7629	break;
7630	}
7631
7632	if (VA.isRegLoc() && !VA.needsCustom()) {
7633	RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
7634	continue;
7635	}
7636
7637	// Vector arguments passed to VarArg functions need custom handling when
7638	// they are passed (at least partially) in GPRs.
7639	if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7640	assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7641	// Store value to its stack slot.
7642	SDValue PtrOff =
7643	DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7644	PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7645	SDValue Store =
7646	DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo ());
7647	MemOpChains.push_back(Elt: Store);
7648	const unsigned OriginalValNo = VA.getValNo();
7649	// Then load the GPRs from the stack
7650	unsigned LoadOffset = `0`;
7651	auto HandleCustomVecRegLoc = [&]() {
7652	assert(I != E && "Unexpected end of CCvalAssigns.");
7653	assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7654	"Expected custom RegLoc.");
7655	CCValAssign RegVA = ArgLocs [I++];
7656	assert(RegVA.getValNo() == OriginalValNo &&
7657	"Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7658	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
7659	N2: DAG.getConstant(Val: LoadOffset, DL: dl, VT: PtrVT));
7660	SDValue Load = DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Add, PtrInfo: MachinePointerInfo ());
7661	MemOpChains.push_back(Elt: Load.getValue(R: `1`));
7662	RegsToPass.push_back(Elt: std::make_pair(x: RegVA.getLocReg(), y&: Load));
7663	LoadOffset += PtrByteSize;
7664	};
7665
7666	// In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7667	// in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7668	// R10.
7669	HandleCustomVecRegLoc ();
7670	HandleCustomVecRegLoc ();
7671
7672	if (I != E && ArgLocs [I].isRegLoc() && ArgLocs [I].needsCustom() &&
7673	ArgLocs [I].getValNo() == OriginalValNo) {
7674	assert(!IsPPC64 &&
7675	"Only 2 custom RegLocs expected for 64-bit codegen.");
7676	HandleCustomVecRegLoc ();
7677	HandleCustomVecRegLoc ();
7678	}
7679
7680	continue;
7681	}
7682
7683	if (VA.isMemLoc()) {
7684	SDValue PtrOff =
7685	DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7686	PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7687	MemOpChains.push_back(
7688	Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff,
7689	PtrInfo: MachinePointerInfo::getStack(MF, Offset: VA.getLocMemOffset()),
7690	Alignment: Subtarget.getFrameLowering()->getStackAlign()));
7691
7692	continue;
7693	}
7694
7695	if (!ValVT.isFloatingPoint())
7696	report_fatal_error(
7697	reason: "Unexpected register handling for calling convention.");
7698
7699	// Custom handling is used for GPR initializations for vararg float
7700	// arguments.
7701	assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7702	LocVT.isInteger() &&
7703	"Custom register handling only expected for VarArg.");
7704
7705	SDValue ArgAsInt =
7706	DAG.getBitcast(VT: MVT::getIntegerVT(BitWidth: ValVT.getSizeInBits()), V: Arg);
7707
7708	if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7709	// f32 in 32-bit GPR
7710	// f64 in 64-bit GPR
7711	RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgAsInt));
7712	else if (Arg.getValueType().getFixedSizeInBits() <
7713	LocVT.getFixedSizeInBits())
7714	// f32 in 64-bit GPR.
7715	RegsToPass.push_back(Elt: std::make_pair(
7716	x: VA.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: LocVT)));
7717	else {
7718	// f64 in two 32-bit GPRs
7719	// The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7720	assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7721	"Unexpected custom register for argument!");
7722	CCValAssign &GPR1 = VA;
7723	SDValue MSWAsI64 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgAsInt,
7724	N2: DAG.getConstant(Val: `32`, DL: dl, VT: MVT::i8));
7725	RegsToPass.push_back(Elt: std::make_pair(
7726	x: GPR1.getLocReg(), y: DAG.getZExtOrTrunc(Op: MSWAsI64, DL: dl, VT: MVT::i32)));
7727
7728	if (I != E) {
7729	// If only 1 GPR was available, there will only be one custom GPR and
7730	// the argument will also pass in memory.
7731	CCValAssign &PeekArg = ArgLocs [I];
7732	if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7733	assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7734	CCValAssign &GPR2 = ArgLocs [I++];
7735	RegsToPass.push_back(Elt: std::make_pair(
7736	x: GPR2.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: MVT::i32)));
7737	}
7738	}
7739	}
7740	}
7741
7742	if (!MemOpChains.empty())
7743	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
7744
7745	// For indirect calls, we need to save the TOC base to the stack for
7746	// restoration after the call.
7747	if (CFlags.IsIndirect) {
7748	assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7749	const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7750	const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7751	const MVT PtrVT = Subtarget.getScalarIntVT();
7752	const unsigned TOCSaveOffset =
7753	Subtarget.getFrameLowering()->getTOCSaveOffset();
7754
7755	setUsesTOCBasePtr(DAG);
7756	SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: TOCBaseReg, VT: PtrVT);
7757	SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
7758	SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: PtrVT);
7759	SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7760	Chain = DAG.getStore(
7761	Chain: Val.getValue(R: `1`), dl, Val, Ptr: AddPtr,
7762	PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
7763	}
7764
7765	// Build a sequence of copy-to-reg nodes chained together with token chain
7766	// and flag operands which copy the outgoing args into the appropriate regs.
7767	SDValue InGlue;
7768	for (auto Reg : RegsToPass) {
7769	Chain = DAG.getCopyToReg(Chain, dl, Reg: Reg.first, N: Reg.second, Glue: InGlue);
7770	InGlue = Chain.getValue(R: `1`);
7771	}
7772
7773	const int SPDiff = `0`;
7774	return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
7775	Callee, SPDiff, NumBytes, Ins, InVals, CB);
7776	}
7777
7778	bool
7779	PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7780	MachineFunction &MF, bool isVarArg,
7781	const SmallVectorImpl<ISD::OutputArg> &Outs,
7782	LLVMContext &Context,
7783	const Type RetTy) const* {
7784	SmallVector<CCValAssign, `16`> RVLocs;
7785	CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7786	return CCInfo.CheckReturn(
7787	Outs, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7788	? RetCC_PPC_Cold
7789	: RetCC_PPC);
7790	}
7791
7792	SDValue
7793	PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7794	bool isVarArg,
7795	const SmallVectorImpl<ISD::OutputArg> &Outs,
7796	const SmallVectorImpl<SDValue> &OutVals,
7797	const SDLoc &dl, SelectionDAG &DAG) const {
7798	SmallVector<CCValAssign, `16`> RVLocs;
7799	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7800	*DAG.getContext());
7801	CCInfo.AnalyzeReturn(Outs,
7802	Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7803	? RetCC_PPC_Cold
7804	: RetCC_PPC);
7805
7806	SDValue Glue;
7807	SmallVector<SDValue, `4`> RetOps(`1`, Chain);
7808
7809	// Copy the result values into the output registers.
7810	for (unsigned i = `0`, RealResIdx = `0`; i != RVLocs.size(); ++i, ++RealResIdx) {
7811	CCValAssign &VA = RVLocs [i];
7812	assert(VA.isRegLoc() && "Can only return in registers!");
7813
7814	SDValue Arg = OutVals [RealResIdx];
7815
7816	switch (VA.getLocInfo()) {
7817	default: llvm_unreachable("Unknown loc info!");
7818	case CCValAssign::Full: break;
7819	case CCValAssign::AExt:
7820	Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7821	break;
7822	case CCValAssign::ZExt:
7823	Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7824	break;
7825	case CCValAssign::SExt:
7826	Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7827	break;
7828	}
7829	if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7830	bool isLittleEndian = Subtarget.isLittleEndian();
7831	// Legalize ret f64 -> ret 2 x i32.
7832	SDValue SVal =
7833	DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
7834	N2: DAG.getIntPtrConstant(Val: isLittleEndian ? `0` : `1`, DL: dl));
7835	Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
7836	RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
7837	SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
7838	N2: DAG.getIntPtrConstant(Val: isLittleEndian ? `1` : `0`, DL: dl));
7839	Glue = Chain.getValue(R: `1`);
7840	VA = RVLocs [++i]; // skip ahead to next loc
7841	Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
7842	} else
7843	Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: Arg, Glue);
7844	Glue = Chain.getValue(R: `1`);
7845	RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
7846	}
7847
7848	RetOps [`0`] = Chain; // Update chain.
7849
7850	// Add the glue if we have it.
7851	if (Glue.getNode())
7852	RetOps.push_back(Elt: Glue);
7853
7854	return DAG.getNode(Opcode: PPCISD::RET_GLUE, DL: dl, VT: MVT::Other, Ops: RetOps);
7855	}
7856
7857	SDValue
7858	PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7859	SelectionDAG &DAG) const {
7860	SDLoc dl(Op);
7861
7862	// Get the correct type for integers.
7863	EVT IntVT = Op.getValueType();
7864
7865	// Get the inputs.
7866	SDValue Chain = Op.getOperand(i: `0`);
7867	SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7868	// Build a DYNAREAOFFSET node.
7869	SDValue Ops[`2`] = {Chain, FPSIdx};
7870	SDVTList VTs = DAG.getVTList(VT: IntVT);
7871	return DAG.getNode(Opcode: PPCISD::DYNAREAOFFSET, DL: dl, VTList: VTs, Ops);
7872	}
7873
7874	SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7875	SelectionDAG &DAG) const {
7876	// When we pop the dynamic allocation we need to restore the SP link.
7877	SDLoc dl(Op);
7878
7879	// Get the correct type for pointers.
7880	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7881
7882	// Construct the stack pointer operand.
7883	bool isPPC64 = Subtarget.isPPC64();
7884	unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7885	SDValue StackPtr = DAG.getRegister(Reg: SP, VT: PtrVT);
7886
7887	// Get the operands for the STACKRESTORE.
7888	SDValue Chain = Op.getOperand(i: `0`);
7889	SDValue SaveSP = Op.getOperand(i: `1`);
7890
7891	// Load the old link SP.
7892	SDValue LoadLinkSP =
7893	DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: StackPtr, PtrInfo: MachinePointerInfo ());
7894
7895	// Restore the stack pointer.
7896	Chain = DAG.getCopyToReg(Chain: LoadLinkSP.getValue(R: `1`), dl, Reg: SP, N: SaveSP);
7897
7898	// Store the old link SP.
7899	return DAG.getStore(Chain, dl, Val: LoadLinkSP, Ptr: StackPtr, PtrInfo: MachinePointerInfo ());
7900	}
7901
7902	SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7903	MachineFunction &MF = DAG.getMachineFunction();
7904	bool isPPC64 = Subtarget.isPPC64();
7905	EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
7906
7907	// Get current frame pointer save index. The users of this index will be
7908	// primarily DYNALLOC instructions.
7909	PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7910	int RASI = FI->getReturnAddrSaveIndex();
7911
7912	// If the frame pointer save index hasn't been defined yet.
7913	if (!RASI) {
7914	// Find out what the fix offset of the frame pointer save area.
7915	int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7916	// Allocate the frame index for frame pointer save area.
7917	RASI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? `8` : `4`, SPOffset: LROffset, IsImmutable: false);
7918	// Save the result.
7919	FI->setReturnAddrSaveIndex(RASI);
7920	}
7921	return DAG.getFrameIndex(FI: RASI, VT: PtrVT);
7922	}
7923
7924	SDValue
7925	PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7926	MachineFunction &MF = DAG.getMachineFunction();
7927	bool isPPC64 = Subtarget.isPPC64();
7928	EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
7929
7930	// Get current frame pointer save index. The users of this index will be
7931	// primarily DYNALLOC instructions.
7932	PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7933	int FPSI = FI->getFramePointerSaveIndex();
7934
7935	// If the frame pointer save index hasn't been defined yet.
7936	if (!FPSI) {
7937	// Find out what the fix offset of the frame pointer save area.
7938	int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7939	// Allocate the frame index for frame pointer save area.
7940	FPSI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? `8` : `4`, SPOffset: FPOffset, IsImmutable: true);
7941	// Save the result.
7942	FI->setFramePointerSaveIndex(FPSI);
7943	}
7944	return DAG.getFrameIndex(FI: FPSI, VT: PtrVT);
7945	}
7946
7947	SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7948	SelectionDAG &DAG) const {
7949	MachineFunction &MF = DAG.getMachineFunction();
7950	// Get the inputs.
7951	SDValue Chain = Op.getOperand(i: `0`);
7952	SDValue Size = Op.getOperand(i: `1`);
7953	SDLoc dl(Op);
7954
7955	// Get the correct type for pointers.
7956	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7957	// Negate the size.
7958	SDValue NegSize = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: PtrVT,
7959	N1: DAG.getConstant(Val: `0`, DL: dl, VT: PtrVT), N2: Size);
7960	// Construct a node for the frame pointer save index.
7961	SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7962	SDValue Ops[`3`] = { Chain, NegSize, FPSIdx };
7963	SDVTList VTs = DAG.getVTList(VT1: PtrVT, VT2: MVT::Other);
7964	if (hasInlineStackProbe(MF))
7965	return DAG.getNode(Opcode: PPCISD::PROBED_ALLOCA, DL: dl, VTList: VTs, Ops);
7966	return DAG.getNode(Opcode: PPCISD::DYNALLOC, DL: dl, VTList: VTs, Ops);
7967	}
7968
7969	SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7970	SelectionDAG &DAG) const {
7971	MachineFunction &MF = DAG.getMachineFunction();
7972
7973	bool isPPC64 = Subtarget.isPPC64();
7974	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7975
7976	int FI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64 ? `8` : `4`, SPOffset: `0`, IsImmutable: false);
7977	return DAG.getFrameIndex(FI, VT: PtrVT);
7978	}
7979
7980	SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7981	SelectionDAG &DAG) const {
7982	SDLoc DL(Op);
7983	return DAG.getNode(Opcode: PPCISD::EH_SJLJ_SETJMP, DL,
7984	VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
7985	N1: Op.getOperand(i: `0`), N2: Op.getOperand(i: `1`));
7986	}
7987
7988	SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7989	SelectionDAG &DAG) const {
7990	SDLoc DL(Op);
7991	return DAG.getNode(Opcode: PPCISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other,
7992	N1: Op.getOperand(i: `0`), N2: Op.getOperand(i: `1`));
7993	}
7994
7995	SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7996	if (Op.getValueType().isVector())
7997	return LowerVectorLoad(Op, DAG);
7998
7999	assert(Op.getValueType() == MVT::i1 &&
8000	"Custom lowering only for i1 loads");
8001
8002	// First, load 8 bits into 32 bits, then truncate to 1 bit.
8003
8004	SDLoc dl(Op);
8005	LoadSDNode *LD = cast<LoadSDNode>(Val&: Op);
8006
8007	SDValue Chain = LD->getChain();
8008	SDValue BasePtr = LD->getBasePtr();
8009	MachineMemOperand *MMO = LD->getMemOperand();
8010
8011	SDValue NewLD =
8012	DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: getPointerTy(DL: DAG.getDataLayout()), Chain,
8013	Ptr: BasePtr, MemVT: MVT::i8, MMO);
8014	SDValue Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewLD);
8015
8016	SDValue Ops[] = { Result, SDValue (NewLD.getNode(), `1`) };
8017	return DAG.getMergeValues(Ops, dl);
8018	}
8019
8020	SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8021	if (Op.getOperand(i: `1`).getValueType().isVector())
8022	return LowerVectorStore(Op, DAG);
8023
8024	assert(Op.getOperand(`1`).getValueType() == MVT::i1 &&
8025	"Custom lowering only for i1 stores");
8026
8027	// First, zero extend to 32 bits, then use a truncating store to 8 bits.
8028
8029	SDLoc dl(Op);
8030	StoreSDNode *ST = cast<StoreSDNode>(Val&: Op);
8031
8032	SDValue Chain = ST->getChain();
8033	SDValue BasePtr = ST->getBasePtr();
8034	SDValue Value = ST->getValue();
8035	MachineMemOperand *MMO = ST->getMemOperand();
8036
8037	Value = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
8038	Operand: Value);
8039	return DAG.getTruncStore(Chain, dl, Val: Value, Ptr: BasePtr, SVT: MVT::i8, MMO);
8040	}
8041
8042	// FIXME: Remove this once the ANDI glue bug is fixed:
8043	SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8044	assert(Op.getValueType() == MVT::i1 &&
8045	"Custom lowering only for i1 results");
8046
8047	SDLoc DL(Op);
8048	return DAG.getNode(Opcode: PPCISD::ANDI_rec_1_GT_BIT, DL, VT: MVT::i1, Operand: Op.getOperand(i: `0`));
8049	}
8050
8051	SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8052	SelectionDAG &DAG) const {
8053
8054	// Implements a vector truncate that fits in a vector register as a shuffle.
8055	// We want to legalize vector truncates down to where the source fits in
8056	// a vector register (and target is therefore smaller than vector register
8057	// size). At that point legalization will try to custom lower the sub-legal
8058	// result and get here - where we can contain the truncate as a single target
8059	// operation.
8060
8061	// For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8062	// <MSB1\|LSB1, MSB2\|LSB2> to <LSB1, LSB2>
8063	//
8064	// We will implement it for big-endian ordering as this (where x denotes
8065	// undefined):
8066	// < MSB1\|LSB1, MSB2\|LSB2, uu, uu, uu, uu, uu, uu> to
8067	// < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8068	//
8069	// The same operation in little-endian ordering will be:
8070	// <uu, uu, uu, uu, uu, uu, LSB2\|MSB2, LSB1\|MSB1> to
8071	// <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8072
8073	EVT TrgVT = Op.getValueType();
8074	assert(TrgVT.isVector() && "Vector type expected.");
8075	unsigned TrgNumElts = TrgVT.getVectorNumElements();
8076	EVT EltVT = TrgVT.getVectorElementType();
8077	if (!isOperationCustom(Op: Op.getOpcode(), VT: TrgVT) \|\|
8078	TrgVT.getSizeInBits() > `128` \|\| !isPowerOf2_32(Value: TrgNumElts) \|\|
8079	!llvm::has_single_bit<uint32_t>(Value: EltVT.getSizeInBits()))
8080	return SDValue ();
8081
8082	SDValue N1 = Op.getOperand(i: `0`);
8083	EVT SrcVT = N1.getValueType();
8084	unsigned SrcSize = SrcVT.getSizeInBits();
8085	if (SrcSize > `256` \|\| !isPowerOf2_32(Value: SrcVT.getVectorNumElements()) \|\|
8086	!llvm::has_single_bit<uint32_t>(
8087	Value: SrcVT.getVectorElementType().getSizeInBits()))
8088	return SDValue ();
8089	if (SrcSize == `256` && SrcVT.getVectorNumElements() < `2`)
8090	return SDValue ();
8091
8092	unsigned WideNumElts = `128` / EltVT.getSizeInBits();
8093	EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);
8094
8095	SDLoc DL(Op);
8096	SDValue Op1, Op2;
8097	if (SrcSize == `256`) {
8098	EVT VecIdxTy = getVectorIdxTy(DL: DAG.getDataLayout());
8099	EVT SplitVT =
8100	N1.getValueType().getHalfNumVectorElementsVT(Context&: *DAG.getContext());
8101	unsigned SplitNumElts = SplitVT.getVectorNumElements();
8102	Op1 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
8103	N2: DAG.getConstant(Val: `0`, DL, VT: VecIdxTy));
8104	Op2 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
8105	N2: DAG.getConstant(Val: SplitNumElts, DL, VT: VecIdxTy));
8106	}
8107	else {
8108	Op1 = SrcSize == `128` ? N1 : widenVec(DAG, Vec: N1, dl: DL);
8109	Op2 = DAG.getUNDEF(VT: WideVT);
8110	}
8111
8112	// First list the elements we want to keep.
8113	unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8114	SmallVector<int, `16`> ShuffV;
8115	if (Subtarget.isLittleEndian())
8116	for (unsigned i = `0`; i < TrgNumElts; ++i)
8117	ShuffV.push_back(Elt: i * SizeMult);
8118	else
8119	for (unsigned i = `1`; i <= TrgNumElts; ++i)
8120	ShuffV.push_back(Elt: i * SizeMult - `1`);
8121
8122	// Populate the remaining elements with undefs.
8123	for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8124	// ShuffV.push_back(i + WideNumElts);
8125	ShuffV.push_back(Elt: WideNumElts + `1`);
8126
8127	Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op1);
8128	Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op2);
8129	return DAG.getVectorShuffle(VT: WideVT, dl: DL, N1: Op1, N2: Op2, Mask: ShuffV);
8130	}
8131
8132	/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8133	/// possible.
8134	SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8135	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `4`))->get();
8136	EVT ResVT = Op.getValueType();
8137	EVT CmpVT = Op.getOperand(i: `0`).getValueType();
8138	SDValue LHS = Op.getOperand(i: `0`), RHS = Op.getOperand(i: `1`);
8139	SDValue TV = Op.getOperand(i: `2`), FV = Op.getOperand(i: `3`);
8140	SDLoc dl(Op);
8141
8142	// Without power9-vector, we don't have native instruction for f128 comparison.
8143	// Following transformation to libcall is needed for setcc:
8144	// select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8145	if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8146	SDValue Z = DAG.getSetCC(
8147	DL: dl, VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: CmpVT),
8148	LHS, RHS, Cond: CC);
8149	SDValue Zero = DAG.getConstant(Val: `0`, DL: dl, VT: Z.getValueType());
8150	return DAG.getSelectCC(DL: dl, LHS: Z, RHS: Zero, True: TV, False: FV, Cond: ISD::SETNE);
8151	}
8152
8153	// Not FP, or using SPE? Not a fsel.
8154	if (!CmpVT.isFloatingPoint() \|\| !TV.getValueType().isFloatingPoint() \|\|
8155	Subtarget.hasSPE())
8156	return Op;
8157
8158	SDNodeFlags Flags = Op.getNode()->getFlags();
8159
8160	// We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8161	// presence of infinities.
8162	if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8163	switch (CC) {
8164	default:
8165	break;
8166	case ISD::SETOGT:
8167	case ISD::SETGT:
8168	return DAG.getNode(Opcode: PPCISD::XSMAXC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
8169	case ISD::SETOLT:
8170	case ISD::SETLT:
8171	return DAG.getNode(Opcode: PPCISD::XSMINC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
8172	}
8173	}
8174
8175	// We might be able to do better than this under some circumstances, but in
8176	// general, fsel-based lowering of select is a finite-math-only optimization.
8177	// For more information, see section F.3 of the 2.06 ISA specification.
8178	// With ISA 3.0
8179	if (!Flags.hasNoInfs() \|\| !Flags.hasNoNaNs() \|\| ResVT == MVT::f128)
8180	return Op;
8181
8182	// If the RHS of the comparison is a 0.0, we don't need to do the
8183	// subtraction at all.
8184	SDValue Sel1;
8185	if (isFloatingPointZero(Op: RHS))
8186	switch (CC) {
8187	default: break; // SETUO etc aren't handled by fsel.
8188	case ISD::SETNE:
8189	std::swap(a&: TV, b&: FV);
8190	[[fallthrough]];
8191	case ISD::SETEQ:
8192	if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8193	LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
8194	Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
8195	if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8196	Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
8197	return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
8198	N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: Sel1, N3: FV);
8199	case ISD::SETULT:
8200	case ISD::SETLT:
8201	std::swap(a&: TV, b&: FV); // fsel is natively setge, swap operands for setlt
8202	[[fallthrough]];
8203	case ISD::SETOGE:
8204	case ISD::SETGE:
8205	if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8206	LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
8207	return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
8208	case ISD::SETUGT:
8209	case ISD::SETGT:
8210	std::swap(a&: TV, b&: FV); // fsel is natively setge, swap operands for setlt
8211	[[fallthrough]];
8212	case ISD::SETOLE:
8213	case ISD::SETLE:
8214	if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8215	LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
8216	return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
8217	N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: TV, N3: FV);
8218	}
8219
8220	SDValue Cmp;
8221	switch (CC) {
8222	default: break; // SETUO etc aren't handled by fsel.
8223	case ISD::SETNE:
8224	std::swap(a&: TV, b&: FV);
8225	[[fallthrough]];
8226	case ISD::SETEQ:
8227	Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
8228	if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8229	Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8230	Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
8231	if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8232	Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
8233	return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
8234	N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: Cmp), N2: Sel1, N3: FV);
8235	case ISD::SETULT:
8236	case ISD::SETLT:
8237	Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
8238	if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8239	Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8240	return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
8241	case ISD::SETOGE:
8242	case ISD::SETGE:
8243	Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
8244	if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8245	Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8246	return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
8247	case ISD::SETUGT:
8248	case ISD::SETGT:
8249	Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
8250	if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8251	Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8252	return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
8253	case ISD::SETOLE:
8254	case ISD::SETLE:
8255	Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
8256	if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8257	Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8258	return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
8259	}
8260	return Op;
8261	}
8262
8263	static unsigned getPPCStrictOpcode(unsigned Opc) {
8264	switch (Opc) {
8265	default:
8266	llvm_unreachable("No strict version of this opcode!");
8267	case PPCISD::FCTIDZ:
8268	return PPCISD::STRICT_FCTIDZ;
8269	case PPCISD::FCTIWZ:
8270	return PPCISD::STRICT_FCTIWZ;
8271	case PPCISD::FCTIDUZ:
8272	return PPCISD::STRICT_FCTIDUZ;
8273	case PPCISD::FCTIWUZ:
8274	return PPCISD::STRICT_FCTIWUZ;
8275	case PPCISD::FCFID:
8276	return PPCISD::STRICT_FCFID;
8277	case PPCISD::FCFIDU:
8278	return PPCISD::STRICT_FCFIDU;
8279	case PPCISD::FCFIDS:
8280	return PPCISD::STRICT_FCFIDS;
8281	case PPCISD::FCFIDUS:
8282	return PPCISD::STRICT_FCFIDUS;
8283	}
8284	}
8285
8286	static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8287	const PPCSubtarget &Subtarget) {
8288	SDLoc dl(Op);
8289	bool IsStrict = Op ->isStrictFPOpcode();
8290	bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT \|\|
8291	Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8292
8293	// TODO: Any other flags to propagate?
8294	SDNodeFlags Flags;
8295	Flags.setNoFPExcept(Op ->getFlags().hasNoFPExcept());
8296
8297	// For strict nodes, source is the second operand.
8298	SDValue Src = Op.getOperand(i: IsStrict ? `1` : `0`);
8299	SDValue Chain = IsStrict ? Op.getOperand(i: `0`) : SDValue ();
8300	MVT DestTy = Op.getSimpleValueType();
8301	assert(Src.getValueType().isFloatingPoint() &&
8302	(DestTy == MVT::i8 \|\| DestTy == MVT::i16 \|\| DestTy == MVT::i32 \|\|
8303	DestTy == MVT::i64) &&
8304	"Invalid FP_TO_INT types");
8305	if (Src.getValueType() == MVT::f32) {
8306	if (IsStrict) {
8307	Src =
8308	DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL: dl,
8309	VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
8310	Chain = Src.getValue(R: `1`);
8311	} else
8312	Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
8313	}
8314	if ((DestTy == MVT::i8 \|\| DestTy == MVT::i16) && Subtarget.hasP9Vector())
8315	DestTy = Subtarget.getScalarIntVT();
8316	unsigned Opc = ISD::DELETED_NODE;
8317	switch (DestTy.SimpleTy) {
8318	default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8319	case MVT::i32:
8320	Opc = IsSigned ? PPCISD::FCTIWZ
8321	: (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8322	break;
8323	case MVT::i64:
8324	assert((IsSigned \|\| Subtarget.hasFPCVT()) &&
8325	"i64 FP_TO_UINT is supported only with FPCVT");
8326	Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8327	}
8328	EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8329	SDValue Conv;
8330	if (IsStrict) {
8331	Opc = getPPCStrictOpcode(Opc);
8332	Conv = DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src},
8333	Flags);
8334	} else {
8335	Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: ConvTy, Operand: Src);
8336	}
8337	return Conv;
8338	}
8339
8340	void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8341	SelectionDAG &DAG,
8342	const SDLoc &dl) const {
8343	SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8344	bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT \|\|
8345	Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8346	bool IsStrict = Op ->isStrictFPOpcode();
8347
8348	// Convert the FP value to an int value through memory.
8349	bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8350	(IsSigned \|\| Subtarget.hasFPCVT());
8351	SDValue FIPtr = DAG.CreateStackTemporary(VT: i32Stack ? MVT::i32 : MVT::f64);
8352	int FI = cast<FrameIndexSDNode>(Val&: FIPtr)->getIndex();
8353	MachinePointerInfo MPI =
8354	MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);
8355
8356	// Emit a store to the stack slot.
8357	SDValue Chain = IsStrict ? Tmp.getValue(R: `1`) : DAG.getEntryNode();
8358	Align Alignment(DAG.getEVTAlign(MemoryVT: Tmp.getValueType()));
8359	if (i32Stack) {
8360	MachineFunction &MF = DAG.getMachineFunction();
8361	Alignment = Align (`4`);
8362	MachineMemOperand *MMO =
8363	MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: `4`, BaseAlignment: Alignment);
8364	SDValue Ops[] = { Chain, Tmp, FIPtr };
8365	Chain = DAG.getMemIntrinsicNode(Opcode: PPCISD::STFIWX, dl,
8366	VTList: DAG.getVTList(VT: MVT::Other), Ops, MemVT: MVT::i32, MMO);
8367	} else
8368	Chain = DAG.getStore(Chain, dl, Val: Tmp, Ptr: FIPtr, PtrInfo: MPI, Alignment);
8369
8370	// Result is a load from the stack slot. If loading 4 bytes, make sure to
8371	// add in a bias on big endian.
8372	if (Op.getValueType() == MVT::i32 && !i32Stack &&
8373	!Subtarget.isLittleEndian()) {
8374	FIPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: FIPtr.getValueType(), N1: FIPtr,
8375	N2: DAG.getConstant(Val: `4`, DL: dl, VT: FIPtr.getValueType()));
8376	MPI = MPI.getWithOffset(O: `4`);
8377	}
8378
8379	RLI.Chain = Chain;
8380	RLI.Ptr = FIPtr;
8381	RLI.MPI = MPI;
8382	RLI.Alignment = Alignment;
8383	}
8384
8385	/// Custom lowers floating point to integer conversions to use
8386	/// the direct move instructions available in ISA 2.07 to avoid the
8387	/// need for load/store combinations.
8388	SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8389	SelectionDAG &DAG,
8390	const SDLoc &dl) const {
8391	SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8392	SDValue Mov = DAG.getNode(Opcode: PPCISD::MFVSR, DL: dl, VT: Op.getValueType(), Operand: Conv);
8393	if (Op ->isStrictFPOpcode())
8394	return DAG.getMergeValues(Ops: {Mov, Conv.getValue(R: `1`)}, dl);
8395	else
8396	return Mov;
8397	}
8398
8399	SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8400	const SDLoc &dl) const {
8401	bool IsStrict = Op ->isStrictFPOpcode();
8402	bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT \|\|
8403	Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8404	SDValue Src = Op.getOperand(i: IsStrict ? `1` : `0`);
8405	EVT SrcVT = Src.getValueType();
8406	EVT DstVT = Op.getValueType();
8407
8408	// FP to INT conversions are legal for f128.
8409	if (SrcVT == MVT::f128)
8410	return Subtarget.hasP9Vector() ? Op : SDValue ();
8411
8412	// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8413	// PPC (the libcall is not available).
8414	if (SrcVT == MVT::ppcf128) {
8415	if (DstVT == MVT::i32) {
8416	// TODO: Conservatively pass only nofpexcept flag here. Need to check and
8417	// set other fast-math flags to FP operations in both strict and
8418	// non-strict cases. (FP_TO_SINT, FSUB)
8419	SDNodeFlags Flags;
8420	Flags.setNoFPExcept(Op ->getFlags().hasNoFPExcept());
8421
8422	if (IsSigned) {
8423	SDValue Lo, Hi;
8424	std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Src, DL: dl, LoVT: MVT::f64, HiVT: MVT::f64);
8425
8426	// Add the two halves of the long double in round-to-zero mode, and use
8427	// a smaller FP_TO_SINT.
8428	if (IsStrict) {
8429	SDValue Res = DAG.getNode(Opcode: PPCISD::STRICT_FADDRTZ, DL: dl,
8430	VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8431	Ops: {Op.getOperand(i: `0`), Lo, Hi}, Flags);
8432	return DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
8433	VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
8434	Ops: {Res.getValue(R: `1`), Res}, Flags);
8435	} else {
8436	SDValue Res = DAG.getNode(Opcode: PPCISD::FADDRTZ, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
8437	return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Res);
8438	}
8439	} else {
8440	const uint64_t TwoE31[] = {`0x41e0000000000000LL`, `0`};
8441	APFloat APF = APFloat (APFloat::PPCDoubleDouble(), APInt (`128`, TwoE31));
8442	SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8443	SDValue SignMask = DAG.getConstant(Val: `0x80000000`, DL: dl, VT: DstVT);
8444	if (IsStrict) {
8445	// Sel = Src < 0x80000000
8446	// FltOfs = select Sel, 0.0, 0x80000000
8447	// IntOfs = select Sel, 0, 0x80000000
8448	// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8449	SDValue Chain = Op.getOperand(i: `0`);
8450	EVT SetCCVT =
8451	getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: SrcVT);
8452	EVT DstSetCCVT =
8453	getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: DstVT);
8454	SDValue Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8455	Chain, IsSignaling: true);
8456	Chain = Sel.getValue(R: `1`);
8457
8458	SDValue FltOfs = DAG.getSelect(
8459	DL: dl, VT: SrcVT, Cond: Sel, LHS: DAG.getConstantFP(Val: `0.0`, DL: dl, VT: SrcVT), RHS: Cst);
8460	Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8461
8462	SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl,
8463	VTList: DAG.getVTList(VT1: SrcVT, VT2: MVT::Other),
8464	Ops: {Chain, Src, FltOfs}, Flags);
8465	Chain = Val.getValue(R: `1`);
8466	SDValue SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
8467	VTList: DAG.getVTList(VT1: DstVT, VT2: MVT::Other),
8468	Ops: {Chain, Val}, Flags);
8469	Chain = SInt.getValue(R: `1`);
8470	SDValue IntOfs = DAG.getSelect(
8471	DL: dl, VT: DstVT, Cond: Sel, LHS: DAG.getConstant(Val: `0`, DL: dl, VT: DstVT), RHS: SignMask);
8472	SDValue Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8473	return DAG.getMergeValues(Ops: {Result, Chain}, dl);
8474	} else {
8475	// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8476	// FIXME: generated code sucks.
8477	SDValue True = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: MVT::ppcf128, N1: Src, N2: Cst);
8478	True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: True);
8479	True = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: True, N2: SignMask);
8480	SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Src);
8481	return DAG.getSelectCC(DL: dl, LHS: Src, RHS: Cst, True, False, Cond: ISD::SETGE);
8482	}
8483	}
8484	}
8485
8486	return SDValue ();
8487	}
8488
8489	if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8490	return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8491
8492	ReuseLoadInfo RLI;
8493	LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8494
8495	return DAG.getLoad(VT: Op.getValueType(), dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
8496	Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8497	}
8498
8499	// We're trying to insert a regular store, S, and then a load, L. If the
8500	// incoming value, O, is a load, we might just be able to have our load use the
8501	// address used by O. However, we don't know if anything else will store to
8502	// that address before we can load from it. To prevent this situation, we need
8503	// to insert our load, L, into the chain as a peer of O. To do this, we give L
8504	// the same chain operand as O, we create a token factor from the chain results
8505	// of O and L, and we replace all uses of O's chain result with that token
8506	// factor (this last part is handled by makeEquivalentMemoryOrdering).
8507	bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8508	ReuseLoadInfo &RLI,
8509	SelectionDAG &DAG,
8510	ISD::LoadExtType ET) const {
8511	// Conservatively skip reusing for constrained FP nodes.
8512	if (Op ->isStrictFPOpcode())
8513	return false;
8514
8515	SDLoc dl(Op);
8516	bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8517	(Subtarget.hasFPCVT() \|\| Op.getValueType() == MVT::i32);
8518	if (ET == ISD::NON_EXTLOAD &&
8519	(ValidFPToUint \|\| Op.getOpcode() == ISD::FP_TO_SINT) &&
8520	isOperationLegalOrCustom(Op: Op.getOpcode(),
8521	VT: Op.getOperand(i: `0`).getValueType())) {
8522
8523	LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8524	return true;
8525	}
8526
8527	LoadSDNode *LD = dyn_cast<LoadSDNode>(Val&: Op);
8528	if (!LD \|\| LD->getExtensionType() != ET \|\| LD->isVolatile() \|\|
8529	LD->isNonTemporal())
8530	return false;
8531	if (LD->getMemoryVT() != MemVT)
8532	return false;
8533
8534	// If the result of the load is an illegal type, then we can't build a
8535	// valid chain for reuse since the legalised loads and token factor node that
8536	// ties the legalised loads together uses a different output chain then the
8537	// illegal load.
8538	if (!isTypeLegal(VT: LD->getValueType(ResNo: `0`)))
8539	return false;
8540
8541	RLI.Ptr = LD->getBasePtr();
8542	if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8543	assert(LD->getAddressingMode() == ISD::PRE_INC &&
8544	"Non-pre-inc AM on PPC?");
8545	RLI.Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RLI.Ptr.getValueType(), N1: RLI.Ptr,
8546	N2: LD->getOffset());
8547	}
8548
8549	RLI.Chain = LD->getChain();
8550	RLI.MPI = LD->getPointerInfo();
8551	RLI.IsDereferenceable = LD->isDereferenceable();
8552	RLI.IsInvariant = LD->isInvariant();
8553	RLI.Alignment = LD->getAlign();
8554	RLI.AAInfo = LD->getAAInfo();
8555	RLI.Ranges = LD->getRanges();
8556
8557	RLI.ResChain = SDValue (LD, LD->isIndexed() ? `2` : `1`);
8558	return true;
8559	}
8560
8561	/// Analyze profitability of direct move
8562	/// prefer float load to int load plus direct move
8563	/// when there is no integer use of int load
8564	bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8565	SDNode *Origin = Op.getOperand(i: Op ->isStrictFPOpcode() ? `1` : `0`).getNode();
8566	if (Origin->getOpcode() != ISD::LOAD)
8567	return true;
8568
8569	// If there is no LXSIBZX/LXSIHZX, like Power8,
8570	// prefer direct move if the memory size is 1 or 2 bytes.
8571	MachineMemOperand *MMO = cast<LoadSDNode>(Val: Origin)->getMemOperand();
8572	if (!Subtarget.hasP9Vector() &&
8573	(!MMO->getSize().hasValue() \|\| MMO->getSize().getValue() <= `2`))
8574	return true;
8575
8576	for (SDUse &Use : Origin->uses()) {
8577
8578	// Only look at the users of the loaded value.
8579	if (Use.getResNo() != `0`)
8580	continue;
8581
8582	SDNode *User = Use.getUser();
8583	if (User->getOpcode() != ISD::SINT_TO_FP &&
8584	User->getOpcode() != ISD::UINT_TO_FP &&
8585	User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8586	User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8587	return true;
8588	}
8589
8590	return false;
8591	}
8592
8593	static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8594	const PPCSubtarget &Subtarget,
8595	SDValue Chain = SDValue ()) {
8596	bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP \|\|
8597	Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8598	SDLoc dl(Op);
8599
8600	// TODO: Any other flags to propagate?
8601	SDNodeFlags Flags;
8602	Flags.setNoFPExcept(Op ->getFlags().hasNoFPExcept());
8603
8604	// If we have FCFIDS, then use it when converting to single-precision.
8605	// Otherwise, convert to double-precision and then round.
8606	bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8607	unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8608	: (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8609	EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8610	if (Op ->isStrictFPOpcode()) {
8611	if (!Chain)
8612	Chain = Op.getOperand(i: `0`);
8613	return DAG.getNode(Opcode: getPPCStrictOpcode(Opc: ConvOpc), DL: dl,
8614	VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
8615	} else
8616	return DAG.getNode(Opcode: ConvOpc, DL: dl, VT: ConvTy, Operand: Src);
8617	}
8618
8619	/// Custom lowers integer to floating point conversions to use
8620	/// the direct move instructions available in ISA 2.07 to avoid the
8621	/// need for load/store combinations.
8622	SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8623	SelectionDAG &DAG,
8624	const SDLoc &dl) const {
8625	assert((Op.getValueType() == MVT::f32 \|\|
8626	Op.getValueType() == MVT::f64) &&
8627	"Invalid floating point type as target of conversion");
8628	assert(Subtarget.hasFPCVT() &&
8629	"Int to FP conversions with direct moves require FPCVT");
8630	SDValue Src = Op.getOperand(i: Op ->isStrictFPOpcode() ? `1` : `0`);
8631	bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8632	bool Signed = Op.getOpcode() == ISD::SINT_TO_FP \|\|
8633	Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8634	unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8635	SDValue Mov = DAG.getNode(Opcode: MovOpc, DL: dl, VT: MVT::f64, Operand: Src);
8636	return convertIntToFP(Op, Src: Mov, DAG, Subtarget);
8637	}
8638
8639	static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8640
8641	EVT VecVT = Vec.getValueType();
8642	assert(VecVT.isVector() && "Expected a vector type.");
8643	assert(VecVT.getSizeInBits() < `128` && "Vector is already full width.");
8644
8645	EVT EltVT = VecVT.getVectorElementType();
8646	unsigned WideNumElts = `128` / EltVT.getSizeInBits();
8647	EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);
8648
8649	unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8650	SmallVector<SDValue, `16`> Ops(NumConcat);
8651	Ops [`0`] = Vec;
8652	SDValue UndefVec = DAG.getUNDEF(VT: VecVT);
8653	for (unsigned i = `1`; i < NumConcat; ++i)
8654	Ops [i] = UndefVec;
8655
8656	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: WideVT, Ops);
8657	}
8658
8659	SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8660	const SDLoc &dl) const {
8661	bool IsStrict = Op ->isStrictFPOpcode();
8662	unsigned Opc = Op.getOpcode();
8663	SDValue Src = Op.getOperand(i: IsStrict ? `1` : `0`);
8664	assert((Opc == ISD::UINT_TO_FP \|\| Opc == ISD::SINT_TO_FP \|\|
8665	Opc == ISD::STRICT_UINT_TO_FP \|\| Opc == ISD::STRICT_SINT_TO_FP) &&
8666	"Unexpected conversion type");
8667	assert((Op.getValueType() == MVT::v2f64 \|\| Op.getValueType() == MVT::v4f32) &&
8668	"Supports conversions to v2f64/v4f32 only.");
8669
8670	// TODO: Any other flags to propagate?
8671	SDNodeFlags Flags;
8672	Flags.setNoFPExcept(Op ->getFlags().hasNoFPExcept());
8673
8674	bool SignedConv = Opc == ISD::SINT_TO_FP \|\| Opc == ISD::STRICT_SINT_TO_FP;
8675	bool FourEltRes = Op.getValueType() == MVT::v4f32;
8676
8677	SDValue Wide = widenVec(DAG, Vec: Src, dl);
8678	EVT WideVT = Wide.getValueType();
8679	unsigned WideNumElts = WideVT.getVectorNumElements();
8680	MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8681
8682	SmallVector<int, `16`> ShuffV;
8683	for (unsigned i = `0`; i < WideNumElts; ++i)
8684	ShuffV.push_back(Elt: i + WideNumElts);
8685
8686	int Stride = FourEltRes ? WideNumElts / `4` : WideNumElts / `2`;
8687	int SaveElts = FourEltRes ? `4` : `2`;
8688	if (Subtarget.isLittleEndian())
8689	for (int i = `0`; i < SaveElts; i++)
8690	ShuffV [i * Stride] = i;
8691	else
8692	for (int i = `1`; i <= SaveElts; i++)
8693	ShuffV [i * Stride - `1`] = i - `1`;
8694
8695	SDValue ShuffleSrc2 =
8696	SignedConv ? DAG.getUNDEF(VT: WideVT) : DAG.getConstant(Val: `0`, DL: dl, VT: WideVT);
8697	SDValue Arrange = DAG.getVectorShuffle(VT: WideVT, dl, N1: Wide, N2: ShuffleSrc2, Mask: ShuffV);
8698
8699	SDValue Extend;
8700	if (SignedConv) {
8701	Arrange = DAG.getBitcast(VT: IntermediateVT, V: Arrange);
8702	EVT ExtVT = Src.getValueType();
8703	if (Subtarget.hasP9Altivec())
8704	ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT.getVectorElementType(),
8705	NumElements: IntermediateVT.getVectorNumElements());
8706
8707	Extend = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: IntermediateVT, N1: Arrange,
8708	N2: DAG.getValueType(ExtVT));
8709	} else
8710	Extend = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntermediateVT, Operand: Arrange);
8711
8712	if (IsStrict)
8713	return DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other),
8714	Ops: {Op.getOperand(i: `0`), Extend}, Flags);
8715
8716	return DAG.getNode(Opcode: Opc, DL: dl, VT: Op.getValueType(), Operand: Extend);
8717	}
8718
8719	SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8720	SelectionDAG &DAG) const {
8721	SDLoc dl(Op);
8722	bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP \|\|
8723	Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8724	bool IsStrict = Op ->isStrictFPOpcode();
8725	SDValue Src = Op.getOperand(i: IsStrict ? `1` : `0`);
8726	SDValue Chain = IsStrict ? Op.getOperand(i: `0`) : DAG.getEntryNode();
8727
8728	// TODO: Any other flags to propagate?
8729	SDNodeFlags Flags;
8730	Flags.setNoFPExcept(Op ->getFlags().hasNoFPExcept());
8731
8732	EVT InVT = Src.getValueType();
8733	EVT OutVT = Op.getValueType();
8734	if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8735	isOperationCustom(Op: Op.getOpcode(), VT: InVT))
8736	return LowerINT_TO_FPVector(Op, DAG, dl);
8737
8738	// Conversions to f128 are legal.
8739	if (Op.getValueType() == MVT::f128)
8740	return Subtarget.hasP9Vector() ? Op : SDValue ();
8741
8742	// Don't handle ppc_fp128 here; let it be lowered to a libcall.
8743	if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8744	return SDValue ();
8745
8746	if (Src.getValueType() == MVT::i1) {
8747	SDValue Sel = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: Op.getValueType(), N1: Src,
8748	N2: DAG.getConstantFP(Val: `1.0`, DL: dl, VT: Op.getValueType()),
8749	N3: DAG.getConstantFP(Val: `0.0`, DL: dl, VT: Op.getValueType()));
8750	if (IsStrict)
8751	return DAG.getMergeValues(Ops: {Sel, Chain}, dl);
8752	else
8753	return Sel;
8754	}
8755
8756	// If we have direct moves, we can do all the conversion, skip the store/load
8757	// however, without FPCVT we can't do most conversions.
8758	if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8759	Subtarget.isPPC64() && Subtarget.hasFPCVT())
8760	return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8761
8762	assert((IsSigned \|\| Subtarget.hasFPCVT()) &&
8763	"UINT_TO_FP is supported only with FPCVT");
8764
8765	if (Src.getValueType() == MVT::i64) {
8766	SDValue SINT = Src;
8767	// When converting to single-precision, we actually need to convert
8768	// to double-precision first and then round to single-precision.
8769	// To avoid double-rounding effects during that operation, we have
8770	// to prepare the input operand. Bits that might be truncated when
8771	// converting to double-precision are replaced by a bit that won't
8772	// be lost at this stage, but is below the single-precision rounding
8773	// position.
8774	//
8775	// However, if afn is in effect, accept double
8776	// rounding to avoid the extra overhead.
8777	// FIXME: Currently INT_TO_FP can't support fast math flags because
8778	// of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8779	// false.
8780	if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8781	!Op ->getFlags().hasApproximateFuncs()) {
8782
8783	// Twiddle input to make sure the low 11 bits are zero. (If this
8784	// is the case, we are guaranteed the value will fit into the 53 bit
8785	// mantissa of an IEEE double-precision value without rounding.)
8786	// If any of those low 11 bits were not zero originally, make sure
8787	// bit 12 (value 2048) is set instead, so that the final rounding
8788	// to single-precision gets the correct result.
8789	SDValue Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64,
8790	N1: SINT, N2: DAG.getConstant(Val: `2047`, DL: dl, VT: MVT::i64));
8791	Round = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
8792	N1: Round, N2: DAG.getConstant(Val: `2047`, DL: dl, VT: MVT::i64));
8793	Round = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i64, N1: Round, N2: SINT);
8794	Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64, N1: Round,
8795	N2: DAG.getSignedConstant(Val: -`2048`, DL: dl, VT: MVT::i64));
8796
8797	// However, we cannot use that value unconditionally: if the magnitude
8798	// of the input value is small, the bit-twiddling we did above might
8799	// end up visibly changing the output. Fortunately, in that case, we
8800	// don't need to twiddle bits since the original input will convert
8801	// exactly to double-precision floating-point already. Therefore,
8802	// construct a conditional to use the original value if the top 11
8803	// bits are all sign-bit copies, and use the rounded value computed
8804	// above otherwise.
8805	SDValue Cond = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: MVT::i64,
8806	N1: SINT, N2: DAG.getConstant(Val: `53`, DL: dl, VT: MVT::i32));
8807	Cond = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
8808	N1: Cond, N2: DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i64));
8809	Cond = DAG.getSetCC(
8810	DL: dl,
8811	VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: MVT::i64),
8812	LHS: Cond, RHS: DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i64), Cond: ISD::SETUGT);
8813
8814	SINT = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i64, N1: Cond, N2: Round, N3: SINT);
8815	}
8816
8817	ReuseLoadInfo RLI;
8818	SDValue Bits;
8819
8820	MachineFunction &MF = DAG.getMachineFunction();
8821	if (canReuseLoadAddress(Op: SINT, MemVT: MVT::i64, RLI, DAG)) {
8822	Bits = DAG.getLoad(VT: MVT::f64, dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
8823	Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8824	if (RLI.ResChain)
8825	DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: `1`));
8826	} else if (Subtarget.hasLFIWAX() &&
8827	canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::SEXTLOAD)) {
8828	MachineMemOperand *MMO =
8829	MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: `4`,
8830	BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8831	SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8832	Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWAX, dl,
8833	VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8834	Ops, MemVT: MVT::i32, MMO);
8835	if (RLI.ResChain)
8836	DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: `1`));
8837	} else if (Subtarget.hasFPCVT() &&
8838	canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::ZEXTLOAD)) {
8839	MachineMemOperand *MMO =
8840	MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: `4`,
8841	BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8842	SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8843	Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWZX, dl,
8844	VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8845	Ops, MemVT: MVT::i32, MMO);
8846	if (RLI.ResChain)
8847	DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: `1`));
8848	} else if (((Subtarget.hasLFIWAX() &&
8849	SINT.getOpcode() == ISD::SIGN_EXTEND) \|\|
8850	(Subtarget.hasFPCVT() &&
8851	SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8852	SINT.getOperand(i: `0`).getValueType() == MVT::i32) {
8853	MachineFrameInfo &MFI = MF.getFrameInfo();
8854	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8855
8856	int FrameIdx = MFI.CreateStackObject(Size: `4`, Alignment: Align (`4`), isSpillSlot: false);
8857	SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
8858
8859	SDValue Store = DAG.getStore(Chain, dl, Val: SINT.getOperand(i: `0`), Ptr: FIdx,
8860	PtrInfo: MachinePointerInfo::getFixedStack(
8861	MF&: DAG.getMachineFunction(), FI: FrameIdx));
8862	Chain = Store;
8863
8864	assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8865	"Expected an i32 store");
8866
8867	RLI.Ptr = FIdx;
8868	RLI.Chain = Chain;
8869	RLI.MPI =
8870	MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
8871	RLI.Alignment = Align (`4`);
8872
8873	MachineMemOperand *MMO =
8874	MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: `4`,
8875	BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8876	SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8877	Bits = DAG.getMemIntrinsicNode(Opcode: SINT.getOpcode() == ISD::ZERO_EXTEND ?
8878	PPCISD::LFIWZX : PPCISD::LFIWAX,
8879	dl, VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8880	Ops, MemVT: MVT::i32, MMO);
8881	Chain = Bits.getValue(R: `1`);
8882	} else
8883	Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::f64, Operand: SINT);
8884
8885	SDValue FP = convertIntToFP(Op, Src: Bits, DAG, Subtarget, Chain);
8886	if (IsStrict)
8887	Chain = FP.getValue(R: `1`);
8888
8889	if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8890	if (IsStrict)
8891	FP = DAG.getNode(
8892	Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
8893	Ops: {Chain, FP, DAG.getIntPtrConstant(Val: `0`, DL: dl, /isTarget=/true)},
8894	Flags);
8895	else
8896	FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
8897	N2: DAG.getIntPtrConstant(Val: `0`, DL: dl, /isTarget=/true));
8898	}
8899	return FP;
8900	}
8901
8902	assert(Src.getValueType() == MVT::i32 &&
8903	"Unhandled INT_TO_FP type in custom expander!");
8904	// Since we only generate this in 64-bit mode, we can take advantage of
8905	// 64-bit registers. In particular, sign extend the input value into the
8906	// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8907	// then lfd it and fcfid it.
8908	MachineFunction &MF = DAG.getMachineFunction();
8909	MachineFrameInfo &MFI = MF.getFrameInfo();
8910	EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
8911
8912	SDValue Ld;
8913	if (Subtarget.hasLFIWAX() \|\| Subtarget.hasFPCVT()) {
8914	ReuseLoadInfo RLI;
8915	bool ReusingLoad;
8916	if (!(ReusingLoad = canReuseLoadAddress(Op: Src, MemVT: MVT::i32, RLI, DAG))) {
8917	int FrameIdx = MFI.CreateStackObject(Size: `4`, Alignment: Align (`4`), isSpillSlot: false);
8918	SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
8919
8920	SDValue Store = DAG.getStore(Chain, dl, Val: Src, Ptr: FIdx,
8921	PtrInfo: MachinePointerInfo::getFixedStack(
8922	MF&: DAG.getMachineFunction(), FI: FrameIdx));
8923	Chain = Store;
8924
8925	assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8926	"Expected an i32 store");
8927
8928	RLI.Ptr = FIdx;
8929	RLI.Chain = Chain;
8930	RLI.MPI =
8931	MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
8932	RLI.Alignment = Align (`4`);
8933	}
8934
8935	MachineMemOperand *MMO =
8936	MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: `4`,
8937	BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8938	SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8939	Ld = DAG.getMemIntrinsicNode(Opcode: IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8940	VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops,
8941	MemVT: MVT::i32, MMO);
8942	Chain = Ld.getValue(R: `1`);
8943	if (ReusingLoad && RLI.ResChain) {
8944	DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Ld.getValue(R: `1`));
8945	}
8946	} else {
8947	assert(Subtarget.isPPC64() &&
8948	"i32->FP without LFIWAX supported only on PPC64");
8949
8950	int FrameIdx = MFI.CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
8951	SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
8952
8953	SDValue Ext64 = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i64, Operand: Src);
8954
8955	// STD the extended value into the stack slot.
8956	SDValue Store = DAG.getStore(
8957	Chain, dl, Val: Ext64, Ptr: FIdx,
8958	PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
8959	Chain = Store;
8960
8961	// Load the value as a double.
8962	Ld = DAG.getLoad(
8963	VT: MVT::f64, dl, Chain, Ptr: FIdx,
8964	PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
8965	Chain = Ld.getValue(R: `1`);
8966	}
8967
8968	// FCFID it and return it.
8969	SDValue FP = convertIntToFP(Op, Src: Ld, DAG, Subtarget, Chain);
8970	if (IsStrict)
8971	Chain = FP.getValue(R: `1`);
8972	if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8973	if (IsStrict)
8974	FP = DAG.getNode(
8975	Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
8976	Ops: {Chain, FP, DAG.getIntPtrConstant(Val: `0`, DL: dl, /isTarget=/true)}, Flags);
8977	else
8978	FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
8979	N2: DAG.getIntPtrConstant(Val: `0`, DL: dl, /isTarget=/true));
8980	}
8981	return FP;
8982	}
8983
8984	SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
8985	SelectionDAG &DAG) const {
8986	SDLoc Dl(Op);
8987	MachineFunction &MF = DAG.getMachineFunction();
8988	EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
8989	SDValue Chain = Op.getOperand(i: `0`);
8990
8991	// If requested mode is constant, just use simpler mtfsb/mffscrni
8992	if (auto *CVal = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `1`))) {
8993	uint64_t Mode = CVal->getZExtValue();
8994	assert(Mode < `4` && "Unsupported rounding mode!");
8995	unsigned InternalRnd = Mode ^ (~(Mode >> `1`) & `1`);
8996	if (Subtarget.isISA3_0())
8997	return SDValue (
8998	DAG.getMachineNode(
8999	Opcode: PPC::MFFSCRNI, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
9000	Ops: {DAG.getConstant(Val: InternalRnd, DL: Dl, VT: MVT::i32, isTarget: true), Chain}),
9001	`1`);
9002	SDNode *SetHi = DAG.getMachineNode(
9003	Opcode: (InternalRnd & `2`) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
9004	Ops: {DAG.getConstant(Val: `30`, DL: Dl, VT: MVT::i32, isTarget: true), Chain});
9005	SDNode *SetLo = DAG.getMachineNode(
9006	Opcode: (InternalRnd & `1`) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
9007	Ops: {DAG.getConstant(Val: `31`, DL: Dl, VT: MVT::i32, isTarget: true), SDValue (SetHi, `0`)});
9008	return SDValue (SetLo, `0`);
9009	}
9010
9011	// Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9012	SDValue One = DAG.getConstant(Val: `1`, DL: Dl, VT: MVT::i32);
9013	SDValue SrcFlag = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: Op.getOperand(i: `1`),
9014	N2: DAG.getConstant(Val: `3`, DL: Dl, VT: MVT::i32));
9015	SDValue DstFlag = DAG.getNode(
9016	Opcode: ISD::XOR, DL: Dl, VT: MVT::i32, N1: SrcFlag,
9017	N2: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32,
9018	N1: DAG.getNOT(DL: Dl,
9019	Val: DAG.getNode(Opcode: ISD::SRL, DL: Dl, VT: MVT::i32, N1: SrcFlag, N2: One),
9020	VT: MVT::i32),
9021	N2: One));
9022	// For Power9, there's faster mffscrn, and we don't need to read FPSCR
9023	SDValue MFFS;
9024	if (!Subtarget.isISA3_0()) {
9025	MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: Dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
9026	Chain = MFFS.getValue(R: `1`);
9027	}
9028	SDValue NewFPSCR;
9029	if (Subtarget.isPPC64()) {
9030	if (Subtarget.isISA3_0()) {
9031	NewFPSCR = DAG.getAnyExtOrTrunc(Op: DstFlag, DL: Dl, VT: MVT::i64);
9032	} else {
9033	// Set the last two bits (rounding mode) of bitcasted FPSCR.
9034	SDNode *InsertRN = DAG.getMachineNode(
9035	Opcode: PPC::RLDIMI, dl: Dl, VT: MVT::i64,
9036	Ops: {DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::i64, Operand: MFFS),
9037	DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: Dl, VT: MVT::i64, Operand: DstFlag),
9038	DAG.getTargetConstant(Val: `0`, DL: Dl, VT: MVT::i32),
9039	DAG.getTargetConstant(Val: `62`, DL: Dl, VT: MVT::i32)});
9040	NewFPSCR = SDValue (InsertRN, `0`);
9041	}
9042	NewFPSCR = DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::f64, Operand: NewFPSCR);
9043	} else {
9044	// In 32-bit mode, store f64, load and update the lower half.
9045	int SSFI = MF.getFrameInfo().CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
9046	SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
9047	SDValue Addr = Subtarget.isLittleEndian()
9048	? StackSlot
9049	: DAG.getNode(Opcode: ISD::ADD, DL: Dl, VT: PtrVT, N1: StackSlot,
9050	N2: DAG.getConstant(Val: `4`, DL: Dl, VT: PtrVT));
9051	if (Subtarget.isISA3_0()) {
9052	Chain = DAG.getStore(Chain, dl: Dl, Val: DstFlag, Ptr: Addr, PtrInfo: MachinePointerInfo ());
9053	} else {
9054	Chain = DAG.getStore(Chain, dl: Dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo ());
9055	SDValue Tmp =
9056	DAG.getLoad(VT: MVT::i32, dl: Dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo ());
9057	Chain = Tmp.getValue(R: `1`);
9058	Tmp = SDValue (DAG.getMachineNode(
9059	Opcode: PPC::RLWIMI, dl: Dl, VT: MVT::i32,
9060	Ops: {Tmp, DstFlag, DAG.getTargetConstant(Val: `0`, DL: Dl, VT: MVT::i32),
9061	DAG.getTargetConstant(Val: `30`, DL: Dl, VT: MVT::i32),
9062	DAG.getTargetConstant(Val: `31`, DL: Dl, VT: MVT::i32)}),
9063	`0`);
9064	Chain = DAG.getStore(Chain, dl: Dl, Val: Tmp, Ptr: Addr, PtrInfo: MachinePointerInfo ());
9065	}
9066	NewFPSCR =
9067	DAG.getLoad(VT: MVT::f64, dl: Dl, Chain, Ptr: StackSlot, PtrInfo: MachinePointerInfo ());
9068	Chain = NewFPSCR.getValue(R: `1`);
9069	}
9070	if (Subtarget.isISA3_0())
9071	return SDValue (DAG.getMachineNode(Opcode: PPC::MFFSCRN, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
9072	Ops: {NewFPSCR, Chain}),
9073	`1`);
9074	SDValue Zero = DAG.getConstant(Val: `0`, DL: Dl, VT: MVT::i32, isTarget: true);
9075	SDNode *MTFSF = DAG.getMachineNode(
9076	Opcode: PPC::MTFSF, dl: Dl, VT: MVT::Other,
9077	Ops: {DAG.getConstant(Val: `255`, DL: Dl, VT: MVT::i32, isTarget: true), NewFPSCR, Zero, Zero, Chain});
9078	return SDValue (MTFSF, `0`);
9079	}
9080
9081	SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9082	SelectionDAG &DAG) const {
9083	SDLoc dl(Op);
9084	/*
9085	The rounding mode is in bits 30:31 of FPSR, and has the following
9086	settings:
9087	00 Round to nearest
9088	01 Round to 0
9089	10 Round to +inf
9090	11 Round to -inf
9091
9092	GET_ROUNDING, on the other hand, expects the following:
9093	-1 Undefined
9094	0 Round to 0
9095	1 Round to nearest
9096	2 Round to +inf
9097	3 Round to -inf
9098
9099	To perform the conversion, we do:
9100	((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9101	*/
9102
9103	MachineFunction &MF = DAG.getMachineFunction();
9104	EVT VT = Op.getValueType();
9105	EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
9106
9107	// Save FP Control Word to register
9108	SDValue Chain = Op.getOperand(i: `0`);
9109	SDValue MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
9110	Chain = MFFS.getValue(R: `1`);
9111
9112	SDValue CWD;
9113	if (isTypeLegal(VT: MVT::i64)) {
9114	CWD = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32,
9115	Operand: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: MFFS));
9116	} else {
9117	// Save FP register to stack slot
9118	int SSFI = MF.getFrameInfo().CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
9119	SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
9120	Chain = DAG.getStore(Chain, dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo ());
9121
9122	// Load FP Control Word from low 32 bits of stack slot.
9123	assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9124	"Stack slot adjustment is valid only on big endian subtargets!");
9125	SDValue Four = DAG.getConstant(Val: `4`, DL: dl, VT: PtrVT);
9126	SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackSlot, N2: Four);
9127	CWD = DAG.getLoad(VT: MVT::i32, dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo ());
9128	Chain = CWD.getValue(R: `1`);
9129	}
9130
9131	// Transform as necessary
9132	SDValue CWD1 =
9133	DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
9134	N1: CWD, N2: DAG.getConstant(Val: `3`, DL: dl, VT: MVT::i32));
9135	SDValue CWD2 =
9136	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
9137	N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
9138	N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32,
9139	N1: CWD, N2: DAG.getConstant(Val: `3`, DL: dl, VT: MVT::i32)),
9140	N2: DAG.getConstant(Val: `3`, DL: dl, VT: MVT::i32)),
9141	N2: DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i32));
9142
9143	SDValue RetVal =
9144	DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: CWD1, N2: CWD2);
9145
9146	RetVal =
9147	DAG.getNode(Opcode: (VT.getSizeInBits() < `16` ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9148	DL: dl, VT, Operand: RetVal);
9149
9150	return DAG.getMergeValues(Ops: {RetVal, Chain}, dl);
9151	}
9152
9153	SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9154	EVT VT = Op.getValueType();
9155	uint64_t BitWidth = VT.getSizeInBits();
9156	SDLoc dl(Op);
9157	assert(Op.getNumOperands() == `3` &&
9158	VT == Op.getOperand(`1`).getValueType() &&
9159	"Unexpected SHL!");
9160
9161	// Expand into a bunch of logical ops. Note that these ops
9162	// depend on the PPC behavior for oversized shift amounts.
9163	SDValue Lo = Op.getOperand(i: `0`);
9164	SDValue Hi = Op.getOperand(i: `1`);
9165	SDValue Amt = Op.getOperand(i: `2`);
9166	EVT AmtVT = Amt.getValueType();
9167
9168	SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9169	N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9170	SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Amt);
9171	SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Tmp1);
9172	SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR , DL: dl, VT, N1: Tmp2, N2: Tmp3);
9173	SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9174	N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9175	SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Tmp5);
9176	SDValue OutHi = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9177	SDValue OutLo = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Amt);
9178	SDValue OutOps[] = { OutLo, OutHi };
9179	return DAG.getMergeValues(Ops: OutOps, dl);
9180	}
9181
9182	SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9183	EVT VT = Op.getValueType();
9184	SDLoc dl(Op);
9185	uint64_t BitWidth = VT.getSizeInBits();
9186	assert(Op.getNumOperands() == `3` &&
9187	VT == Op.getOperand(`1`).getValueType() &&
9188	"Unexpected SRL!");
9189
9190	// Expand into a bunch of logical ops. Note that these ops
9191	// depend on the PPC behavior for oversized shift amounts.
9192	SDValue Lo = Op.getOperand(i: `0`);
9193	SDValue Hi = Op.getOperand(i: `1`);
9194	SDValue Amt = Op.getOperand(i: `2`);
9195	EVT AmtVT = Amt.getValueType();
9196
9197	SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9198	N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9199	SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9200	SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9201	SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9202	SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9203	N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9204	SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Tmp5);
9205	SDValue OutLo = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9206	SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Amt);
9207	SDValue OutOps[] = { OutLo, OutHi };
9208	return DAG.getMergeValues(Ops: OutOps, dl);
9209	}
9210
9211	SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9212	SDLoc dl(Op);
9213	EVT VT = Op.getValueType();
9214	uint64_t BitWidth = VT.getSizeInBits();
9215	assert(Op.getNumOperands() == `3` &&
9216	VT == Op.getOperand(`1`).getValueType() &&
9217	"Unexpected SRA!");
9218
9219	// Expand into a bunch of logical ops, followed by a select_cc.
9220	SDValue Lo = Op.getOperand(i: `0`);
9221	SDValue Hi = Op.getOperand(i: `1`);
9222	SDValue Amt = Op.getOperand(i: `2`);
9223	EVT AmtVT = Amt.getValueType();
9224
9225	SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9226	N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9227	SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9228	SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9229	SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9230	SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9231	N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9232	SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Tmp5);
9233	SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Amt);
9234	SDValue OutLo = DAG.getSelectCC(DL: dl, LHS: Tmp5, RHS: DAG.getConstant(Val: `0`, DL: dl, VT: AmtVT),
9235	True: Tmp4, False: Tmp6, Cond: ISD::SETLE);
9236	SDValue OutOps[] = { OutLo, OutHi };
9237	return DAG.getMergeValues(Ops: OutOps, dl);
9238	}
9239
9240	SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9241	SelectionDAG &DAG) const {
9242	SDLoc dl(Op);
9243	EVT VT = Op.getValueType();
9244	unsigned BitWidth = VT.getSizeInBits();
9245
9246	bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9247	SDValue X = Op.getOperand(i: `0`);
9248	SDValue Y = Op.getOperand(i: `1`);
9249	SDValue Z = Op.getOperand(i: `2`);
9250	EVT AmtVT = Z.getValueType();
9251
9252	// fshl: (X << (Z % BW)) \| (Y >> (BW - (Z % BW)))
9253	// fshr: (X << (BW - (Z % BW))) \| (Y >> (Z % BW))
9254	// This is simpler than TargetLowering::expandFunnelShift because we can rely
9255	// on PowerPC shift by BW being well defined.
9256	Z = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Z,
9257	N2: DAG.getConstant(Val: BitWidth - `1`, DL: dl, VT: AmtVT));
9258	SDValue SubZ =
9259	DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT, N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Z);
9260	X = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: X, N2: IsFSHL ? Z : SubZ);
9261	Y = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Y, N2: IsFSHL ? SubZ : Z);
9262	return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: X, N2: Y);
9263	}
9264
9265	//===----------------------------------------------------------------------===//
9266	// Vector related lowering.
9267	//
9268
9269	/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9270	/// element size of SplatSize. Cast the result to VT.
9271	static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9272	SelectionDAG &DAG, const SDLoc &dl) {
9273	static const MVT VTys[] = { // canonical VT to use for each size.
9274	MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9275	};
9276
9277	EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-`1`];
9278
9279	// For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9280	if (Val == ((`1LLU` << (SplatSize * `8`)) - `1`)) {
9281	SplatSize = `1`;
9282	Val = `0xFF`;
9283	}
9284
9285	EVT CanonicalVT = VTys[SplatSize-`1`];
9286
9287	// Build a canonical splat for this value.
9288	// Explicitly truncate APInt here, as this API is used with a mix of
9289	// signed and unsigned values.
9290	return DAG.getBitcast(
9291	VT: ReqVT,
9292	V: DAG.getConstant(Val: APInt (`64`, Val).trunc(width: SplatSize * `8`), DL: dl, VT: CanonicalVT));
9293	}
9294
9295	/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9296	/// specified intrinsic ID.
9297	static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9298	const SDLoc &dl, EVT DestVT = MVT::Other) {
9299	if (DestVT == MVT::Other) DestVT = Op.getValueType();
9300	return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9301	N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op);
9302	}
9303
9304	/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9305	/// specified intrinsic ID.
9306	static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9307	SelectionDAG &DAG, const SDLoc &dl,
9308	EVT DestVT = MVT::Other) {
9309	if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9310	return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9311	N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: LHS, N3: RHS);
9312	}
9313
9314	/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9315	/// specified intrinsic ID.
9316	static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9317	SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9318	EVT DestVT = MVT::Other) {
9319	if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9320	return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9321	N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op0, N3: Op1, N4: Op2);
9322	}
9323
9324	/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9325	/// amount. The result has the specified value type.
9326	static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9327	SelectionDAG &DAG, const SDLoc &dl) {
9328	// Force LHS/RHS to be the right type.
9329	LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: LHS);
9330	RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: RHS);
9331
9332	int Ops[`16`];
9333	for (unsigned i = `0`; i != `16`; ++i)
9334	Ops[i] = i + Amt;
9335	SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: LHS, N2: RHS, Mask: Ops);
9336	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
9337	}
9338
9339	/// Do we have an efficient pattern in a .td file for this node?
9340	///
9341	/// \param V - pointer to the BuildVectorSDNode being matched
9342	/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9343	///
9344	/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9345	/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9346	/// the opposite is true (expansion is beneficial) are:
9347	/// - The node builds a vector out of integers that are not 32 or 64-bits
9348	/// - The node builds a vector out of constants
9349	/// - The node is a "load-and-splat"
9350	/// In all other cases, we will choose to keep the BUILD_VECTOR.
9351	static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9352	bool HasDirectMove,
9353	bool HasP8Vector) {
9354	EVT VecVT = V->getValueType(ResNo: `0`);
9355	bool RightType = VecVT == MVT::v2f64 \|\|
9356	(HasP8Vector && VecVT == MVT::v4f32) \|\|
9357	(HasDirectMove && (VecVT == MVT::v2i64 \|\| VecVT == MVT::v4i32));
9358	if (!RightType)
9359	return false;
9360
9361	bool IsSplat = true;
9362	bool IsLoad = false;
9363	SDValue Op0 = V->getOperand(Num: `0`);
9364
9365	// This function is called in a block that confirms the node is not a constant
9366	// splat. So a constant BUILD_VECTOR here means the vector is built out of
9367	// different constants.
9368	if (V->isConstant())
9369	return false;
9370	for (int i = `0`, e = V->getNumOperands(); i < e; ++i) {
9371	if (V->getOperand(Num: i).isUndef())
9372	return false;
9373	// We want to expand nodes that represent load-and-splat even if the
9374	// loaded value is a floating point truncation or conversion to int.
9375	if (V->getOperand(Num: i).getOpcode() == ISD::LOAD \|\|
9376	(V->getOperand(Num: i).getOpcode() == ISD::FP_ROUND &&
9377	V->getOperand(Num: i).getOperand(i: `0`).getOpcode() == ISD::LOAD) \|\|
9378	(V->getOperand(Num: i).getOpcode() == ISD::FP_TO_SINT &&
9379	V->getOperand(Num: i).getOperand(i: `0`).getOpcode() == ISD::LOAD) \|\|
9380	(V->getOperand(Num: i).getOpcode() == ISD::FP_TO_UINT &&
9381	V->getOperand(Num: i).getOperand(i: `0`).getOpcode() == ISD::LOAD))
9382	IsLoad = true;
9383	// If the operands are different or the input is not a load and has more
9384	// uses than just this BV node, then it isn't a splat.
9385	if (V->getOperand(Num: i) != Op0 \|\|
9386	(!IsLoad && !V->isOnlyUserOf(N: V->getOperand(Num: i).getNode())))
9387	IsSplat = false;
9388	}
9389	return !(IsSplat && IsLoad);
9390	}
9391
9392	// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9393	SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9394
9395	SDLoc dl(Op);
9396	SDValue Op0 = Op ->getOperand(Num: `0`);
9397
9398	if (!Subtarget.isPPC64() \|\| (Op0.getOpcode() != ISD::BUILD_PAIR) \|\|
9399	(Op.getValueType() != MVT::f128))
9400	return SDValue ();
9401
9402	SDValue Lo = Op0.getOperand(i: `0`);
9403	SDValue Hi = Op0.getOperand(i: `1`);
9404	if ((Lo.getValueType() != MVT::i64) \|\| (Hi.getValueType() != MVT::i64))
9405	return SDValue ();
9406
9407	if (!Subtarget.isLittleEndian())
9408	std::swap(a&: Lo, b&: Hi);
9409
9410	return DAG.getNode(Opcode: PPCISD::BUILD_FP128, DL: dl, VT: MVT::f128, N1: Lo, N2: Hi);
9411	}
9412
9413	static const SDValue getNormalLoadInput(const* SDValue &Op, bool &IsPermuted) {
9414	const SDValue *InputLoad = &Op;
9415	while (InputLoad->getOpcode() == ISD::BITCAST)
9416	InputLoad = &InputLoad->getOperand(i: `0`);
9417	if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR \|\|
9418	InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9419	IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9420	InputLoad = &InputLoad->getOperand(i: `0`);
9421	}
9422	if (InputLoad->getOpcode() != ISD::LOAD)
9423	return nullptr;
9424	LoadSDNode LD = cast<LoadSDNode>(Val: InputLoad);
9425	return ISD::isNormalLoad(N: LD) ? InputLoad : nullptr;
9426	}
9427
9428	// Convert the argument APFloat to a single precision APFloat if there is no
9429	// loss in information during the conversion to single precision APFloat and the
9430	// resulting number is not a denormal number. Return true if successful.
9431	bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9432	APFloat APFloatToConvert = ArgAPFloat;
9433	bool LosesInfo = true;
9434	APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9435	losesInfo: &LosesInfo);
9436	bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9437	if (Success)
9438	ArgAPFloat = APFloatToConvert;
9439	return Success;
9440	}
9441
9442	// Bitcast the argument APInt to a double and convert it to a single precision
9443	// APFloat, bitcast the APFloat to an APInt and assign it to the original
9444	// argument if there is no loss in information during the conversion from
9445	// double to single precision APFloat and the resulting number is not a denormal
9446	// number. Return true if successful.
9447	bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9448	double DpValue = ArgAPInt.bitsToDouble();
9449	APFloat APFloatDp(DpValue);
9450	bool Success = convertToNonDenormSingle(ArgAPFloat&: APFloatDp);
9451	if (Success)
9452	ArgAPInt = APFloatDp.bitcastToAPInt();
9453	return Success;
9454	}
9455
9456	// Nondestructive check for convertTonNonDenormSingle.
9457	bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9458	// Only convert if it loses info, since XXSPLTIDP should
9459	// handle the other case.
9460	APFloat APFloatToConvert = ArgAPFloat;
9461	bool LosesInfo = true;
9462	APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9463	losesInfo: &LosesInfo);
9464
9465	return (!LosesInfo && !APFloatToConvert.isDenormal());
9466	}
9467
9468	static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9469	unsigned &Opcode) {
9470	LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Val: Op.getOperand(i: `0`));
9471	if (!InputNode \|\| !Subtarget.hasVSX() \|\| !ISD::isUNINDEXEDLoad(N: InputNode))
9472	return false;
9473
9474	EVT Ty = Op ->getValueType(ResNo: `0`);
9475	// For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9476	// as we cannot handle extending loads for these types.
9477	if ((Ty == MVT::v2f64 \|\| Ty == MVT::v4f32 \|\| Ty == MVT::v4i32) &&
9478	ISD::isNON_EXTLoad(N: InputNode))
9479	return true;
9480
9481	EVT MemVT = InputNode->getMemoryVT();
9482	// For v8i16 and v16i8 types, extending loads can be handled as long as the
9483	// memory VT is the same vector element VT type.
9484	// The loads feeding into the v8i16 and v16i8 types will be extending because
9485	// scalar i8/i16 are not legal types.
9486	if ((Ty == MVT::v8i16 \|\| Ty == MVT::v16i8) && ISD::isEXTLoad(N: InputNode) &&
9487	(MemVT == Ty.getVectorElementType()))
9488	return true;
9489
9490	if (Ty == MVT::v2i64) {
9491	// Check the extend type, when the input type is i32, and the output vector
9492	// type is v2i64.
9493	if (MemVT == MVT::i32) {
9494	if (ISD::isZEXTLoad(N: InputNode))
9495	Opcode = PPCISD::ZEXT_LD_SPLAT;
9496	if (ISD::isSEXTLoad(N: InputNode))
9497	Opcode = PPCISD::SEXT_LD_SPLAT;
9498	}
9499	return true;
9500	}
9501	return false;
9502	}
9503
9504	bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9505	bool IsLittleEndian) {
9506	assert(BVN.getNumOperands() > `0` && "Unexpected 0-size build vector");
9507
9508	BitMask.clearAllBits();
9509	EVT VT = BVN.getValueType(ResNo: `0`);
9510	unsigned VTSize = VT.getSizeInBits();
9511	APInt ConstValue(VTSize, `0`);
9512
9513	unsigned EltWidth = VT.getScalarSizeInBits();
9514
9515	unsigned BitPos = `0`;
9516	for (auto OpVal : BVN.op_values()) {
9517	auto *CN = dyn_cast<ConstantSDNode>(Val&: OpVal);
9518
9519	if (!CN)
9520	return false;
9521	// The elements in a vector register are ordered in reverse byte order
9522	// between little-endian and big-endian modes.
9523	ConstValue.insertBits(SubBits: CN->getAPIntValue().zextOrTrunc(width: EltWidth),
9524	bitPosition: IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9525	BitPos += EltWidth;
9526	}
9527
9528	for (unsigned J = `0`; J < `16`; ++J) {
9529	APInt ExtractValue = ConstValue.extractBits(numBits: `8`, bitPosition: J * `8`);
9530	if (ExtractValue != `0x00` && ExtractValue != `0xFF`)
9531	return false;
9532	if (ExtractValue == `0xFF`)
9533	BitMask.setBit(J);
9534	}
9535	return true;
9536	}
9537
9538	// If this is a case we can't handle, return null and let the default
9539	// expansion code take care of it. If we CAN select this case, and if it
9540	// selects to a single instruction, return Op. Otherwise, if we can codegen
9541	// this case more efficiently than a constant pool load, lower it to the
9542	// sequence of ops that should be used.
9543	SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9544	SelectionDAG &DAG) const {
9545	SDLoc dl(Op);
9546	BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: Op.getNode());
9547	assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9548
9549	if (Subtarget.hasP10Vector()) {
9550	APInt BitMask(`32`, `0`);
9551	// If the value of the vector is all zeros or all ones,
9552	// we do not convert it to MTVSRBMI.
9553	// The xxleqv instruction sets a vector with all ones.
9554	// The xxlxor instruction sets a vector with all zeros.
9555	if (isValidMtVsrBmi(BitMask, BVN&: *BVN, IsLittleEndian: Subtarget.isLittleEndian()) &&
9556	BitMask != `0` && BitMask != `0xffff`) {
9557	SDValue SDConstant = DAG.getTargetConstant(Val: BitMask, DL: dl, VT: MVT::i32);
9558	MachineSDNode *MSDNode =
9559	DAG.getMachineNode(Opcode: PPC::MTVSRBMI, dl, VT: MVT::v16i8, Op1: SDConstant);
9560	SDValue SDV = SDValue (MSDNode, `0`);
9561	EVT DVT = BVN->getValueType(ResNo: `0`);
9562	EVT SVT = SDV.getValueType();
9563	if (SVT != DVT) {
9564	SDV = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: DVT, Operand: SDV);
9565	}
9566	return SDV;
9567	}
9568	// Recognize build vector patterns to emit VSX vector instructions
9569	// instead of loading value from memory.
9570	if (SDValue VecPat = combineBVLoadsSpecialValue(Operand: Op, DAG))
9571	return VecPat;
9572	}
9573	// Check if this is a splat of a constant value.
9574	APInt APSplatBits, APSplatUndef;
9575	unsigned SplatBitSize;
9576	bool HasAnyUndefs;
9577	bool BVNIsConstantSplat =
9578	BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
9579	HasAnyUndefs, MinSplatBits: `0`, isBigEndian: !Subtarget.isLittleEndian());
9580
9581	// If it is a splat of a double, check if we can shrink it to a 32 bit
9582	// non-denormal float which when converted back to double gives us the same
9583	// double. This is to exploit the XXSPLTIDP instruction.
9584	// If we lose precision, we use XXSPLTI32DX.
9585	if (BVNIsConstantSplat && (SplatBitSize == `64`) &&
9586	Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9587	// Check the type first to short-circuit so we don't modify APSplatBits if
9588	// this block isn't executed.
9589	if ((Op ->getValueType(ResNo: `0`) == MVT::v2f64) &&
9590	convertToNonDenormSingle(ArgAPInt&: APSplatBits)) {
9591	SDValue SplatNode = DAG.getNode(
9592	Opcode: PPCISD::XXSPLTI_SP_TO_DP, DL: dl, VT: MVT::v2f64,
9593	Operand: DAG.getTargetConstant(Val: APSplatBits.getZExtValue(), DL: dl, VT: MVT::i32));
9594	return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
9595	} else {
9596	// We may lose precision, so we have to use XXSPLTI32DX.
9597
9598	uint32_t Hi = Hi_32(Value: APSplatBits.getZExtValue());
9599	uint32_t Lo = Lo_32(Value: APSplatBits.getZExtValue());
9600	SDValue SplatNode = DAG.getUNDEF(VT: MVT::v2i64);
9601
9602	if (!Hi \|\| !Lo)
9603	// If either load is 0, then we should generate XXLXOR to set to 0.
9604	SplatNode = DAG.getTargetConstant(Val: `0`, DL: dl, VT: MVT::v2i64);
9605
9606	if (Hi)
9607	SplatNode = DAG.getNode(
9608	Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
9609	N2: DAG.getTargetConstant(Val: `0`, DL: dl, VT: MVT::i32),
9610	N3: DAG.getTargetConstant(Val: Hi, DL: dl, VT: MVT::i32));
9611
9612	if (Lo)
9613	SplatNode =
9614	DAG.getNode(Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
9615	N2: DAG.getTargetConstant(Val: `1`, DL: dl, VT: MVT::i32),
9616	N3: DAG.getTargetConstant(Val: Lo, DL: dl, VT: MVT::i32));
9617
9618	return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
9619	}
9620	}
9621
9622	bool IsSplat64 = false;
9623	uint64_t SplatBits = `0`;
9624	int32_t SextVal = `0`;
9625	if (BVNIsConstantSplat && SplatBitSize <= `64`) {
9626	SplatBits = APSplatBits.getZExtValue();
9627	if (SplatBitSize <= `32`) {
9628	SextVal = SignExtend32(X: SplatBits, B: SplatBitSize);
9629	} else if (SplatBitSize == `64` && Subtarget.hasP8Altivec()) {
9630	int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9631	bool P9Vector = Subtarget.hasP9Vector();
9632	int32_t Hi = P9Vector ? `127` : `15`;
9633	int32_t Lo = P9Vector ? -`128` : -`16`;
9634	IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9635	SextVal = static_cast<int32_t>(SplatBits);
9636	}
9637	}
9638
9639	if (!BVNIsConstantSplat \|\| (SplatBitSize > `32` && !IsSplat64)) {
9640	unsigned NewOpcode = PPCISD::LD_SPLAT;
9641
9642	// Handle load-and-splat patterns as we have instructions that will do this
9643	// in one go.
9644	if (DAG.isSplatValue(V: Op, AllowUndefs: true) &&
9645	isValidSplatLoad(Subtarget, Op, Opcode&: NewOpcode)) {
9646	const SDValue *InputLoad = &Op.getOperand(i: `0`);
9647	LoadSDNode LD = cast<LoadSDNode>(Val: InputLoad);
9648
9649	// If the input load is an extending load, it will be an i32 -> i64
9650	// extending load and isValidSplatLoad() will update NewOpcode.
9651	unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9652	unsigned ElementSize =
9653	MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? `1` : `2`);
9654
9655	assert(((ElementSize == `2` * MemorySize)
9656	? (NewOpcode == PPCISD::ZEXT_LD_SPLAT \|\|
9657	NewOpcode == PPCISD::SEXT_LD_SPLAT)
9658	: (NewOpcode == PPCISD::LD_SPLAT)) &&
9659	"Unmatched element size and opcode!\n");
9660
9661	// Checking for a single use of this load, we have to check for vector
9662	// width (128 bits) / ElementSize uses (since each operand of the
9663	// BUILD_VECTOR is a separate use of the value.
9664	unsigned NumUsesOfInputLD = `128` / ElementSize;
9665	for (SDValue BVInOp : Op ->ops())
9666	if (BVInOp.isUndef())
9667	NumUsesOfInputLD--;
9668
9669	// Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9670	// Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9671	// 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9672	// 15", but function IsValidSplatLoad() now will only return true when
9673	// the data at index 0 is not nullptr. So we will not get into trouble for
9674	// these cases.
9675	//
9676	// case 1 - lfiwzx/lfiwax
9677	// 1.1: load result is i32 and is sign/zero extend to i64;
9678	// 1.2: build a v2i64 vector type with above loaded value;
9679	// 1.3: the vector has only one value at index 0, others are all undef;
9680	// 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9681	if (NumUsesOfInputLD == `1` &&
9682	(Op ->getValueType(ResNo: `0`) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9683	!Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9684	Subtarget.hasLFIWAX()))
9685	return SDValue ();
9686
9687	// case 2 - lxvr[hb]x
9688	// 2.1: load result is at most i16;
9689	// 2.2: build a vector with above loaded value;
9690	// 2.3: the vector has only one value at index 0, others are all undef;
9691	// 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9692	if (NumUsesOfInputLD == `1` && Subtarget.isLittleEndian() &&
9693	Subtarget.isISA3_1() && ElementSize <= `16`)
9694	return SDValue ();
9695
9696	assert(NumUsesOfInputLD > `0` && "No uses of input LD of a build_vector?");
9697	if (InputLoad->getNode()->hasNUsesOfValue(NUses: NumUsesOfInputLD, Value: `0`) &&
9698	Subtarget.hasVSX()) {
9699	SDValue Ops[] = {
9700	LD->getChain(), // Chain
9701	LD->getBasePtr(), // Ptr
9702	DAG.getValueType(Op.getValueType()) // VT
9703	};
9704	SDValue LdSplt = DAG.getMemIntrinsicNode(
9705	Opcode: NewOpcode, dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other), Ops,
9706	MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
9707	// Replace all uses of the output chain of the original load with the
9708	// output chain of the new load.
9709	DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: `1`),
9710	To: LdSplt.getValue(R: `1`));
9711	return LdSplt;
9712	}
9713	}
9714
9715	// In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9716	// 32-bits can be lowered to VSX instructions under certain conditions.
9717	// Without VSX, there is no pattern more efficient than expanding the node.
9718	if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9719	haveEfficientBuildVectorPattern(V: BVN, HasDirectMove: Subtarget.hasDirectMove(),
9720	HasP8Vector: Subtarget.hasP8Vector()))
9721	return Op;
9722	return SDValue ();
9723	}
9724
9725	uint64_t SplatUndef = APSplatUndef.getZExtValue();
9726	unsigned SplatSize = SplatBitSize / `8`;
9727
9728	// First, handle single instruction cases.
9729
9730	// All zeros?
9731	if (SplatBits == `0`) {
9732	// Canonicalize all zero vectors to be v4i32.
9733	if (Op.getValueType() != MVT::v4i32 \|\| HasAnyUndefs) {
9734	SDValue Z = DAG.getConstant(Val: `0`, DL: dl, VT: MVT::v4i32);
9735	Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Z);
9736	}
9737	return Op;
9738	}
9739
9740	// We have XXSPLTIW for constant splats four bytes wide.
9741	// Given vector length is a multiple of 4, 2-byte splats can be replaced
9742	// with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9743	// make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9744	// turned into a 4-byte splat of 0xABABABAB.
9745	if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == `2`)
9746	return getCanonicalConstSplat(Val: SplatBits \| (SplatBits << `16`), SplatSize: SplatSize * `2`,
9747	VT: Op.getValueType(), DAG, dl);
9748
9749	if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == `4`)
9750	return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
9751	dl);
9752
9753	// We have XXSPLTIB for constant splats one byte wide.
9754	if (Subtarget.hasP9Vector() && SplatSize == `1`)
9755	return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
9756	dl);
9757
9758	// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9759	// Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9760	if (SextVal >= -`16` && SextVal <= `15`) {
9761	// SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9762	// generate a splat word with extend for size 8.
9763	unsigned UseSize = SplatSize == `8` ? `4` : SplatSize;
9764	SDValue Res =
9765	getCanonicalConstSplat(Val: SextVal, SplatSize: UseSize, VT: Op.getValueType(), DAG, dl);
9766	if (SplatSize != `8`)
9767	return Res;
9768	return BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vupklsw, Op: Res, DAG, dl);
9769	}
9770
9771	// Two instruction sequences.
9772
9773	if (Subtarget.hasP9Vector() && SextVal >= -`128` && SextVal <= `127`) {
9774	SDValue C = DAG.getConstant(Val: (unsigned char)SextVal, DL: dl, VT: MVT::i32);
9775	SmallVector<SDValue, `16`> Ops(`16`, C);
9776	SDValue BV = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops);
9777	unsigned IID;
9778	EVT VT;
9779	switch (SplatSize) {
9780	default:
9781	llvm_unreachable("Unexpected type for vector constant.");
9782	case `2`:
9783	IID = Intrinsic::ppc_altivec_vupklsb;
9784	VT = MVT::v8i16;
9785	break;
9786	case `4`:
9787	IID = Intrinsic::ppc_altivec_vextsb2w;
9788	VT = MVT::v4i32;
9789	break;
9790	case `8`:
9791	IID = Intrinsic::ppc_altivec_vextsb2d;
9792	VT = MVT::v2i64;
9793	break;
9794	}
9795	SDValue Extend = BuildIntrinsicOp(IID, Op: BV, DAG, dl, DestVT: VT);
9796	return DAG.getBitcast(VT: Op ->getValueType(ResNo: `0`), V: Extend);
9797	}
9798	assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9799
9800	// If this value is in the range [-32,30] and is even, use:
9801	// VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9802	// If this value is in the range [17,31] and is odd, use:
9803	// VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9804	// If this value is in the range [-31,-17] and is odd, use:
9805	// VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9806	// Note the last two are three-instruction sequences.
9807	if (SextVal >= -`32` && SextVal <= `31`) {
9808	// To avoid having these optimizations undone by constant folding,
9809	// we convert to a pseudo that will be expanded later into one of
9810	// the above forms.
9811	SDValue Elt = DAG.getSignedConstant(Val: SextVal, DL: dl, VT: MVT::i32);
9812	EVT VT = (SplatSize == `1` ? MVT::v16i8 :
9813	(SplatSize == `2` ? MVT::v8i16 : MVT::v4i32));
9814	SDValue EltSize = DAG.getConstant(Val: SplatSize, DL: dl, VT: MVT::i32);
9815	SDValue RetVal = DAG.getNode(Opcode: PPCISD::VADD_SPLAT, DL: dl, VT, N1: Elt, N2: EltSize);
9816	if (VT == Op.getValueType())
9817	return RetVal;
9818	else
9819	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: RetVal);
9820	}
9821
9822	// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9823	// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9824	// for fneg/fabs.
9825	if (SplatSize == `4` && SplatBits == (`0x7FFFFFFF`&~SplatUndef)) {
9826	// Make -1 and vspltisw -1:
9827	SDValue OnesV = getCanonicalConstSplat(Val: -`1`, SplatSize: `4`, VT: MVT::v4i32, DAG, dl);
9828
9829	// Make the VSLW intrinsic, computing 0x8000_0000.
9830	SDValue Res = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: OnesV,
9831	RHS: OnesV, DAG, dl);
9832
9833	// xor by OnesV to invert it.
9834	Res = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::v4i32, N1: Res, N2: OnesV);
9835	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9836	}
9837
9838	// Check to see if this is a wide variety of vsplti, binop self cases.*
9839	static const signed char SplatCsts[] = {
9840	-`1`, `1`, -`2`, `2`, -`3`, `3`, -`4`, `4`, -`5`, `5`, -`6`, `6`, -`7`, `7`,
9841	-`8`, `8`, -`9`, `9`, -`10`, `10`, -`11`, `11`, -`12`, `12`, -`13`, `13`, `14`, -`14`, `15`, -`15`, -`16`
9842	};
9843
9844	for (unsigned idx = `0`; idx < std::size(SplatCsts); ++idx) {
9845	// Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9846	// cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9847	int i = SplatCsts[idx];
9848
9849	// Figure out what shift amount will be used by altivec if shifted by i in
9850	// this splat size.
9851	unsigned TypeShiftAmt = i & (SplatBitSize-`1`);
9852
9853	// vsplti + shl self.
9854	if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9855	SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
9856	static const unsigned IIDs[] = { // Intrinsic to use for each size.
9857	Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, `0`,
9858	Intrinsic::ppc_altivec_vslw
9859	};
9860	Res = BuildIntrinsicOp(IID: IIDs[SplatSize-`1`], LHS: Res, RHS: Res, DAG, dl);
9861	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9862	}
9863
9864	// vsplti + srl self.
9865	if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9866	SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
9867	static const unsigned IIDs[] = { // Intrinsic to use for each size.
9868	Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, `0`,
9869	Intrinsic::ppc_altivec_vsrw
9870	};
9871	Res = BuildIntrinsicOp(IID: IIDs[SplatSize-`1`], LHS: Res, RHS: Res, DAG, dl);
9872	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9873	}
9874
9875	// vsplti + rol self.
9876	if (SextVal == (int)(((unsigned)i << TypeShiftAmt) \|
9877	((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9878	SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
9879	static const unsigned IIDs[] = { // Intrinsic to use for each size.
9880	Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, `0`,
9881	Intrinsic::ppc_altivec_vrlw
9882	};
9883	Res = BuildIntrinsicOp(IID: IIDs[SplatSize-`1`], LHS: Res, RHS: Res, DAG, dl);
9884	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9885	}
9886
9887	// t = vsplti c, result = vsldoi t, t, 1
9888	if (SextVal == (int)(((unsigned)i << `8`) \| (i < `0` ? `0xFF` : `0`))) {
9889	SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
9890	unsigned Amt = Subtarget.isLittleEndian() ? `15` : `1`;
9891	return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
9892	}
9893	// t = vsplti c, result = vsldoi t, t, 2
9894	if (SextVal == (int)(((unsigned)i << `16`) \| (i < `0` ? `0xFFFF` : `0`))) {
9895	SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
9896	unsigned Amt = Subtarget.isLittleEndian() ? `14` : `2`;
9897	return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
9898	}
9899	// t = vsplti c, result = vsldoi t, t, 3
9900	if (SextVal == (int)(((unsigned)i << `24`) \| (i < `0` ? `0xFFFFFF` : `0`))) {
9901	SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
9902	unsigned Amt = Subtarget.isLittleEndian() ? `13` : `3`;
9903	return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
9904	}
9905	}
9906
9907	return SDValue ();
9908	}
9909
9910	/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9911	/// the specified operations to build the shuffle.
9912	static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9913	SDValue RHS, SelectionDAG &DAG,
9914	const SDLoc &dl) {
9915	unsigned OpNum = (PFEntry >> `26`) & `0x0F`;
9916	unsigned LHSID = (PFEntry >> `13`) & ((`1` << `13`)-`1`);
9917	unsigned RHSID = (PFEntry >> `0`) & ((`1` << `13`)-`1`);
9918
9919	enum {
9920	OP_COPY = `0`, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9921	OP_VMRGHW,
9922	OP_VMRGLW,
9923	OP_VSPLTISW0,
9924	OP_VSPLTISW1,
9925	OP_VSPLTISW2,
9926	OP_VSPLTISW3,
9927	OP_VSLDOI4,
9928	OP_VSLDOI8,
9929	OP_VSLDOI12
9930	};
9931
9932	if (OpNum == OP_COPY) {
9933	if (LHSID == (`1``9`+`2`)`9`+`3`) return LHS;
9934	assert(LHSID == ((`4``9`+`5`)`9`+`6`)*`9`+`7` && "Illegal OP_COPY!");
9935	return RHS;
9936	}
9937
9938	SDValue OpLHS, OpRHS;
9939	OpLHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9940	OpRHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9941
9942	int ShufIdxs[`16`];
9943	switch (OpNum) {
9944	default: llvm_unreachable("Unknown i32 permute!");
9945	case OP_VMRGHW:
9946	ShufIdxs[ `0`] = `0`; ShufIdxs[ `1`] = `1`; ShufIdxs[ `2`] = `2`; ShufIdxs[ `3`] = `3`;
9947	ShufIdxs[ `4`] = `16`; ShufIdxs[ `5`] = `17`; ShufIdxs[ `6`] = `18`; ShufIdxs[ `7`] = `19`;
9948	ShufIdxs[ `8`] = `4`; ShufIdxs[ `9`] = `5`; ShufIdxs[`10`] = `6`; ShufIdxs[`11`] = `7`;
9949	ShufIdxs[`12`] = `20`; ShufIdxs[`13`] = `21`; ShufIdxs[`14`] = `22`; ShufIdxs[`15`] = `23`;
9950	break;
9951	case OP_VMRGLW:
9952	ShufIdxs[ `0`] = `8`; ShufIdxs[ `1`] = `9`; ShufIdxs[ `2`] = `10`; ShufIdxs[ `3`] = `11`;
9953	ShufIdxs[ `4`] = `24`; ShufIdxs[ `5`] = `25`; ShufIdxs[ `6`] = `26`; ShufIdxs[ `7`] = `27`;
9954	ShufIdxs[ `8`] = `12`; ShufIdxs[ `9`] = `13`; ShufIdxs[`10`] = `14`; ShufIdxs[`11`] = `15`;
9955	ShufIdxs[`12`] = `28`; ShufIdxs[`13`] = `29`; ShufIdxs[`14`] = `30`; ShufIdxs[`15`] = `31`;
9956	break;
9957	case OP_VSPLTISW0:
9958	for (unsigned i = `0`; i != `16`; ++i)
9959	ShufIdxs[i] = (i&`3`)+`0`;
9960	break;
9961	case OP_VSPLTISW1:
9962	for (unsigned i = `0`; i != `16`; ++i)
9963	ShufIdxs[i] = (i&`3`)+`4`;
9964	break;
9965	case OP_VSPLTISW2:
9966	for (unsigned i = `0`; i != `16`; ++i)
9967	ShufIdxs[i] = (i&`3`)+`8`;
9968	break;
9969	case OP_VSPLTISW3:
9970	for (unsigned i = `0`; i != `16`; ++i)
9971	ShufIdxs[i] = (i&`3`)+`12`;
9972	break;
9973	case OP_VSLDOI4:
9974	return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: `4`, VT: OpLHS.getValueType(), DAG, dl);
9975	case OP_VSLDOI8:
9976	return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: `8`, VT: OpLHS.getValueType(), DAG, dl);
9977	case OP_VSLDOI12:
9978	return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: `12`, VT: OpLHS.getValueType(), DAG, dl);
9979	}
9980	EVT VT = OpLHS.getValueType();
9981	OpLHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpLHS);
9982	OpRHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpRHS);
9983	SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OpLHS, N2: OpRHS, Mask: ShufIdxs);
9984	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
9985	}
9986
9987	/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9988	/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9989	/// SDValue.
9990	SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9991	SelectionDAG &DAG) const {
9992	const unsigned BytesInVector = `16`;
9993	bool IsLE = Subtarget.isLittleEndian();
9994	SDLoc dl(N);
9995	SDValue V1 = N->getOperand(Num: `0`);
9996	SDValue V2 = N->getOperand(Num: `1`);
9997	unsigned ShiftElts = `0`, InsertAtByte = `0`;
9998	bool Swap = false;
9999
10000	// Shifts required to get the byte we want at element 7.
10001	unsigned LittleEndianShifts[] = {`8`, `7`, `6`, `5`, `4`, `3`, `2`, `1`,
10002	`0`, `15`, `14`, `13`, `12`, `11`, `10`, `9`};
10003	unsigned BigEndianShifts[] = {`9`, `10`, `11`, `12`, `13`, `14`, `15`, `0`,
10004	`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`};
10005
10006	ArrayRef<int> Mask = N->getMask();
10007	int OriginalOrder[] = {`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`};
10008
10009	// For each mask element, find out if we're just inserting something
10010	// from V2 into V1 or vice versa.
10011	// Possible permutations inserting an element from V2 into V1:
10012	// X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10013	// 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10014	// ...
10015	// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10016	// Inserting from V1 into V2 will be similar, except mask range will be
10017	// [16,31].
10018
10019	bool FoundCandidate = false;
10020	// If both vector operands for the shuffle are the same vector, the mask
10021	// will contain only elements from the first one and the second one will be
10022	// undef.
10023	unsigned VINSERTBSrcElem = IsLE ? `8` : `7`;
10024	// Go through the mask of half-words to find an element that's being moved
10025	// from one vector to the other.
10026	for (unsigned i = `0`; i < BytesInVector; ++i) {
10027	unsigned CurrentElement = Mask [i];
10028	// If 2nd operand is undefined, we should only look for element 7 in the
10029	// Mask.
10030	if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10031	continue;
10032
10033	bool OtherElementsInOrder = true;
10034	// Examine the other elements in the Mask to see if they're in original
10035	// order.
10036	for (unsigned j = `0`; j < BytesInVector; ++j) {
10037	if (j == i)
10038	continue;
10039	// If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
10040	// from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
10041	// in which we always assume we're always picking from the 1st operand.
10042	int MaskOffset =
10043	(!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : `0`;
10044	if (Mask [j] != OriginalOrder[j] + MaskOffset) {
10045	OtherElementsInOrder = false;
10046	break;
10047	}
10048	}
10049	// If other elements are in original order, we record the number of shifts
10050	// we need to get the element we want into element 7. Also record which byte
10051	// in the vector we should insert into.
10052	if (OtherElementsInOrder) {
10053	// If 2nd operand is undefined, we assume no shifts and no swapping.
10054	if (V2.isUndef()) {
10055	ShiftElts = `0`;
10056	Swap = false;
10057	} else {
10058	// Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10059	ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & `0xF`]
10060	: BigEndianShifts[CurrentElement & `0xF`];
10061	Swap = CurrentElement < BytesInVector;
10062	}
10063	InsertAtByte = IsLE ? BytesInVector - (i + `1`) : i;
10064	FoundCandidate = true;
10065	break;
10066	}
10067	}
10068
10069	if (!FoundCandidate)
10070	return SDValue ();
10071
10072	// Candidate found, construct the proper SDAG sequence with VINSERTB,
10073	// optionally with VECSHL if shift is required.
10074	if (Swap)
10075	std::swap(a&: V1, b&: V2);
10076	if (V2.isUndef())
10077	V2 = V1;
10078	if (ShiftElts) {
10079	SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
10080	N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10081	return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: Shl,
10082	N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10083	}
10084	return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: V2,
10085	N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10086	}
10087
10088	/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10089	/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10090	/// SDValue.
10091	SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10092	SelectionDAG &DAG) const {
10093	const unsigned NumHalfWords = `8`;
10094	const unsigned BytesInVector = NumHalfWords * `2`;
10095	// Check that the shuffle is on half-words.
10096	if (!isNByteElemShuffleMask(N, Width: `2`, StepLen: `1`))
10097	return SDValue ();
10098
10099	bool IsLE = Subtarget.isLittleEndian();
10100	SDLoc dl(N);
10101	SDValue V1 = N->getOperand(Num: `0`);
10102	SDValue V2 = N->getOperand(Num: `1`);
10103	unsigned ShiftElts = `0`, InsertAtByte = `0`;
10104	bool Swap = false;
10105
10106	// Shifts required to get the half-word we want at element 3.
10107	unsigned LittleEndianShifts[] = {`4`, `3`, `2`, `1`, `0`, `7`, `6`, `5`};
10108	unsigned BigEndianShifts[] = {`5`, `6`, `7`, `0`, `1`, `2`, `3`, `4`};
10109
10110	uint32_t Mask = `0`;
10111	uint32_t OriginalOrderLow = `0x1234567`;
10112	uint32_t OriginalOrderHigh = `0x89ABCDEF`;
10113	// Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10114	// 32-bit space, only need 4-bit nibbles per element.
10115	for (unsigned i = `0`; i < NumHalfWords; ++i) {
10116	unsigned MaskShift = (NumHalfWords - `1` - i) * `4`;
10117	Mask \|= ((uint32_t)(N->getMaskElt(Idx: i * `2`) / `2`) << MaskShift);
10118	}
10119
10120	// For each mask element, find out if we're just inserting something
10121	// from V2 into V1 or vice versa. Possible permutations inserting an element
10122	// from V2 into V1:
10123	// X, 1, 2, 3, 4, 5, 6, 7
10124	// 0, X, 2, 3, 4, 5, 6, 7
10125	// 0, 1, X, 3, 4, 5, 6, 7
10126	// 0, 1, 2, X, 4, 5, 6, 7
10127	// 0, 1, 2, 3, X, 5, 6, 7
10128	// 0, 1, 2, 3, 4, X, 6, 7
10129	// 0, 1, 2, 3, 4, 5, X, 7
10130	// 0, 1, 2, 3, 4, 5, 6, X
10131	// Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10132
10133	bool FoundCandidate = false;
10134	// Go through the mask of half-words to find an element that's being moved
10135	// from one vector to the other.
10136	for (unsigned i = `0`; i < NumHalfWords; ++i) {
10137	unsigned MaskShift = (NumHalfWords - `1` - i) * `4`;
10138	uint32_t MaskOneElt = (Mask >> MaskShift) & `0xF`;
10139	uint32_t MaskOtherElts = ~(`0xF` << MaskShift);
10140	uint32_t TargetOrder = `0x0`;
10141
10142	// If both vector operands for the shuffle are the same vector, the mask
10143	// will contain only elements from the first one and the second one will be
10144	// undef.
10145	if (V2.isUndef()) {
10146	ShiftElts = `0`;
10147	unsigned VINSERTHSrcElem = IsLE ? `4` : `3`;
10148	TargetOrder = OriginalOrderLow;
10149	Swap = false;
10150	// Skip if not the correct element or mask of other elements don't equal
10151	// to our expected order.
10152	if (MaskOneElt == VINSERTHSrcElem &&
10153	(Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10154	InsertAtByte = IsLE ? BytesInVector - (i + `1`) * `2` : i * `2`;
10155	FoundCandidate = true;
10156	break;
10157	}
10158	} else { // If both operands are defined.
10159	// Target order is [8,15] if the current mask is between [0,7].
10160	TargetOrder =
10161	(MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10162	// Skip if mask of other elements don't equal our expected order.
10163	if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10164	// We only need the last 3 bits for the number of shifts.
10165	ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & `0x7`]
10166	: BigEndianShifts[MaskOneElt & `0x7`];
10167	InsertAtByte = IsLE ? BytesInVector - (i + `1`) * `2` : i * `2`;
10168	Swap = MaskOneElt < NumHalfWords;
10169	FoundCandidate = true;
10170	break;
10171	}
10172	}
10173	}
10174
10175	if (!FoundCandidate)
10176	return SDValue ();
10177
10178	// Candidate found, construct the proper SDAG sequence with VINSERTH,
10179	// optionally with VECSHL if shift is required.
10180	if (Swap)
10181	std::swap(a&: V1, b&: V2);
10182	if (V2.isUndef())
10183	V2 = V1;
10184	SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
10185	if (ShiftElts) {
10186	// Double ShiftElts because we're left shifting on v16i8 type.
10187	SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
10188	N3: DAG.getConstant(Val: `2` * ShiftElts, DL: dl, VT: MVT::i32));
10189	SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: Shl);
10190	SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
10191	N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10192	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10193	}
10194	SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V2);
10195	SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
10196	N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10197	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10198	}
10199
10200	/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10201	/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10202	/// return the default SDValue.
10203	SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10204	SelectionDAG &DAG) const {
10205	// The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10206	// to v16i8. Peek through the bitcasts to get the actual operands.
10207	SDValue LHS = peekThroughBitcasts(V: SVN->getOperand(Num: `0`));
10208	SDValue RHS = peekThroughBitcasts(V: SVN->getOperand(Num: `1`));
10209
10210	auto ShuffleMask = SVN->getMask();
10211	SDValue VecShuffle(SVN, `0`);
10212	SDLoc DL(SVN);
10213
10214	// Check that we have a four byte shuffle.
10215	if (!isNByteElemShuffleMask(N: SVN, Width: `4`, StepLen: `1`))
10216	return SDValue ();
10217
10218	// Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10219	if (RHS ->getOpcode() != ISD::BUILD_VECTOR) {
10220	std::swap(a&: LHS, b&: RHS);
10221	VecShuffle = peekThroughBitcasts(V: DAG.getCommutedVectorShuffle(SV: *SVN));
10222	ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(Val&: VecShuffle);
10223	if (!CommutedSV)
10224	return SDValue ();
10225	ShuffleMask = CommutedSV->getMask();
10226	}
10227
10228	// Ensure that the RHS is a vector of constants.
10229	BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: RHS.getNode());
10230	if (!BVN)
10231	return SDValue ();
10232
10233	// Check if RHS is a splat of 4-bytes (or smaller).
10234	APInt APSplatValue, APSplatUndef;
10235	unsigned SplatBitSize;
10236	bool HasAnyUndefs;
10237	if (!BVN->isConstantSplat(SplatValue&: APSplatValue, SplatUndef&: APSplatUndef, SplatBitSize,
10238	HasAnyUndefs, MinSplatBits: `0`, isBigEndian: !Subtarget.isLittleEndian()) \|\|
10239	SplatBitSize > `32`)
10240	return SDValue ();
10241
10242	// Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10243	// The instruction splats a constant C into two words of the source vector
10244	// producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10245	// Thus we check that the shuffle mask is the equivalent of
10246	// <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10247	// Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10248	// within each word are consecutive, so we only need to check the first byte.
10249	SDValue Index;
10250	bool IsLE = Subtarget.isLittleEndian();
10251	if ((ShuffleMask [`0`] == `0` && ShuffleMask [`8`] == `8`) &&
10252	(ShuffleMask [`4`] % `4` == `0` && ShuffleMask [`12`] % `4` == `0` &&
10253	ShuffleMask [`4`] > `15` && ShuffleMask [`12`] > `15`))
10254	Index = DAG.getTargetConstant(Val: IsLE ? `0` : `1`, DL, VT: MVT::i32);
10255	else if ((ShuffleMask [`4`] == `4` && ShuffleMask [`12`] == `12`) &&
10256	(ShuffleMask [`0`] % `4` == `0` && ShuffleMask [`8`] % `4` == `0` &&
10257	ShuffleMask [`0`] > `15` && ShuffleMask [`8`] > `15`))
10258	Index = DAG.getTargetConstant(Val: IsLE ? `1` : `0`, DL, VT: MVT::i32);
10259	else
10260	return SDValue ();
10261
10262	// If the splat is narrower than 32-bits, we need to get the 32-bit value
10263	// for XXSPLTI32DX.
10264	unsigned SplatVal = APSplatValue.getZExtValue();
10265	for (; SplatBitSize < `32`; SplatBitSize <<= `1`)
10266	SplatVal \|= (SplatVal << SplatBitSize);
10267
10268	SDValue SplatNode = DAG.getNode(
10269	Opcode: PPCISD::XXSPLTI32DX, DL, VT: MVT::v2i64, N1: DAG.getBitcast(VT: MVT::v2i64, V: LHS),
10270	N2: Index, N3: DAG.getTargetConstant(Val: SplatVal, DL, VT: MVT::i32));
10271	return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: SplatNode);
10272	}
10273
10274	/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10275	/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10276	/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10277	/// i.e (or (shl x, C1), (srl x, 128-C1)).
10278	SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10279	assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10280	assert(Op.getValueType() == MVT::v1i128 &&
10281	"Only set v1i128 as custom, other type shouldn't reach here!");
10282	SDLoc dl(Op);
10283	SDValue N0 = peekThroughBitcasts(V: Op.getOperand(i: `0`));
10284	SDValue N1 = peekThroughBitcasts(V: Op.getOperand(i: `1`));
10285	unsigned SHLAmt = N1.getConstantOperandVal(i: `0`);
10286	if (SHLAmt % `8` == `0`) {
10287	std::array<int, `16`> Mask;
10288	std::iota(first: Mask.begin(), last: Mask.end(), value: `0`);
10289	std::rotate(first: Mask.begin(), middle: Mask.begin() + SHLAmt / `8`, last: Mask.end());
10290	if (SDValue Shuffle =
10291	DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: DAG.getBitcast(VT: MVT::v16i8, V: N0),
10292	N2: DAG.getUNDEF(VT: MVT::v16i8), Mask))
10293	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: Shuffle);
10294	}
10295	SDValue ArgVal = DAG.getBitcast(VT: MVT::i128, V: N0);
10296	SDValue SHLOp = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ArgVal,
10297	N2: DAG.getConstant(Val: SHLAmt, DL: dl, VT: MVT::i32));
10298	SDValue SRLOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: ArgVal,
10299	N2: DAG.getConstant(Val: `128` - SHLAmt, DL: dl, VT: MVT::i32));
10300	SDValue OROp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i128, N1: SHLOp, N2: SRLOp);
10301	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: OROp);
10302	}
10303
10304	/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10305	/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10306	/// return the code it can be lowered into. Worst case, it can always be
10307	/// lowered into a vperm.
10308	SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10309	SelectionDAG &DAG) const {
10310	SDLoc dl(Op);
10311	SDValue V1 = Op.getOperand(i: `0`);
10312	SDValue V2 = Op.getOperand(i: `1`);
10313	ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
10314
10315	// Any nodes that were combined in the target-independent combiner prior
10316	// to vector legalization will not be sent to the target combine. Try to
10317	// combine it here.
10318	if (SDValue NewShuffle = combineVectorShuffle(SVN: SVOp, DAG)) {
10319	if (!isa<ShuffleVectorSDNode>(Val: NewShuffle))
10320	return NewShuffle;
10321	Op = NewShuffle;
10322	SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
10323	V1 = Op.getOperand(i: `0`);
10324	V2 = Op.getOperand(i: `1`);
10325	}
10326	EVT VT = Op.getValueType();
10327	bool isLittleEndian = Subtarget.isLittleEndian();
10328
10329	unsigned ShiftElts, InsertAtByte;
10330	bool Swap = false;
10331
10332	// If this is a load-and-splat, we can do that with a single instruction
10333	// in some cases. However if the load has multiple uses, we don't want to
10334	// combine it because that will just produce multiple loads.
10335	bool IsPermutedLoad = false;
10336	const SDValue *InputLoad = getNormalLoadInput(Op: V1, IsPermuted&: IsPermutedLoad);
10337	if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10338	(PPC::isSplatShuffleMask(N: SVOp, EltSize: `4`) \|\| PPC::isSplatShuffleMask(N: SVOp, EltSize: `8`)) &&
10339	InputLoad->hasOneUse()) {
10340	bool IsFourByte = PPC::isSplatShuffleMask(N: SVOp, EltSize: `4`);
10341	int SplatIdx =
10342	PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: IsFourByte ? `4` : `8`, DAG);
10343
10344	// The splat index for permuted loads will be in the left half of the vector
10345	// which is strictly wider than the loaded value by 8 bytes. So we need to
10346	// adjust the splat index to point to the correct address in memory.
10347	if (IsPermutedLoad) {
10348	assert((isLittleEndian \|\| IsFourByte) &&
10349	"Unexpected size for permuted load on big endian target");
10350	SplatIdx += IsFourByte ? `2` : `1`;
10351	assert((SplatIdx < (IsFourByte ? `4` : `2`)) &&
10352	"Splat of a value outside of the loaded memory");
10353	}
10354
10355	LoadSDNode LD = cast<LoadSDNode>(Val: InputLoad);
10356	// For 4-byte load-and-splat, we need Power9.
10357	if ((IsFourByte && Subtarget.hasP9Vector()) \|\| !IsFourByte) {
10358	uint64_t Offset = `0`;
10359	if (IsFourByte)
10360	Offset = isLittleEndian ? (`3` - SplatIdx) * `4` : SplatIdx * `4`;
10361	else
10362	Offset = isLittleEndian ? (`1` - SplatIdx) * `8` : SplatIdx * `8`;
10363
10364	// If the width of the load is the same as the width of the splat,
10365	// loading with an offset would load the wrong memory.
10366	if (LD->getValueType(ResNo: `0`).getSizeInBits() == (IsFourByte ? `32` : `64`))
10367	Offset = `0`;
10368
10369	SDValue BasePtr = LD->getBasePtr();
10370	if (Offset != `0`)
10371	BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
10372	N1: BasePtr, N2: DAG.getIntPtrConstant(Val: Offset, DL: dl));
10373	SDValue Ops[] = {
10374	LD->getChain(), // Chain
10375	BasePtr, // BasePtr
10376	DAG.getValueType(Op.getValueType()) // VT
10377	};
10378	SDVTList VTL =
10379	DAG.getVTList(VT1: IsFourByte ? MVT::v4i32 : MVT::v2i64, VT2: MVT::Other);
10380	SDValue LdSplt =
10381	DAG.getMemIntrinsicNode(Opcode: PPCISD::LD_SPLAT, dl, VTList: VTL,
10382	Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
10383	DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: `1`), To: LdSplt.getValue(R: `1`));
10384	if (LdSplt.getValueType() != SVOp->getValueType(ResNo: `0`))
10385	LdSplt = DAG.getBitcast(VT: SVOp->getValueType(ResNo: `0`), V: LdSplt);
10386	return LdSplt;
10387	}
10388	}
10389
10390	// All v2i64 and v2f64 shuffles are legal
10391	if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
10392	return Op;
10393
10394	if (Subtarget.hasP9Vector() &&
10395	PPC::isXXINSERTWMask(N: SVOp, ShiftElts, InsertAtByte, Swap,
10396	IsLE: isLittleEndian)) {
10397	if (V2.isUndef())
10398	V2 = V1;
10399	else if (Swap)
10400	std::swap(a&: V1, b&: V2);
10401	SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10402	SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2);
10403	if (ShiftElts) {
10404	SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv2, N2: Conv2,
10405	N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10406	SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Shl,
10407	N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10408	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10409	}
10410	SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
10411	N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10412	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10413	}
10414
10415	if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10416	SDValue SplatInsertNode;
10417	if ((SplatInsertNode = lowerToXXSPLTI32DX(SVN: SVOp, DAG)))
10418	return SplatInsertNode;
10419	}
10420
10421	if (Subtarget.hasP9Altivec()) {
10422	SDValue NewISDNode;
10423	if ((NewISDNode = lowerToVINSERTH(N: SVOp, DAG)))
10424	return NewISDNode;
10425
10426	if ((NewISDNode = lowerToVINSERTB(N: SVOp, DAG)))
10427	return NewISDNode;
10428	}
10429
10430	if (Subtarget.hasVSX() &&
10431	PPC::isXXSLDWIShuffleMask(N: SVOp, ShiftElts, Swap, IsLE: isLittleEndian)) {
10432	if (Swap)
10433	std::swap(a&: V1, b&: V2);
10434	SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10435	SDValue Conv2 =
10436	DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2.isUndef() ? V1 : V2);
10437
10438	SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
10439	N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10440	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Shl);
10441	}
10442
10443	if (Subtarget.hasVSX() &&
10444	PPC::isXXPERMDIShuffleMask(N: SVOp, DM&: ShiftElts, Swap, IsLE: isLittleEndian)) {
10445	if (Swap)
10446	std::swap(a&: V1, b&: V2);
10447	SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
10448	SDValue Conv2 =
10449	DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V2.isUndef() ? V1 : V2);
10450
10451	SDValue PermDI = DAG.getNode(Opcode: PPCISD::XXPERMDI, DL: dl, VT: MVT::v2i64, N1: Conv1, N2: Conv2,
10452	N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10453	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: PermDI);
10454	}
10455
10456	if (Subtarget.hasP9Vector()) {
10457	if (PPC::isXXBRHShuffleMask(N: SVOp)) {
10458	SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
10459	SDValue ReveHWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v8i16, Operand: Conv);
10460	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveHWord);
10461	} else if (PPC::isXXBRWShuffleMask(N: SVOp)) {
10462	SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10463	SDValue ReveWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v4i32, Operand: Conv);
10464	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveWord);
10465	} else if (PPC::isXXBRDShuffleMask(N: SVOp)) {
10466	SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
10467	SDValue ReveDWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Conv);
10468	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveDWord);
10469	} else if (PPC::isXXBRQShuffleMask(N: SVOp)) {
10470	SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: V1);
10471	SDValue ReveQWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v1i128, Operand: Conv);
10472	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveQWord);
10473	}
10474	}
10475
10476	if (Subtarget.hasVSX()) {
10477	if (V2.isUndef() && PPC::isSplatShuffleMask(N: SVOp, EltSize: `4`)) {
10478	int SplatIdx = PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: `4`, DAG);
10479
10480	SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10481	SDValue Splat = DAG.getNode(Opcode: PPCISD::XXSPLT, DL: dl, VT: MVT::v4i32, N1: Conv,
10482	N2: DAG.getConstant(Val: SplatIdx, DL: dl, VT: MVT::i32));
10483	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Splat);
10484	}
10485
10486	// Left shifts of 8 bytes are actually swaps. Convert accordingly.
10487	if (V2.isUndef() && PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: `1`, DAG) == `8`) {
10488	SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: V1);
10489	SDValue Swap = DAG.getNode(Opcode: PPCISD::SWAP_NO_CHAIN, DL: dl, VT: MVT::v2f64, Operand: Conv);
10490	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Swap);
10491	}
10492	}
10493
10494	// Cases that are handled by instructions that take permute immediates
10495	// (such as vsplt) should be left as VECTOR_SHUFFLE nodes so they can be*
10496	// selected by the instruction selector.
10497	if (V2.isUndef()) {
10498	if (PPC::isSplatShuffleMask(N: SVOp, EltSize: `1`) \|\|
10499	PPC::isSplatShuffleMask(N: SVOp, EltSize: `2`) \|\|
10500	PPC::isSplatShuffleMask(N: SVOp, EltSize: `4`) \|\|
10501	PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind: `1`, DAG) \|\|
10502	PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind: `1`, DAG) \|\|
10503	PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: `1`, DAG) != -`1` \|\|
10504	PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: `1`, ShuffleKind: `1`, DAG) \|\|
10505	PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: `2`, ShuffleKind: `1`, DAG) \|\|
10506	PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: `4`, ShuffleKind: `1`, DAG) \|\|
10507	PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: `1`, ShuffleKind: `1`, DAG) \|\|
10508	PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: `2`, ShuffleKind: `1`, DAG) \|\|
10509	PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: `4`, ShuffleKind: `1`, DAG) \|\|
10510	(Subtarget.hasP8Altivec() && (
10511	PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind: `1`, DAG) \|\|
10512	PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind: `1`, DAG) \|\|
10513	PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind: `1`, DAG)))) {
10514	return Op;
10515	}
10516	}
10517
10518	// Altivec has a variety of "shuffle immediates" that take two vector inputs
10519	// and produce a fixed permutation. If any of these match, do not lower to
10520	// VPERM.
10521	unsigned int ShuffleKind = isLittleEndian ? `2` : `0`;
10522	if (PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind, DAG) \|\|
10523	PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind, DAG) \|\|
10524	PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind, DAG) != -`1` \|\|
10525	PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: `1`, ShuffleKind, DAG) \|\|
10526	PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: `2`, ShuffleKind, DAG) \|\|
10527	PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: `4`, ShuffleKind, DAG) \|\|
10528	PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: `1`, ShuffleKind, DAG) \|\|
10529	PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: `2`, ShuffleKind, DAG) \|\|
10530	PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: `4`, ShuffleKind, DAG) \|\|
10531	(Subtarget.hasP8Altivec() && (
10532	PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind, DAG) \|\|
10533	PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind, DAG) \|\|
10534	PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind, DAG))))
10535	return Op;
10536
10537	// Check to see if this is a shuffle of 4-byte values. If so, we can use our
10538	// perfect shuffle table to emit an optimal matching sequence.
10539	ArrayRef<int> PermMask = SVOp->getMask();
10540
10541	if (!DisablePerfectShuffle && !isLittleEndian) {
10542	unsigned PFIndexes[`4`];
10543	bool isFourElementShuffle = true;
10544	for (unsigned i = `0`; i != `4` && isFourElementShuffle;
10545	++i) { // Element number
10546	unsigned EltNo = `8`; // Start out undef.
10547	for (unsigned j = `0`; j != `4`; ++j) { // Intra-element byte.
10548	if (PermMask [i * `4` + j] < `0`)
10549	continue; // Undef, ignore it.
10550
10551	unsigned ByteSource = PermMask [i * `4` + j];
10552	if ((ByteSource & `3`) != j) {
10553	isFourElementShuffle = false;
10554	break;
10555	}
10556
10557	if (EltNo == `8`) {
10558	EltNo = ByteSource / `4`;
10559	} else if (EltNo != ByteSource / `4`) {
10560	isFourElementShuffle = false;
10561	break;
10562	}
10563	}
10564	PFIndexes[i] = EltNo;
10565	}
10566
10567	// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10568	// perfect shuffle vector to determine if it is cost effective to do this as
10569	// discrete instructions, or whether we should use a vperm.
10570	// For now, we skip this for little endian until such time as we have a
10571	// little-endian perfect shuffle table.
10572	if (isFourElementShuffle) {
10573	// Compute the index in the perfect shuffle table.
10574	unsigned PFTableIndex = PFIndexes[`0`] * `9` * `9` * `9` + PFIndexes[`1`] * `9` * `9` +
10575	PFIndexes[`2`] * `9` + PFIndexes[`3`];
10576
10577	unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10578	unsigned Cost = (PFEntry >> `30`);
10579
10580	// Determining when to avoid vperm is tricky. Many things affect the cost
10581	// of vperm, particularly how many times the perm mask needs to be
10582	// computed. For example, if the perm mask can be hoisted out of a loop or
10583	// is already used (perhaps because there are multiple permutes with the
10584	// same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10585	// permute mask out of the loop requires an extra register.
10586	//
10587	// As a compromise, we only emit discrete instructions if the shuffle can
10588	// be generated in 3 or fewer operations. When we have loop information
10589	// available, if this block is within a loop, we should avoid using vperm
10590	// for 3-operation perms and use a constant pool load instead.
10591	if (Cost < `3`)
10592	return GeneratePerfectShuffle(PFEntry, LHS: V1, RHS: V2, DAG, dl);
10593	}
10594	}
10595
10596	// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10597	// vector that will get spilled to the constant pool.
10598	if (V2.isUndef()) V2 = V1;
10599
10600	return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10601	}
10602
10603	SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10604	ArrayRef<int> PermMask, EVT VT,
10605	SDValue V1, SDValue V2) const {
10606	unsigned Opcode = PPCISD::VPERM;
10607	EVT ValType = V1.getValueType();
10608	SDLoc dl(Op);
10609	bool NeedSwap = false;
10610	bool isLittleEndian = Subtarget.isLittleEndian();
10611	bool isPPC64 = Subtarget.isPPC64();
10612
10613	if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10614	(V1 ->hasOneUse() \|\| V2 ->hasOneUse())) {
10615	LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10616	"XXPERM instead\n");
10617	Opcode = PPCISD::XXPERM;
10618
10619	// The second input to XXPERM is also an output so if the second input has
10620	// multiple uses then copying is necessary, as a result we want the
10621	// single-use operand to be used as the second input to prevent copying.
10622	if ((!isLittleEndian && !V2 ->hasOneUse() && V1 ->hasOneUse()) \|\|
10623	(isLittleEndian && !V1 ->hasOneUse() && V2 ->hasOneUse())) {
10624	std::swap(a&: V1, b&: V2);
10625	NeedSwap = !NeedSwap;
10626	}
10627	}
10628
10629	// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10630	// that it is in input element units, not in bytes. Convert now.
10631
10632	// For little endian, the order of the input vectors is reversed, and
10633	// the permutation mask is complemented with respect to 31. This is
10634	// necessary to produce proper semantics with the big-endian-based vperm
10635	// instruction.
10636	EVT EltVT = V1.getValueType().getVectorElementType();
10637	unsigned BytesPerElement = EltVT.getSizeInBits() / `8`;
10638
10639	bool V1HasXXSWAPD = V1 ->getOperand(Num: `0`)->getOpcode() == PPCISD::XXSWAPD;
10640	bool V2HasXXSWAPD = V2 ->getOperand(Num: `0`)->getOpcode() == PPCISD::XXSWAPD;
10641
10642	/*
10643	Vectors will be appended like so: [ V1 \| v2 ]
10644	XXSWAPD on V1:
10645	[ A \| B \| C \| D ] -> [ C \| D \| A \| B ]
10646	0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10647	i.e. index of A, B += 8, and index of C, D -= 8.
10648	XXSWAPD on V2:
10649	[ E \| F \| G \| H ] -> [ G \| H \| E \| F ]
10650	16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10651	i.e. index of E, F += 8, index of G, H -= 8
10652	Swap V1 and V2:
10653	[ V1 \| V2 ] -> [ V2 \| V1 ]
10654	0-15 16-31 0-15 16-31
10655	i.e. index of V1 += 16, index of V2 -= 16
10656	*/
10657
10658	SmallVector<SDValue, `16`> ResultMask;
10659	for (unsigned i = `0`, e = VT.getVectorNumElements(); i != e; ++i) {
10660	unsigned SrcElt = PermMask [i] < `0` ? `0` : PermMask [i];
10661
10662	if (V1HasXXSWAPD) {
10663	if (SrcElt < `8`)
10664	SrcElt += `8`;
10665	else if (SrcElt < `16`)
10666	SrcElt -= `8`;
10667	}
10668	if (V2HasXXSWAPD) {
10669	if (SrcElt > `23`)
10670	SrcElt -= `8`;
10671	else if (SrcElt > `15`)
10672	SrcElt += `8`;
10673	}
10674	if (NeedSwap) {
10675	if (SrcElt < `16`)
10676	SrcElt += `16`;
10677	else
10678	SrcElt -= `16`;
10679	}
10680	for (unsigned j = `0`; j != BytesPerElement; ++j)
10681	if (isLittleEndian)
10682	ResultMask.push_back(
10683	Elt: DAG.getConstant(Val: `31` - (SrcElt * BytesPerElement + j), DL: dl, VT: MVT::i32));
10684	else
10685	ResultMask.push_back(
10686	Elt: DAG.getConstant(Val: SrcElt * BytesPerElement + j, DL: dl, VT: MVT::i32));
10687	}
10688
10689	if (V1HasXXSWAPD) {
10690	dl = SDLoc (V1 ->getOperand(Num: `0`));
10691	V1 = V1 ->getOperand(Num: `0`)->getOperand(Num: `1`);
10692	}
10693	if (V2HasXXSWAPD) {
10694	dl = SDLoc (V2 ->getOperand(Num: `0`));
10695	V2 = V2 ->getOperand(Num: `0`)->getOperand(Num: `1`);
10696	}
10697
10698	if (isPPC64 && (V1HasXXSWAPD \|\| V2HasXXSWAPD)) {
10699	if (ValType != MVT::v2f64)
10700	V1 = DAG.getBitcast(VT: MVT::v2f64, V: V1);
10701	if (V2.getValueType() != MVT::v2f64)
10702	V2 = DAG.getBitcast(VT: MVT::v2f64, V: V2);
10703	}
10704
10705	ShufflesHandledWithVPERM ++;
10706	SDValue VPermMask = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops: ResultMask);
10707	LLVM_DEBUG({
10708	ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10709	if (Opcode == PPCISD::XXPERM) {
10710	dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10711	} else {
10712	dbgs() << "Emitting a VPERM for the following shuffle:\n";
10713	}
10714	SVOp->dump();
10715	dbgs() << "With the following permute control vector:\n";
10716	VPermMask.dump();
10717	});
10718
10719	if (Opcode == PPCISD::XXPERM)
10720	VPermMask = DAG.getBitcast(VT: MVT::v4i32, V: VPermMask);
10721
10722	// Only need to place items backwards in LE,
10723	// the mask was properly calculated.
10724	if (isLittleEndian)
10725	std::swap(a&: V1, b&: V2);
10726
10727	SDValue VPERMNode =
10728	DAG.getNode(Opcode, DL: dl, VT: V1.getValueType(), N1: V1, N2: V2, N3: VPermMask);
10729
10730	VPERMNode = DAG.getBitcast(VT: ValType, V: VPERMNode);
10731	return VPERMNode;
10732	}
10733
10734	/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10735	/// vector comparison. If it is, return true and fill in Opc/isDot with
10736	/// information about the intrinsic.
10737	static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10738	bool &isDot, const PPCSubtarget &Subtarget) {
10739	unsigned IntrinsicID = Intrin.getConstantOperandVal(i: `0`);
10740	CompareOpc = -`1`;
10741	isDot = false;
10742	switch (IntrinsicID) {
10743	default:
10744	return false;
10745	// Comparison predicates.
10746	case Intrinsic::ppc_altivec_vcmpbfp_p:
10747	CompareOpc = `966`;
10748	isDot = true;
10749	break;
10750	case Intrinsic::ppc_altivec_vcmpeqfp_p:
10751	CompareOpc = `198`;
10752	isDot = true;
10753	break;
10754	case Intrinsic::ppc_altivec_vcmpequb_p:
10755	CompareOpc = `6`;
10756	isDot = true;
10757	break;
10758	case Intrinsic::ppc_altivec_vcmpequh_p:
10759	CompareOpc = `70`;
10760	isDot = true;
10761	break;
10762	case Intrinsic::ppc_altivec_vcmpequw_p:
10763	CompareOpc = `134`;
10764	isDot = true;
10765	break;
10766	case Intrinsic::ppc_altivec_vcmpequd_p:
10767	if (Subtarget.hasVSX() \|\| Subtarget.hasP8Altivec()) {
10768	CompareOpc = `199`;
10769	isDot = true;
10770	} else
10771	return false;
10772	break;
10773	case Intrinsic::ppc_altivec_vcmpneb_p:
10774	case Intrinsic::ppc_altivec_vcmpneh_p:
10775	case Intrinsic::ppc_altivec_vcmpnew_p:
10776	case Intrinsic::ppc_altivec_vcmpnezb_p:
10777	case Intrinsic::ppc_altivec_vcmpnezh_p:
10778	case Intrinsic::ppc_altivec_vcmpnezw_p:
10779	if (Subtarget.hasP9Altivec()) {
10780	switch (IntrinsicID) {
10781	default:
10782	llvm_unreachable("Unknown comparison intrinsic.");
10783	case Intrinsic::ppc_altivec_vcmpneb_p:
10784	CompareOpc = `7`;
10785	break;
10786	case Intrinsic::ppc_altivec_vcmpneh_p:
10787	CompareOpc = `71`;
10788	break;
10789	case Intrinsic::ppc_altivec_vcmpnew_p:
10790	CompareOpc = `135`;
10791	break;
10792	case Intrinsic::ppc_altivec_vcmpnezb_p:
10793	CompareOpc = `263`;
10794	break;
10795	case Intrinsic::ppc_altivec_vcmpnezh_p:
10796	CompareOpc = `327`;
10797	break;
10798	case Intrinsic::ppc_altivec_vcmpnezw_p:
10799	CompareOpc = `391`;
10800	break;
10801	}
10802	isDot = true;
10803	} else
10804	return false;
10805	break;
10806	case Intrinsic::ppc_altivec_vcmpgefp_p:
10807	CompareOpc = `454`;
10808	isDot = true;
10809	break;
10810	case Intrinsic::ppc_altivec_vcmpgtfp_p:
10811	CompareOpc = `710`;
10812	isDot = true;
10813	break;
10814	case Intrinsic::ppc_altivec_vcmpgtsb_p:
10815	CompareOpc = `774`;
10816	isDot = true;
10817	break;
10818	case Intrinsic::ppc_altivec_vcmpgtsh_p:
10819	CompareOpc = `838`;
10820	isDot = true;
10821	break;
10822	case Intrinsic::ppc_altivec_vcmpgtsw_p:
10823	CompareOpc = `902`;
10824	isDot = true;
10825	break;
10826	case Intrinsic::ppc_altivec_vcmpgtsd_p:
10827	if (Subtarget.hasVSX() \|\| Subtarget.hasP8Altivec()) {
10828	CompareOpc = `967`;
10829	isDot = true;
10830	} else
10831	return false;
10832	break;
10833	case Intrinsic::ppc_altivec_vcmpgtub_p:
10834	CompareOpc = `518`;
10835	isDot = true;
10836	break;
10837	case Intrinsic::ppc_altivec_vcmpgtuh_p:
10838	CompareOpc = `582`;
10839	isDot = true;
10840	break;
10841	case Intrinsic::ppc_altivec_vcmpgtuw_p:
10842	CompareOpc = `646`;
10843	isDot = true;
10844	break;
10845	case Intrinsic::ppc_altivec_vcmpgtud_p:
10846	if (Subtarget.hasVSX() \|\| Subtarget.hasP8Altivec()) {
10847	CompareOpc = `711`;
10848	isDot = true;
10849	} else
10850	return false;
10851	break;
10852
10853	case Intrinsic::ppc_altivec_vcmpequq:
10854	case Intrinsic::ppc_altivec_vcmpgtsq:
10855	case Intrinsic::ppc_altivec_vcmpgtuq:
10856	if (!Subtarget.isISA3_1())
10857	return false;
10858	switch (IntrinsicID) {
10859	default:
10860	llvm_unreachable("Unknown comparison intrinsic.");
10861	case Intrinsic::ppc_altivec_vcmpequq:
10862	CompareOpc = `455`;
10863	break;
10864	case Intrinsic::ppc_altivec_vcmpgtsq:
10865	CompareOpc = `903`;
10866	break;
10867	case Intrinsic::ppc_altivec_vcmpgtuq:
10868	CompareOpc = `647`;
10869	break;
10870	}
10871	break;
10872
10873	// VSX predicate comparisons use the same infrastructure
10874	case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10875	case Intrinsic::ppc_vsx_xvcmpgedp_p:
10876	case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10877	case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10878	case Intrinsic::ppc_vsx_xvcmpgesp_p:
10879	case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10880	if (Subtarget.hasVSX()) {
10881	switch (IntrinsicID) {
10882	case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10883	CompareOpc = `99`;
10884	break;
10885	case Intrinsic::ppc_vsx_xvcmpgedp_p:
10886	CompareOpc = `115`;
10887	break;
10888	case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10889	CompareOpc = `107`;
10890	break;
10891	case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10892	CompareOpc = `67`;
10893	break;
10894	case Intrinsic::ppc_vsx_xvcmpgesp_p:
10895	CompareOpc = `83`;
10896	break;
10897	case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10898	CompareOpc = `75`;
10899	break;
10900	}
10901	isDot = true;
10902	} else
10903	return false;
10904	break;
10905
10906	// Normal Comparisons.
10907	case Intrinsic::ppc_altivec_vcmpbfp:
10908	CompareOpc = `966`;
10909	break;
10910	case Intrinsic::ppc_altivec_vcmpeqfp:
10911	CompareOpc = `198`;
10912	break;
10913	case Intrinsic::ppc_altivec_vcmpequb:
10914	CompareOpc = `6`;
10915	break;
10916	case Intrinsic::ppc_altivec_vcmpequh:
10917	CompareOpc = `70`;
10918	break;
10919	case Intrinsic::ppc_altivec_vcmpequw:
10920	CompareOpc = `134`;
10921	break;
10922	case Intrinsic::ppc_altivec_vcmpequd:
10923	if (Subtarget.hasP8Altivec())
10924	CompareOpc = `199`;
10925	else
10926	return false;
10927	break;
10928	case Intrinsic::ppc_altivec_vcmpneb:
10929	case Intrinsic::ppc_altivec_vcmpneh:
10930	case Intrinsic::ppc_altivec_vcmpnew:
10931	case Intrinsic::ppc_altivec_vcmpnezb:
10932	case Intrinsic::ppc_altivec_vcmpnezh:
10933	case Intrinsic::ppc_altivec_vcmpnezw:
10934	if (Subtarget.hasP9Altivec())
10935	switch (IntrinsicID) {
10936	default:
10937	llvm_unreachable("Unknown comparison intrinsic.");
10938	case Intrinsic::ppc_altivec_vcmpneb:
10939	CompareOpc = `7`;
10940	break;
10941	case Intrinsic::ppc_altivec_vcmpneh:
10942	CompareOpc = `71`;
10943	break;
10944	case Intrinsic::ppc_altivec_vcmpnew:
10945	CompareOpc = `135`;
10946	break;
10947	case Intrinsic::ppc_altivec_vcmpnezb:
10948	CompareOpc = `263`;
10949	break;
10950	case Intrinsic::ppc_altivec_vcmpnezh:
10951	CompareOpc = `327`;
10952	break;
10953	case Intrinsic::ppc_altivec_vcmpnezw:
10954	CompareOpc = `391`;
10955	break;
10956	}
10957	else
10958	return false;
10959	break;
10960	case Intrinsic::ppc_altivec_vcmpgefp:
10961	CompareOpc = `454`;
10962	break;
10963	case Intrinsic::ppc_altivec_vcmpgtfp:
10964	CompareOpc = `710`;
10965	break;
10966	case Intrinsic::ppc_altivec_vcmpgtsb:
10967	CompareOpc = `774`;
10968	break;
10969	case Intrinsic::ppc_altivec_vcmpgtsh:
10970	CompareOpc = `838`;
10971	break;
10972	case Intrinsic::ppc_altivec_vcmpgtsw:
10973	CompareOpc = `902`;
10974	break;
10975	case Intrinsic::ppc_altivec_vcmpgtsd:
10976	if (Subtarget.hasP8Altivec())
10977	CompareOpc = `967`;
10978	else
10979	return false;
10980	break;
10981	case Intrinsic::ppc_altivec_vcmpgtub:
10982	CompareOpc = `518`;
10983	break;
10984	case Intrinsic::ppc_altivec_vcmpgtuh:
10985	CompareOpc = `582`;
10986	break;
10987	case Intrinsic::ppc_altivec_vcmpgtuw:
10988	CompareOpc = `646`;
10989	break;
10990	case Intrinsic::ppc_altivec_vcmpgtud:
10991	if (Subtarget.hasP8Altivec())
10992	CompareOpc = `711`;
10993	else
10994	return false;
10995	break;
10996	case Intrinsic::ppc_altivec_vcmpequq_p:
10997	case Intrinsic::ppc_altivec_vcmpgtsq_p:
10998	case Intrinsic::ppc_altivec_vcmpgtuq_p:
10999	if (!Subtarget.isISA3_1())
11000	return false;
11001	switch (IntrinsicID) {
11002	default:
11003	llvm_unreachable("Unknown comparison intrinsic.");
11004	case Intrinsic::ppc_altivec_vcmpequq_p:
11005	CompareOpc = `455`;
11006	break;
11007	case Intrinsic::ppc_altivec_vcmpgtsq_p:
11008	CompareOpc = `903`;
11009	break;
11010	case Intrinsic::ppc_altivec_vcmpgtuq_p:
11011	CompareOpc = `647`;
11012	break;
11013	}
11014	isDot = true;
11015	break;
11016	}
11017	return true;
11018	}
11019
11020	/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11021	/// lower, do it, otherwise return null.
11022	SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11023	SelectionDAG &DAG) const {
11024	unsigned IntrinsicID = Op.getConstantOperandVal(i: `0`);
11025
11026	SDLoc dl(Op);
11027	// Note: BCD instructions expect the immediate operand in vector form (v4i32),
11028	// but the builtin provides it as a scalar. To satisfy the instruction
11029	// encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
11030	auto MapNodeWithSplatVector =
11031	[&](unsigned Opcode,
11032	std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
11033	SDValue SplatVal =
11034	DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: MVT::v4i32, Operand: Op.getOperand(i: `2`));
11035
11036	SmallVector<SDValue, `4`> Ops{SplatVal, Op.getOperand(i: `1`)};
11037	Ops.append(in_start: ExtraOps.begin(), in_end: ExtraOps.end());
11038	return DAG.getNode(Opcode, DL: dl, VT: MVT::v16i8, Ops);
11039	};
11040
11041	switch (IntrinsicID) {
11042	case Intrinsic::thread_pointer:
11043	// Reads the thread pointer register, used for __builtin_thread_pointer.
11044	if (Subtarget.isPPC64())
11045	return DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
11046	return DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);
11047
11048	case Intrinsic::ppc_rldimi: {
11049	assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11050	SDValue Src = Op.getOperand(i: `1`);
11051	APInt Mask = Op.getConstantOperandAPInt(i: `4`);
11052	if (Mask.isZero())
11053	return Op.getOperand(i: `2`);
11054	if (Mask.isAllOnes())
11055	return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src, N2: Op.getOperand(i: `3`));
11056	uint64_t SH = Op.getConstantOperandVal(i: `3`);
11057	unsigned MB = `0`, ME = `0`;
11058	if (!isRunOfOnes64(Val: Mask.getZExtValue(), MB, ME))
11059	report_fatal_error(reason: "invalid rldimi mask!");
11060	// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11061	if (ME < `63` - SH) {
11062	Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
11063	N2: DAG.getConstant(Val: ME + SH + `1`, DL: dl, VT: MVT::i32));
11064	} else if (ME > `63` - SH) {
11065	Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
11066	N2: DAG.getConstant(Val: ME + SH - `63`, DL: dl, VT: MVT::i32));
11067	}
11068	return SDValue (
11069	DAG.getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64,
11070	Ops: {Op.getOperand(i: `2`), Src,
11071	DAG.getTargetConstant(Val: `63` - ME, DL: dl, VT: MVT::i32),
11072	DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32)}),
11073	`0`);
11074	}
11075
11076	case Intrinsic::ppc_rlwimi: {
11077	APInt Mask = Op.getConstantOperandAPInt(i: `4`);
11078	if (Mask.isZero())
11079	return Op.getOperand(i: `2`);
11080	if (Mask.isAllOnes())
11081	return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i32, N1: Op.getOperand(i: `1`),
11082	N2: Op.getOperand(i: `3`));
11083	unsigned MB = `0`, ME = `0`;
11084	if (!isRunOfOnes(Val: Mask.getZExtValue(), MB, ME))
11085	report_fatal_error(reason: "invalid rlwimi mask!");
11086	return SDValue (DAG.getMachineNode(
11087	Opcode: PPC::RLWIMI, dl, VT: MVT::i32,
11088	Ops: {Op.getOperand(i: `2`), Op.getOperand(i: `1`), Op.getOperand(i: `3`),
11089	DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
11090	DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
11091	`0`);
11092	}
11093
11094	case Intrinsic::ppc_bcdshift:
11095	return MapNodeWithSplatVector (PPCISD::BCDSHIFT, {Op.getOperand(i: `3`)});
11096	case Intrinsic::ppc_bcdshiftround:
11097	return MapNodeWithSplatVector (PPCISD::BCDSHIFTROUND, {Op.getOperand(i: `3`)});
11098	case Intrinsic::ppc_bcdtruncate:
11099	return MapNodeWithSplatVector (PPCISD::BCDTRUNC, {Op.getOperand(i: `3`)});
11100	case Intrinsic::ppc_bcdunsignedtruncate:
11101	return MapNodeWithSplatVector (PPCISD::BCDUTRUNC);
11102	case Intrinsic::ppc_bcdunsignedshift:
11103	return MapNodeWithSplatVector (PPCISD::BCDUSHIFT);
11104
11105	case Intrinsic::ppc_rlwnm: {
11106	if (Op.getConstantOperandVal(i: `3`) == `0`)
11107	return DAG.getConstant(Val: `0`, DL: dl, VT: MVT::i32);
11108	unsigned MB = `0`, ME = `0`;
11109	if (!isRunOfOnes(Val: Op.getConstantOperandVal(i: `3`), MB, ME))
11110	report_fatal_error(reason: "invalid rlwnm mask!");
11111	return SDValue (
11112	DAG.getMachineNode(Opcode: PPC::RLWNM, dl, VT: MVT::i32,
11113	Ops: {Op.getOperand(i: `1`), Op.getOperand(i: `2`),
11114	DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
11115	DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
11116	`0`);
11117	}
11118
11119	case Intrinsic::ppc_mma_disassemble_acc: {
11120	if (Subtarget.isISAFuture()) {
11121	EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11122	SDValue WideVec =
11123	SDValue (DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes,
11124	Ops: Op.getOperand(i: `1`)),
11125	`0`);
11126	SmallVector<SDValue, `4`> RetOps;
11127	SDValue Value = SDValue (WideVec.getNode(), `0`);
11128	SDValue Value2 = SDValue (WideVec.getNode(), `1`);
11129
11130	SDValue Extract;
11131	Extract = DAG.getNode(
11132	Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11133	N1: Subtarget.isLittleEndian() ? Value2 : Value,
11134	N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? `1` : `0`,
11135	DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11136	RetOps.push_back(Elt: Extract);
11137	Extract = DAG.getNode(
11138	Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11139	N1: Subtarget.isLittleEndian() ? Value2 : Value,
11140	N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? `0` : `1`,
11141	DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11142	RetOps.push_back(Elt: Extract);
11143	Extract = DAG.getNode(
11144	Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11145	N1: Subtarget.isLittleEndian() ? Value : Value2,
11146	N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? `1` : `0`,
11147	DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11148	RetOps.push_back(Elt: Extract);
11149	Extract = DAG.getNode(
11150	Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11151	N1: Subtarget.isLittleEndian() ? Value : Value2,
11152	N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? `0` : `1`,
11153	DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11154	RetOps.push_back(Elt: Extract);
11155	return DAG.getMergeValues(Ops: RetOps, dl);
11156	}
11157	[[fallthrough]];
11158	}
11159	case Intrinsic::ppc_vsx_disassemble_pair: {
11160	int NumVecs = `2`;
11161	SDValue WideVec = Op.getOperand(i: `1`);
11162	if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11163	NumVecs = `4`;
11164	WideVec = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: WideVec);
11165	}
11166	SmallVector<SDValue, `4`> RetOps;
11167	for (int VecNo = `0`; VecNo < NumVecs; VecNo++) {
11168	SDValue Extract = DAG.getNode(
11169	Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: WideVec,
11170	N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? NumVecs - `1` - VecNo
11171	: VecNo,
11172	DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11173	RetOps.push_back(Elt: Extract);
11174	}
11175	return DAG.getMergeValues(Ops: RetOps, dl);
11176	}
11177
11178	case Intrinsic::ppc_build_dmr: {
11179	SmallVector<SDValue, `8`> Pairs;
11180	SmallVector<SDValue, `8`> Chains;
11181	for (int i = `1`; i < `9`; i += `2`) {
11182	SDValue Hi = Op.getOperand(i);
11183	SDValue Lo = Op.getOperand(i: i + `1`);
11184	if (Hi ->getOpcode() == ISD::LOAD)
11185	Chains.push_back(Elt: Hi.getValue(R: `1`));
11186	if (Lo ->getOpcode() == ISD::LOAD)
11187	Chains.push_back(Elt: Lo.getValue(R: `1`));
11188	Pairs.push_back(
11189	Elt: DAG.getNode(Opcode: PPCISD::PAIR_BUILD, DL: dl, VT: MVT::v256i1, Ops: {Hi, Lo}));
11190	}
11191	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Chains);
11192	SDValue Value = DMFInsert1024(Pairs, dl: SDLoc (Op), DAG);
11193	return DAG.getMergeValues(Ops: {Value, TF}, dl);
11194	}
11195
11196	case Intrinsic::ppc_mma_dmxxextfdmr512: {
11197	assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11198	auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
11199	assert(Idx && (Idx->getSExtValue() == `0` \|\| Idx->getSExtValue() == `1`) &&
11200	"Specify P of 0 or 1 for lower or upper 512 bytes");
11201	unsigned HiLo = Idx->getSExtValue();
11202	unsigned Opcode;
11203	unsigned Subx;
11204	if (HiLo == `0`) {
11205	Opcode = PPC::DMXXEXTFDMR512;
11206	Subx = PPC::sub_wacc_lo;
11207	} else {
11208	Opcode = PPC::DMXXEXTFDMR512_HI;
11209	Subx = PPC::sub_wacc_hi;
11210	}
11211	SDValue Subreg(
11212	DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
11213	Op1: Op.getOperand(i: `1`),
11214	Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
11215	`0`);
11216	EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11217	return SDValue (DAG.getMachineNode(Opcode, dl, ResultTys: ReturnTypes, Ops: Subreg), `0`);
11218	}
11219
11220	case Intrinsic::ppc_mma_dmxxextfdmr256: {
11221	assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11222	auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
11223	assert(Idx && (Idx->getSExtValue() >= `0` \|\| Idx->getSExtValue() <= `3`) &&
11224	"Specify a dmr row pair 0-3");
11225	unsigned IdxVal = Idx->getSExtValue();
11226	unsigned Subx;
11227	switch (IdxVal) {
11228	case `0`:
11229	Subx = PPC::sub_dmrrowp0;
11230	break;
11231	case `1`:
11232	Subx = PPC::sub_dmrrowp1;
11233	break;
11234	case `2`:
11235	Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11236	break;
11237	case `3`:
11238	Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11239	break;
11240	}
11241	SDValue Subreg(
11242	DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v256i1,
11243	Op1: Op.getOperand(i: `1`),
11244	Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
11245	`0`);
11246	SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
11247	return SDValue (
11248	DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR256, dl, VT: MVT::v256i1, Ops: {Subreg, P}),
11249	`0`);
11250	}
11251
11252	case Intrinsic::ppc_mma_dmxxinstdmr512: {
11253	assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11254	auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `4`));
11255	assert(Idx && (Idx->getSExtValue() == `0` \|\| Idx->getSExtValue() == `1`) &&
11256	"Specify P of 0 or 1 for lower or upper 512 bytes");
11257	unsigned HiLo = Idx->getSExtValue();
11258	unsigned Opcode;
11259	unsigned Subx;
11260	if (HiLo == `0`) {
11261	Opcode = PPCISD::INST512;
11262	Subx = PPC::sub_wacc_lo;
11263	} else {
11264	Opcode = PPCISD::INST512HI;
11265	Subx = PPC::sub_wacc_hi;
11266	}
11267	SDValue Wacc = DAG.getNode(Opcode, DL: dl, VT: MVT::v512i1, N1: Op.getOperand(i: `2`),
11268	N2: Op.getOperand(i: `3`));
11269	SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
11270	return SDValue (DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
11271	Op1: Op.getOperand(i: `1`), Op2: Wacc, Op3: SubReg),
11272	`0`);
11273	}
11274
11275	case Intrinsic::ppc_mma_dmxxinstdmr256: {
11276	assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11277	auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `3`));
11278	assert(Idx && (Idx->getSExtValue() >= `0` \|\| Idx->getSExtValue() <= `3`) &&
11279	"Specify a dmr row pair 0-3");
11280	unsigned IdxVal = Idx->getSExtValue();
11281	unsigned Subx;
11282	switch (IdxVal) {
11283	case `0`:
11284	Subx = PPC::sub_dmrrowp0;
11285	break;
11286	case `1`:
11287	Subx = PPC::sub_dmrrowp1;
11288	break;
11289	case `2`:
11290	Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11291	break;
11292	case `3`:
11293	Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11294	break;
11295	}
11296	SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
11297	SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
11298	SDValue DMRRowp =
11299	DAG.getNode(Opcode: PPCISD::INST256, DL: dl, VT: MVT::v256i1, N1: Op.getOperand(i: `2`), N2: P);
11300	return SDValue (DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
11301	Op1: Op.getOperand(i: `1`), Op2: DMRRowp, Op3: SubReg),
11302	`0`);
11303	}
11304
11305	case Intrinsic::ppc_mma_xxmfacc:
11306	case Intrinsic::ppc_mma_xxmtacc: {
11307	// Allow pre-isa-future subtargets to lower as normal.
11308	if (!Subtarget.isISAFuture())
11309	return SDValue ();
11310	// The intrinsics for xxmtacc and xxmfacc take one argument of
11311	// type v512i1, for future cpu the corresponding wacc instruction
11312	// dmxx[inst\|extf]dmr512 is always generated for type v512i1, negating
11313	// the need to produce the xxm[t\|f]acc.
11314	SDValue WideVec = Op.getOperand(i: `1`);
11315	DAG.ReplaceAllUsesWith(From: Op, To: WideVec);
11316	return SDValue ();
11317	}
11318
11319	case Intrinsic::ppc_unpack_longdouble: {
11320	auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
11321	assert(Idx && (Idx->getSExtValue() == `0` \|\| Idx->getSExtValue() == `1`) &&
11322	"Argument of long double unpack must be 0 or 1!");
11323	return DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: dl, VT: MVT::f64, N1: Op.getOperand(i: `1`),
11324	N2: DAG.getConstant(Val: !!(Idx->getSExtValue()), DL: dl,
11325	VT: Idx->getValueType(ResNo: `0`)));
11326	}
11327
11328	case Intrinsic::ppc_compare_exp_lt:
11329	case Intrinsic::ppc_compare_exp_gt:
11330	case Intrinsic::ppc_compare_exp_eq:
11331	case Intrinsic::ppc_compare_exp_uo: {
11332	unsigned Pred;
11333	switch (IntrinsicID) {
11334	case Intrinsic::ppc_compare_exp_lt:
11335	Pred = PPC::PRED_LT;
11336	break;
11337	case Intrinsic::ppc_compare_exp_gt:
11338	Pred = PPC::PRED_GT;
11339	break;
11340	case Intrinsic::ppc_compare_exp_eq:
11341	Pred = PPC::PRED_EQ;
11342	break;
11343	case Intrinsic::ppc_compare_exp_uo:
11344	Pred = PPC::PRED_UN;
11345	break;
11346	}
11347	return SDValue (
11348	DAG.getMachineNode(
11349	Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
11350	Ops: {SDValue (DAG.getMachineNode(Opcode: PPC::XSCMPEXPDP, dl, VT: MVT::i32,
11351	Op1: Op.getOperand(i: `1`), Op2: Op.getOperand(i: `2`)),
11352	`0`),
11353	DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i32), DAG.getConstant(Val: `0`, DL: dl, VT: MVT::i32),
11354	DAG.getTargetConstant(Val: Pred, DL: dl, VT: MVT::i32)}),
11355	`0`);
11356	}
11357	case Intrinsic::ppc_test_data_class: {
11358	EVT OpVT = Op.getOperand(i: `1`).getValueType();
11359	unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11360	: (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11361	: PPC::XSTSTDCSP);
11362	// Lower __builtin_ppc_test_data_class(value, mask) to an XSTSTDC* instruction.
11363	// The XSTSTDC* instructions test if a floating-point value matches any of
11364	// the data classes specified in the mask, setting CR field bits
11365	// accordingly. We need to extract the EQ bit (bit 2) from the CR field and
11366	// convert it to an integer result (1 if match, 0 if no match).
11367	//
11368	// Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
11369	// intrinsic provides (value, mask) as Op.getOperand(1) and
11370	// Op.getOperand(2).
11371	SDValue TestDataClass =
11372	SDValue (DAG.getMachineNode(Opcode: CmprOpc, dl, VT: MVT::i32,
11373	Ops: {Op.getOperand(i: `2`), Op.getOperand(i: `1`)}),
11374	`0`);
11375	if (Subtarget.isISA3_1()) {
11376	// ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
11377	// This is more efficient than the SELECT_CC approach used in earlier
11378	// ISAs.
11379	SDValue SubRegIdx = DAG.getTargetConstant(Val: PPC::sub_eq, DL: dl, VT: MVT::i32);
11380	SDValue CRBit =
11381	SDValue (DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
11382	Op1: TestDataClass, Op2: SubRegIdx),
11383	`0`);
11384
11385	return DAG.getNode(Opcode: PPCISD::SETBC, DL: dl, VT: MVT::i32, Operand: CRBit);
11386	}
11387
11388	// Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
11389	return SDValue (
11390	DAG.getMachineNode(Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
11391	Ops: {TestDataClass, DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i32),
11392	DAG.getConstant(Val: `0`, DL: dl, VT: MVT::i32),
11393	DAG.getTargetConstant(Val: PPC::PRED_EQ, DL: dl, VT: MVT::i32)}),
11394	`0`);
11395	}
11396	case Intrinsic::ppc_fnmsub: {
11397	EVT VT = Op.getOperand(i: `1`).getValueType();
11398	if (!Subtarget.hasVSX() \|\| (!Subtarget.hasFloat128() && VT == MVT::f128))
11399	return DAG.getNode(
11400	Opcode: ISD::FNEG, DL: dl, VT,
11401	Operand: DAG.getNode(Opcode: ISD::FMA, DL: dl, VT, N1: Op.getOperand(i: `1`), N2: Op.getOperand(i: `2`),
11402	N3: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op.getOperand(i: `3`))));
11403	return DAG.getNode(Opcode: PPCISD::FNMSUB, DL: dl, VT, N1: Op.getOperand(i: `1`),
11404	N2: Op.getOperand(i: `2`), N3: Op.getOperand(i: `3`));
11405	}
11406	case Intrinsic::ppc_convert_f128_to_ppcf128:
11407	case Intrinsic::ppc_convert_ppcf128_to_f128: {
11408	RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11409	? RTLIB::CONVERT_PPCF128_F128
11410	: RTLIB::CONVERT_F128_PPCF128;
11411	MakeLibCallOptions CallOptions;
11412	std::pair<SDValue, SDValue> Result =
11413	makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op.getOperand(i: `1`), CallOptions,
11414	dl, Chain: SDValue ());
11415	return Result.first;
11416	}
11417	case Intrinsic::ppc_maxfe:
11418	case Intrinsic::ppc_maxfl:
11419	case Intrinsic::ppc_maxfs:
11420	case Intrinsic::ppc_minfe:
11421	case Intrinsic::ppc_minfl:
11422	case Intrinsic::ppc_minfs: {
11423	EVT VT = Op.getValueType();
11424	assert(
11425	all_of(Op->ops().drop_front(`4`),
11426	[VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11427	"ppc_[max\|min]f[e\|l\|s] must have uniform type arguments");
11428	(void)VT;
11429	ISD::CondCode CC = ISD::SETGT;
11430	if (IntrinsicID == Intrinsic::ppc_minfe \|\|
11431	IntrinsicID == Intrinsic::ppc_minfl \|\|
11432	IntrinsicID == Intrinsic::ppc_minfs)
11433	CC = ISD::SETLT;
11434	unsigned I = Op.getNumOperands() - `2`, Cnt = I;
11435	SDValue Res = Op.getOperand(i: I);
11436	for (--I; Cnt != `0`; --Cnt, I = (--I == `0` ? (Op.getNumOperands() - `1`) : I)) {
11437	Res =
11438	DAG.getSelectCC(DL: dl, LHS: Res, RHS: Op.getOperand(i: I), True: Res, False: Op.getOperand(i: I), Cond: CC);
11439	}
11440	return Res;
11441	}
11442	}
11443
11444	// If this is a lowered altivec predicate compare, CompareOpc is set to the
11445	// opcode number of the comparison.
11446	int CompareOpc;
11447	bool isDot;
11448	if (!getVectorCompareInfo(Intrin: Op, CompareOpc, isDot, Subtarget))
11449	return SDValue (); // Don't custom lower most intrinsics.
11450
11451	// If this is a non-dot comparison, make the VCMP node and we are done.
11452	if (!isDot) {
11453	SDValue Tmp = DAG.getNode(Opcode: PPCISD::VCMP, DL: dl, VT: Op.getOperand(i: `2`).getValueType(),
11454	N1: Op.getOperand(i: `1`), N2: Op.getOperand(i: `2`),
11455	N3: DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32));
11456	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Tmp);
11457	}
11458
11459	// Create the PPCISD altivec 'dot' comparison node.
11460	SDValue Ops[] = {
11461	Op.getOperand(i: `2`), // LHS
11462	Op.getOperand(i: `3`), // RHS
11463	DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
11464	};
11465	EVT VTs[] = { Op.getOperand(i: `2`).getValueType(), MVT::Glue };
11466	SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);
11467
11468	// Unpack the result based on how the target uses it.
11469	unsigned BitNo; // Bit # of CR6.
11470	bool InvertBit; // Invert result?
11471	unsigned Bitx;
11472	unsigned SetOp;
11473	switch (Op.getConstantOperandVal(i: `1`)) {
11474	default: // Can't happen, don't crash on invalid number though.
11475	case `0`: // Return the value of the EQ bit of CR6.
11476	BitNo = `0`;
11477	InvertBit = false;
11478	Bitx = PPC::sub_eq;
11479	SetOp = PPCISD::SETBC;
11480	break;
11481	case `1`: // Return the inverted value of the EQ bit of CR6.
11482	BitNo = `0`;
11483	InvertBit = true;
11484	Bitx = PPC::sub_eq;
11485	SetOp = PPCISD::SETBCR;
11486	break;
11487	case `2`: // Return the value of the LT bit of CR6.
11488	BitNo = `2`;
11489	InvertBit = false;
11490	Bitx = PPC::sub_lt;
11491	SetOp = PPCISD::SETBC;
11492	break;
11493	case `3`: // Return the inverted value of the LT bit of CR6.
11494	BitNo = `2`;
11495	InvertBit = true;
11496	Bitx = PPC::sub_lt;
11497	SetOp = PPCISD::SETBCR;
11498	break;
11499	}
11500
11501	SDValue GlueOp = CompNode.getValue(R: `1`);
11502	if (Subtarget.isISA3_1()) {
11503	SDValue SubRegIdx = DAG.getTargetConstant(Val: Bitx, DL: dl, VT: MVT::i32);
11504	SDValue CR6Reg = DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32);
11505	SDValue CRBit =
11506	SDValue (DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
11507	Op1: CR6Reg, Op2: SubRegIdx, Op3: GlueOp),
11508	`0`);
11509	return DAG.getNode(Opcode: SetOp, DL: dl, VT: MVT::i32, Operand: CRBit);
11510	}
11511
11512	// Now that we have the comparison, emit a copy from the CR to a GPR.
11513	// This is flagged to the above dot comparison.
11514	SDValue Flags = DAG.getNode(Opcode: PPCISD::MFOCRF, DL: dl, VT: MVT::i32,
11515	N1: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32), N2: GlueOp);
11516
11517	// Shift the bit into the low position.
11518	Flags = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32, N1: Flags,
11519	N2: DAG.getConstant(Val: `8` - (`3` - BitNo), DL: dl, VT: MVT::i32));
11520	// Isolate the bit.
11521	Flags = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: Flags,
11522	N2: DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i32));
11523
11524	// If we are supposed to, toggle the bit.
11525	if (InvertBit)
11526	Flags = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: Flags,
11527	N2: DAG.getConstant(Val: `1`, DL: dl, VT: MVT::i32));
11528	return Flags;
11529	}
11530
11531	SDValue PPCTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11532	SelectionDAG &DAG) const {
11533	unsigned IntrinsicID = Op.getConstantOperandVal(i: `1`);
11534	SDLoc dl(Op);
11535	switch (IntrinsicID) {
11536	case Intrinsic::ppc_amo_lwat_csne:
11537	case Intrinsic::ppc_amo_ldat_csne:
11538	SDValue Chain = Op.getOperand(i: `0`);
11539	SDValue Ptr = Op.getOperand(i: `2`);
11540	SDValue CmpVal = Op.getOperand(i: `3`);
11541	SDValue NewVal = Op.getOperand(i: `4`);
11542
11543	EVT VT = IntrinsicID == Intrinsic::ppc_amo_ldat_csne ? MVT::i64 : MVT::i32;
11544	Type Ty = VT.getTypeForEVT(Context&: DAG.getContext());
11545	Type IntPtrTy = DAG.getDataLayout().getIntPtrType(C&: DAG.getContext());
11546
11547	TargetLowering::ArgListTy Args;
11548	Args.emplace_back(args: DAG.getUNDEF(VT: MVT::i64),
11549	args: Type::getInt64Ty(C&: *DAG.getContext()));
11550	Args.emplace_back(args&: CmpVal, args&: Ty);
11551	Args.emplace_back(args&: NewVal, args&: Ty);
11552	Args.emplace_back(args&: Ptr, args&: IntPtrTy);
11553
11554	// Lower to dummy call to use ABI for consecutive register allocation.
11555	// Places return value, compare value, and new value in X3/X4/X5 as required
11556	// by lwat/ldat FC=16, avoiding a new register class for 3 adjacent
11557	// registers.
11558	const char *SymName = IntrinsicID == Intrinsic::ppc_amo_ldat_csne
11559	? "__ldat_csne_pseudo"
11560	: "__lwat_csne_pseudo";
11561	SDValue Callee =
11562	DAG.getExternalSymbol(Sym: SymName, VT: getPointerTy(DL: DAG.getDataLayout()));
11563
11564	TargetLowering::CallLoweringInfo CLI(DAG);
11565	CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(CC: CallingConv::C, ResultType: Ty, Target: Callee,
11566	ArgsList: std::move(Args));
11567
11568	auto Result = LowerCallTo(CLI);
11569	return DAG.getMergeValues(Ops: {Result.first, Result.second}, dl);
11570	}
11571	return SDValue ();
11572	}
11573
11574	SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11575	SelectionDAG &DAG) const {
11576	// SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11577	// the beginning of the argument list.
11578	int ArgStart = isa<ConstantSDNode>(Val: Op.getOperand(i: `0`)) ? `0` : `1`;
11579	SDLoc DL(Op);
11580	switch (Op.getConstantOperandVal(i: ArgStart)) {
11581	case Intrinsic::ppc_cfence: {
11582	assert(ArgStart == `1` && "llvm.ppc.cfence must carry a chain argument.");
11583	SDValue Val = Op.getOperand(i: ArgStart + `1`);
11584	EVT Ty = Val.getValueType();
11585	if (Ty == MVT::i128) {
11586	// FIXME: Testing one of two paired registers is sufficient to guarantee
11587	// ordering?
11588	Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i64, Operand: Val);
11589	}
11590	unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11591	return SDValue (
11592	DAG.getMachineNode(
11593	Opcode, dl: DL, VT: MVT::Other,
11594	Op1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getScalarIntVT(), Operand: Val),
11595	Op2: Op.getOperand(i: `0`)),
11596	`0`);
11597	}
11598	case Intrinsic::ppc_disassemble_dmr: {
11599	return DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: Op.getOperand(i: ArgStart + `2`),
11600	Ptr: Op.getOperand(i: ArgStart + `1`), PtrInfo: MachinePointerInfo ());
11601	}
11602	case Intrinsic::ppc_amo_stwat:
11603	case Intrinsic::ppc_amo_stdat: {
11604	SDLoc dl(Op);
11605	SDValue Chain = Op.getOperand(i: `0`);
11606	SDValue Ptr = Op.getOperand(i: ArgStart + `1`);
11607	SDValue Val = Op.getOperand(i: ArgStart + `2`);
11608	SDValue FC = Op.getOperand(i: ArgStart + `3`);
11609
11610	return DAG.getNode(Opcode: PPCISD::STAT, DL: dl, VT: MVT::Other, N1: Chain, N2: Val, N3: Ptr, N4: FC);
11611	}
11612	default:
11613	break;
11614	}
11615	return SDValue ();
11616	}
11617
11618	// Lower scalar BSWAP64 to xxbrd.
11619	SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11620	SDLoc dl(Op);
11621	if (!Subtarget.isPPC64())
11622	return Op;
11623	// MTVSRDD
11624	Op = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, VT: MVT::v2i64, N1: Op.getOperand(i: `0`),
11625	N2: Op.getOperand(i: `0`));
11626	// XXBRD
11627	Op = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Op);
11628	// MFVSRD
11629	int VectorIndex = `0`;
11630	if (Subtarget.isLittleEndian())
11631	VectorIndex = `1`;
11632	Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Op,
11633	N2: DAG.getTargetConstant(Val: VectorIndex, DL: dl, VT: MVT::i32));
11634	return Op;
11635	}
11636
11637	// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11638	// compared to a value that is atomically loaded (atomic loads zero-extend).
11639	SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11640	SelectionDAG &DAG) const {
11641	assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11642	"Expecting an atomic compare-and-swap here.");
11643	SDLoc dl(Op);
11644	auto *AtomicNode = cast<AtomicSDNode>(Val: Op.getNode());
11645	EVT MemVT = AtomicNode->getMemoryVT();
11646	if (MemVT.getSizeInBits() >= `32`)
11647	return Op;
11648
11649	SDValue CmpOp = Op.getOperand(i: `2`);
11650	// If this is already correctly zero-extended, leave it alone.
11651	auto HighBits = APInt::getHighBitsSet(numBits: `32`, hiBitsSet: `32` - MemVT.getSizeInBits());
11652	if (DAG.MaskedValueIsZero(Op: CmpOp, Mask: HighBits))
11653	return Op;
11654
11655	// Clear the high bits of the compare operand.
11656	unsigned MaskVal = (`1` << MemVT.getSizeInBits()) - `1`;
11657	SDValue NewCmpOp =
11658	DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: CmpOp,
11659	N2: DAG.getConstant(Val: MaskVal, DL: dl, VT: MVT::i32));
11660
11661	// Replace the existing compare operand with the properly zero-extended one.
11662	SmallVector<SDValue, `4`> Ops;
11663	for (int i = `0`, e = AtomicNode->getNumOperands(); i < e; i++)
11664	Ops.push_back(Elt: AtomicNode->getOperand(Num: i));
11665	Ops [`2`] = NewCmpOp;
11666	MachineMemOperand *MMO = AtomicNode->getMemOperand();
11667	SDVTList Tys = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other);
11668	auto NodeTy =
11669	(MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11670	return DAG.getMemIntrinsicNode(Opcode: NodeTy, dl, VTList: Tys, Ops, MemVT, MMO);
11671	}
11672
11673	SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11674	SelectionDAG &DAG) const {
11675	AtomicSDNode *N = cast<AtomicSDNode>(Val: Op.getNode());
11676	EVT MemVT = N->getMemoryVT();
11677	assert(MemVT.getSimpleVT() == MVT::i128 &&
11678	"Expect quadword atomic operations");
11679	SDLoc dl(N);
11680	unsigned Opc = N->getOpcode();
11681	switch (Opc) {
11682	case ISD::ATOMIC_LOAD: {
11683	// Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11684	// lowered to ppc instructions by pattern matching instruction selector.
11685	SDVTList Tys = DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::Other);
11686	SmallVector<SDValue, `4`> Ops{
11687	N->getOperand(Num: `0`),
11688	DAG.getConstant(Val: Intrinsic::ppc_atomic_load_i128, DL: dl, VT: MVT::i32)};
11689	for (int I = `1`, E = N->getNumOperands(); I < E; ++I)
11690	Ops.push_back(Elt: N->getOperand(Num: I));
11691	SDValue LoadedVal = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl, VTList: Tys,
11692	Ops, MemVT, MMO: N->getMemOperand());
11693	SDValue ValLo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal);
11694	SDValue ValHi =
11695	DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal.getValue(R: `1`));
11696	ValHi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ValHi,
11697	N2: DAG.getConstant(Val: `64`, DL: dl, VT: MVT::i32));
11698	SDValue Val =
11699	DAG.getNode(Opcode: ISD::OR, DL: dl, ResultTys: {MVT::i128, MVT::Other}, Ops: {ValLo, ValHi});
11700	return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, ResultTys: {MVT::i128, MVT::Other},
11701	Ops: {Val, LoadedVal.getValue(R: `2`)});
11702	}
11703	case ISD::ATOMIC_STORE: {
11704	// Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11705	// lowered to ppc instructions by pattern matching instruction selector.
11706	SDVTList Tys = DAG.getVTList(VT: MVT::Other);
11707	SmallVector<SDValue, `4`> Ops{
11708	N->getOperand(Num: `0`),
11709	DAG.getConstant(Val: Intrinsic::ppc_atomic_store_i128, DL: dl, VT: MVT::i32)};
11710	SDValue Val = N->getOperand(Num: `1`);
11711	SDValue ValLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: Val);
11712	SDValue ValHi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: Val,
11713	N2: DAG.getConstant(Val: `64`, DL: dl, VT: MVT::i32));
11714	ValHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: ValHi);
11715	Ops.push_back(Elt: ValLo);
11716	Ops.push_back(Elt: ValHi);
11717	Ops.push_back(Elt: N->getOperand(Num: `2`));
11718	return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops, MemVT,
11719	MMO: N->getMemOperand());
11720	}
11721	default:
11722	llvm_unreachable("Unexpected atomic opcode");
11723	}
11724	}
11725
11726	static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11727	SelectionDAG &DAG,
11728	const PPCSubtarget &Subtarget) {
11729	assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11730
11731	enum DataClassMask {
11732	DC_NAN = `1` << `6`,
11733	DC_NEG_INF = `1` << `4`,
11734	DC_POS_INF = `1` << `5`,
11735	DC_NEG_ZERO = `1` << `2`,
11736	DC_POS_ZERO = `1` << `3`,
11737	DC_NEG_SUBNORM = `1`,
11738	DC_POS_SUBNORM = `1` << `1`,
11739	};
11740
11741	EVT VT = Op.getValueType();
11742
11743	unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11744	: VT == MVT::f64 ? PPC::XSTSTDCDP
11745	: PPC::XSTSTDCSP;
11746
11747	if (Mask == fcAllFlags)
11748	return DAG.getBoolConstant(V: true, DL: Dl, VT: MVT::i1, OpVT: VT);
11749	if (Mask == `0`)
11750	return DAG.getBoolConstant(V: false, DL: Dl, VT: MVT::i1, OpVT: VT);
11751
11752	// When it's cheaper or necessary to test reverse flags.
11753	if ((Mask & fcNormal) == fcNormal \|\| Mask == ~fcQNan \|\| Mask == ~fcSNan) {
11754	SDValue Rev = getDataClassTest(Op, Mask: ~Mask, Dl, DAG, Subtarget);
11755	return DAG.getNOT(DL: Dl, Val: Rev, VT: MVT::i1);
11756	}
11757
11758	// Power doesn't support testing whether a value is 'normal'. Test the rest
11759	// first, and test if it's 'not not-normal' with expected sign.
11760	if (Mask & fcNormal) {
11761	SDValue Rev(DAG.getMachineNode(
11762	Opcode: TestOp, dl: Dl, VT: MVT::i32,
11763	Op1: DAG.getTargetConstant(Val: DC_NAN \| DC_NEG_INF \| DC_POS_INF \|
11764	DC_NEG_ZERO \| DC_POS_ZERO \|
11765	DC_NEG_SUBNORM \| DC_POS_SUBNORM,
11766	DL: Dl, VT: MVT::i32),
11767	Op2: Op),
11768	`0`);
11769	// Sign are stored in CR bit 0, result are in CR bit 2.
11770	SDValue Sign(
11771	DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
11772	Op2: DAG.getTargetConstant(Val: PPC::sub_lt, DL: Dl, VT: MVT::i32)),
11773	`0`);
11774	SDValue Normal(DAG.getNOT(
11775	DL: Dl,
11776	Val: SDValue (DAG.getMachineNode(
11777	Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
11778	Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
11779	`0`),
11780	VT: MVT::i1));
11781	if (Mask & fcPosNormal)
11782	Sign = DAG.getNOT(DL: Dl, Val: Sign, VT: MVT::i1);
11783	SDValue Result = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: Sign, N2: Normal);
11784	if (Mask == fcPosNormal \|\| Mask == fcNegNormal)
11785	return Result;
11786
11787	return DAG.getNode(
11788	Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
11789	N1: getDataClassTest(Op, Mask: Mask & ~fcNormal, Dl, DAG, Subtarget), N2: Result);
11790	}
11791
11792	// The instruction doesn't differentiate between signaling or quiet NaN. Test
11793	// the rest first, and test if it 'is NaN and is signaling/quiet'.
11794	if ((Mask & fcNan) == fcQNan \|\| (Mask & fcNan) == fcSNan) {
11795	bool IsQuiet = Mask & fcQNan;
11796	SDValue NanCheck = getDataClassTest(Op, Mask: fcNan, Dl, DAG, Subtarget);
11797
11798	// Quietness is determined by the first bit in fraction field.
11799	uint64_t QuietMask = `0`;
11800	SDValue HighWord;
11801	if (VT == MVT::f128) {
11802	HighWord = DAG.getNode(
11803	Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: DAG.getBitcast(VT: MVT::v4i32, V: Op),
11804	N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? `3` : `0`, DL: Dl));
11805	QuietMask = `0x8000`;
11806	} else if (VT == MVT::f64) {
11807	if (Subtarget.isPPC64()) {
11808	HighWord = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::i32,
11809	N1: DAG.getBitcast(VT: MVT::i64, V: Op),
11810	N2: DAG.getConstant(Val: `1`, DL: Dl, VT: MVT::i32));
11811	} else {
11812	SDValue Vec = DAG.getBitcast(
11813	VT: MVT::v4i32, V: DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: Dl, VT: MVT::v2f64, Operand: Op));
11814	HighWord = DAG.getNode(
11815	Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: Vec,
11816	N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? `1` : `0`, DL: Dl));
11817	}
11818	QuietMask = `0x80000`;
11819	} else if (VT == MVT::f32) {
11820	HighWord = DAG.getBitcast(VT: MVT::i32, V: Op);
11821	QuietMask = `0x400000`;
11822	}
11823	SDValue NanRes = DAG.getSetCC(
11824	DL: Dl, VT: MVT::i1,
11825	LHS: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: HighWord,
11826	N2: DAG.getConstant(Val: QuietMask, DL: Dl, VT: MVT::i32)),
11827	RHS: DAG.getConstant(Val: `0`, DL: Dl, VT: MVT::i32), Cond: IsQuiet ? ISD::SETNE : ISD::SETEQ);
11828	NanRes = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: NanCheck, N2: NanRes);
11829	if (Mask == fcQNan \|\| Mask == fcSNan)
11830	return NanRes;
11831
11832	return DAG.getNode(Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
11833	N1: getDataClassTest(Op, Mask: Mask & ~fcNan, Dl, DAG, Subtarget),
11834	N2: NanRes);
11835	}
11836
11837	unsigned NativeMask = `0`;
11838	if ((Mask & fcNan) == fcNan)
11839	NativeMask \|= DC_NAN;
11840	if (Mask & fcNegInf)
11841	NativeMask \|= DC_NEG_INF;
11842	if (Mask & fcPosInf)
11843	NativeMask \|= DC_POS_INF;
11844	if (Mask & fcNegZero)
11845	NativeMask \|= DC_NEG_ZERO;
11846	if (Mask & fcPosZero)
11847	NativeMask \|= DC_POS_ZERO;
11848	if (Mask & fcNegSubnormal)
11849	NativeMask \|= DC_NEG_SUBNORM;
11850	if (Mask & fcPosSubnormal)
11851	NativeMask \|= DC_POS_SUBNORM;
11852	return SDValue (
11853	DAG.getMachineNode(
11854	Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1,
11855	Op1: SDValue (DAG.getMachineNode(
11856	Opcode: TestOp, dl: Dl, VT: MVT::i32,
11857	Op1: DAG.getTargetConstant(Val: NativeMask, DL: Dl, VT: MVT::i32), Op2: Op),
11858	`0`),
11859	Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
11860	`0`);
11861	}
11862
11863	SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11864	SelectionDAG &DAG) const {
11865	assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11866	SDValue LHS = Op.getOperand(i: `0`);
11867	uint64_t RHSC = Op.getConstantOperandVal(i: `1`);
11868	SDLoc Dl(Op);
11869	FPClassTest Category = static_cast<FPClassTest>(RHSC);
11870	if (LHS.getValueType() == MVT::ppcf128) {
11871	// The higher part determines the value class.
11872	LHS = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::f64, N1: LHS,
11873	N2: DAG.getConstant(Val: `1`, DL: Dl, VT: MVT::i32));
11874	}
11875
11876	return getDataClassTest(Op: LHS, Mask: Category, Dl, DAG, Subtarget);
11877	}
11878
11879	// Adjust the length value for a load/store with length to account for the
11880	// instructions requiring a left justified length, and for non-byte element
11881	// types requiring scaling by element size.
11882	static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11883	SelectionDAG &DAG) {
11884	SDLoc dl(Val);
11885	EVT VT = Val ->getValueType(ResNo: `0`);
11886	unsigned LeftAdj = Left ? VT.getSizeInBits() - `8` : `0`;
11887	unsigned TypeAdj = llvm::countr_zero<uint32_t>(Val: Bits / `8`);
11888	SDValue SHLAmt = DAG.getConstant(Val: LeftAdj + TypeAdj, DL: dl, VT);
11889	return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Val, N2: SHLAmt);
11890	}
11891
11892	SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11893	auto VPLD = cast<VPLoadSDNode>(Val&: Op);
11894	bool Future = Subtarget.isISAFuture();
11895	SDLoc dl(Op);
11896	assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(`3`).getNode(), true) &&
11897	"Mask predication not supported");
11898	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11899	SDValue Len = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: PtrVT, Operand: VPLD->getOperand(Num: `4`));
11900	unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11901	unsigned EltBits = Op ->getValueType(ResNo: `0`).getScalarType().getSizeInBits();
11902	Len = AdjustLength(Val: Len, Bits: EltBits, Left: !Future, DAG);
11903	SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32),
11904	VPLD->getOperand(Num: `1`), Len};
11905	SDVTList Tys = DAG.getVTList(VT1: Op ->getValueType(ResNo: `0`), VT2: MVT::Other);
11906	SDValue VPL =
11907	DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl, VTList: Tys, Ops,
11908	MemVT: VPLD->getMemoryVT(), MMO: VPLD->getMemOperand());
11909	return VPL;
11910	}
11911
11912	SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11913	auto VPST = cast<VPStoreSDNode>(Val&: Op);
11914	assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(`4`).getNode(), true) &&
11915	"Mask predication not supported");
11916	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11917	SDLoc dl(Op);
11918	SDValue Len = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: PtrVT, Operand: VPST->getOperand(Num: `5`));
11919	unsigned EltBits =
11920	Op ->getOperand(Num: `1`).getValueType().getScalarType().getSizeInBits();
11921	bool Future = Subtarget.isISAFuture();
11922	unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11923	Len = AdjustLength(Val: Len, Bits: EltBits, Left: !Future, DAG);
11924	SDValue Ops[] = {
11925	VPST->getChain(), DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32),
11926	DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: VPST->getOperand(Num: `1`)),
11927	VPST->getOperand(Num: `2`), Len};
11928	SDVTList Tys = DAG.getVTList(VT: MVT::Other);
11929	SDValue VPS =
11930	DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops,
11931	MemVT: VPST->getMemoryVT(), MMO: VPST->getMemOperand());
11932	return VPS;
11933	}
11934
11935	SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11936	SelectionDAG &DAG) const {
11937	SDLoc dl(Op);
11938
11939	MachineFunction &MF = DAG.getMachineFunction();
11940	SDValue Op0 = Op.getOperand(i: `0`);
11941	EVT ValVT = Op0.getValueType();
11942	unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11943	if (isa<ConstantSDNode>(Val: Op0) && EltSize <= `32`) {
11944	int64_t IntVal = Op.getConstantOperandVal(i: `0`);
11945	if (IntVal >= -`16` && IntVal <= `15`)
11946	return getCanonicalConstSplat(Val: IntVal, SplatSize: EltSize / `8`, VT: Op.getValueType(), DAG,
11947	dl);
11948	}
11949
11950	ReuseLoadInfo RLI;
11951	if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11952	Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11953	Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11954	canReuseLoadAddress(Op: Op0, MemVT: MVT::i32, RLI, DAG, ET: ISD::NON_EXTLOAD)) {
11955
11956	MachineMemOperand *MMO =
11957	MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: `4`,
11958	BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
11959	SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11960	SDValue Bits = DAG.getMemIntrinsicNode(
11961	Opcode: PPCISD::LD_SPLAT, dl, VTList: DAG.getVTList(VT1: MVT::v4i32, VT2: MVT::Other), Ops,
11962	MemVT: MVT::i32, MMO);
11963	if (RLI.ResChain)
11964	DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: `1`));
11965	return Bits.getValue(R: `0`);
11966	}
11967
11968	// Create a stack slot that is 16-byte aligned.
11969	MachineFrameInfo &MFI = MF.getFrameInfo();
11970	int FrameIdx = MFI.CreateStackObject(Size: `16`, Alignment: Align (`16`), isSpillSlot: false);
11971	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11972	SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
11973
11974	SDValue Val = Op0;
11975	// P10 hardware store forwarding requires that a single store contains all
11976	// the data for the load. P10 is able to merge a pair of adjacent stores. Try
11977	// to avoid load hit store on P10 when running binaries compiled for older
11978	// processors by generating two mergeable scalar stores to forward with the
11979	// vector load.
11980	if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11981	!Subtarget.isLittleEndian() && ValVT.isInteger() &&
11982	ValVT.getSizeInBits() <= `64`) {
11983	Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: Val);
11984	EVT ShiftAmountTy = getShiftAmountTy(LHSTy: MVT::i64, DL: DAG.getDataLayout());
11985	SDValue ShiftBy = DAG.getConstant(
11986	Val: `64` - Op.getValueType().getScalarSizeInBits(), DL: dl, VT: ShiftAmountTy);
11987	Val = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: Val, N2: ShiftBy);
11988	SDValue Plus8 =
11989	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FIdx, N2: DAG.getConstant(Val: `8`, DL: dl, VT: PtrVT));
11990	SDValue Store2 =
11991	DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: Plus8, PtrInfo: MachinePointerInfo ());
11992	SDValue Store = DAG.getStore(Chain: Store2, dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo ());
11993	return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx,
11994	PtrInfo: MachinePointerInfo ());
11995	}
11996
11997	// Store the input value into Value#0 of the stack slot.
11998	SDValue Store =
11999	DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo ());
12000	// Load it out.
12001	return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx, PtrInfo: MachinePointerInfo ());
12002	}
12003
12004	SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
12005	SelectionDAG &DAG) const {
12006	assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12007	"Should only be called for ISD::INSERT_VECTOR_ELT");
12008
12009	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: `2`));
12010
12011	EVT VT = Op.getValueType();
12012	SDLoc dl(Op);
12013	SDValue V1 = Op.getOperand(i: `0`);
12014	SDValue V2 = Op.getOperand(i: `1`);
12015
12016	if (VT == MVT::v2f64 && C)
12017	return Op;
12018
12019	if (Subtarget.hasP9Vector()) {
12020	// A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
12021	// because on P10, it allows this specific insert_vector_elt load pattern to
12022	// utilize the refactored load and store infrastructure in order to exploit
12023	// prefixed loads.
12024	// On targets with inexpensive direct moves (Power9 and up), a
12025	// (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
12026	// load since a single precision load will involve conversion to double
12027	// precision on the load followed by another conversion to single precision.
12028	if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12029	(isa<LoadSDNode>(Val: V2))) {
12030	SDValue BitcastVector = DAG.getBitcast(VT: MVT::v4i32, V: V1);
12031	SDValue BitcastLoad = DAG.getBitcast(VT: MVT::i32, V: V2);
12032	SDValue InsVecElt =
12033	DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl, VT: MVT::v4i32, N1: BitcastVector,
12034	N2: BitcastLoad, N3: Op.getOperand(i: `2`));
12035	return DAG.getBitcast(VT: MVT::v4f32, V: InsVecElt);
12036	}
12037	}
12038
12039	if (Subtarget.isISA3_1()) {
12040	if ((VT == MVT::v2i64 \|\| VT == MVT::v2f64) && !Subtarget.isPPC64())
12041	return SDValue ();
12042	// On P10, we have legal lowering for constant and variable indices for
12043	// all vectors.
12044	if (VT == MVT::v16i8 \|\| VT == MVT::v8i16 \|\| VT == MVT::v4i32 \|\|
12045	VT == MVT::v2i64 \|\| VT == MVT::v4f32 \|\| VT == MVT::v2f64)
12046	return Op;
12047	}
12048
12049	// Before P10, we have legal lowering for constant indices but not for
12050	// variable ones.
12051	if (!C)
12052	return SDValue ();
12053
12054	// We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12055	if (VT == MVT::v8i16 \|\| VT == MVT::v16i8) {
12056	SDValue Mtvsrz = DAG.getNode(Opcode: PPCISD::MTVSRZ, DL: dl, VT, Operand: V2);
12057	unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / `8`;
12058	unsigned InsertAtElement = C->getZExtValue();
12059	unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12060	if (Subtarget.isLittleEndian()) {
12061	InsertAtByte = (`16` - BytesInEachElement) - InsertAtByte;
12062	}
12063	return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT, N1: V1, N2: Mtvsrz,
12064	N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
12065	}
12066	return Op;
12067	}
12068
12069	SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12070	SelectionDAG &DAG) const {
12071	SDLoc dl(Op);
12072	LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
12073	SDValue LoadChain = LN->getChain();
12074	SDValue BasePtr = LN->getBasePtr();
12075	EVT VT = Op.getValueType();
12076	bool IsV1024i1 = VT == MVT::v1024i1;
12077	bool IsV2048i1 = VT == MVT::v2048i1;
12078
12079	// The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12080	// Dense Math dmr pair registers, respectively.
12081	assert((IsV1024i1 \|\| IsV2048i1) && "Unsupported type.");
12082	(void)IsV2048i1;
12083	assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12084	"Dense Math support required.");
12085	assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12086
12087	SmallVector<SDValue, `8`> Loads;
12088	SmallVector<SDValue, `8`> LoadChains;
12089
12090	SDValue IntrinID = DAG.getConstant(Val: Intrinsic::ppc_vsx_lxvp, DL: dl, VT: MVT::i32);
12091	SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12092	MachineMemOperand *MMO = LN->getMemOperand();
12093	unsigned NumVecs = VT.getSizeInBits() / `256`;
12094	for (unsigned Idx = `0`; Idx < NumVecs; ++Idx) {
12095	MachineMemOperand *NewMMO =
12096	DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * `32`, Size: `32`);
12097	if (Idx > `0`) {
12098	BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12099	N2: DAG.getConstant(Val: `32`, DL: dl, VT: BasePtr.getValueType()));
12100	LoadOps[`2`] = BasePtr;
12101	}
12102	SDValue Ld = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
12103	VTList: DAG.getVTList(VT1: MVT::v256i1, VT2: MVT::Other),
12104	Ops: LoadOps, MemVT: MVT::v256i1, MMO: NewMMO);
12105	LoadChains.push_back(Elt: Ld.getValue(R: `1`));
12106	Loads.push_back(Elt: Ld);
12107	}
12108
12109	if (Subtarget.isLittleEndian()) {
12110	std::reverse(first: Loads.begin(), last: Loads.end());
12111	std::reverse(first: LoadChains.begin(), last: LoadChains.end());
12112	}
12113
12114	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
12115	SDValue Value = DMFInsert1024(Pairs: Loads, dl, DAG);
12116
12117	if (IsV1024i1) {
12118	return DAG.getMergeValues(Ops: {Value, TF}, dl);
12119	}
12120
12121	// Handle Loads for V2048i1 which represents a dmr pair.
12122	SmallVector<SDValue, `4`> MoreLoads{Loads [`4`], Loads [`5`], Loads [`6`], Loads [`7`]};
12123	SDValue Dmr1Value = DMFInsert1024(Pairs: MoreLoads, dl, DAG);
12124
12125	SDValue Dmr0Sub = DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32);
12126	SDValue Dmr1Sub = DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32);
12127
12128	SDValue DmrPRC = DAG.getTargetConstant(Val: PPC::DMRpRCRegClassID, DL: dl, VT: MVT::i32);
12129	const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12130
12131	SDValue DmrPValue = SDValue (
12132	DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v2048i1, Ops: DmrPOps), `0`);
12133
12134	return DAG.getMergeValues(Ops: {DmrPValue, TF}, dl);
12135	}
12136
12137	SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12138	const SDLoc &dl,
12139	SelectionDAG &DAG) const {
12140	SDValue Lo =
12141	DAG.getNode(Opcode: PPCISD::INST512, DL: dl, VT: MVT::v512i1, N1: Pairs [`0`], N2: Pairs [`1`]);
12142	SDValue LoSub = DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32);
12143	SDValue Hi =
12144	DAG.getNode(Opcode: PPCISD::INST512HI, DL: dl, VT: MVT::v512i1, N1: Pairs [`2`], N2: Pairs [`3`]);
12145	SDValue HiSub = DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32);
12146	SDValue RC = DAG.getTargetConstant(Val: PPC::DMRRCRegClassID, DL: dl, VT: MVT::i32);
12147
12148	return SDValue (DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v1024i1,
12149	Ops: {RC, Lo, LoSub, Hi, HiSub}),
12150	`0`);
12151	}
12152
12153	SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12154	SelectionDAG &DAG) const {
12155	SDLoc dl(Op);
12156	LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
12157	SDValue LoadChain = LN->getChain();
12158	SDValue BasePtr = LN->getBasePtr();
12159	EVT VT = Op.getValueType();
12160
12161	if (VT == MVT::v1024i1 \|\| VT == MVT::v2048i1)
12162	return LowerDMFVectorLoad(Op, DAG);
12163
12164	if (VT != MVT::v256i1 && VT != MVT::v512i1)
12165	return Op;
12166
12167	// Type v256i1 is used for pairs and v512i1 is used for accumulators.
12168	assert((VT != MVT::v512i1 \|\| Subtarget.hasMMA()) &&
12169	"Type unsupported without MMA");
12170	assert((VT != MVT::v256i1 \|\| Subtarget.pairedVectorMemops()) &&
12171	"Type unsupported without paired vector support");
12172
12173	// For v256i1 on ISA Future, let the load go through to instruction selection
12174	// where it will be matched to lxvp/plxvp by the instruction patterns.
12175	if (VT == MVT::v256i1 && Subtarget.isISAFuture())
12176	return Op;
12177
12178	// For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
12179	// value in 2 or 4 vsx registers.
12180	Align Alignment = LN->getAlign();
12181	SmallVector<SDValue, `4`> Loads;
12182	SmallVector<SDValue, `4`> LoadChains;
12183	unsigned NumVecs = VT.getSizeInBits() / `128`;
12184	for (unsigned Idx = `0`; Idx < NumVecs; ++Idx) {
12185	SDValue Load =
12186	DAG.getLoad(VT: MVT::v16i8, dl, Chain: LoadChain, Ptr: BasePtr,
12187	PtrInfo: LN->getPointerInfo().getWithOffset(O: Idx * `16`),
12188	Alignment: commonAlignment(A: Alignment, Offset: Idx * `16`),
12189	MMOFlags: LN->getMemOperand()->getFlags(), AAInfo: LN->getAAInfo());
12190	BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12191	N2: DAG.getConstant(Val: `16`, DL: dl, VT: BasePtr.getValueType()));
12192	Loads.push_back(Elt: Load);
12193	LoadChains.push_back(Elt: Load.getValue(R: `1`));
12194	}
12195	if (Subtarget.isLittleEndian()) {
12196	std::reverse(first: Loads.begin(), last: Loads.end());
12197	std::reverse(first: LoadChains.begin(), last: LoadChains.end());
12198	}
12199	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
12200	SDValue Value =
12201	DAG.getNode(Opcode: VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12202	DL: dl, VT, Ops: Loads);
12203	SDValue RetOps[] = {Value, TF};
12204	return DAG.getMergeValues(Ops: RetOps, dl);
12205	}
12206
12207	SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12208	SelectionDAG &DAG) const {
12209
12210	SDLoc dl(Op);
12211	StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
12212	SDValue StoreChain = SN->getChain();
12213	SDValue BasePtr = SN->getBasePtr();
12214	SmallVector<SDValue, `8`> Values;
12215	SmallVector<SDValue, `8`> Stores;
12216	EVT VT = SN->getValue().getValueType();
12217	bool IsV1024i1 = VT == MVT::v1024i1;
12218	bool IsV2048i1 = VT == MVT::v2048i1;
12219
12220	// The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12221	// Dense Math dmr pair registers, respectively.
12222	assert((IsV1024i1 \|\| IsV2048i1) && "Unsupported type.");
12223	(void)IsV2048i1;
12224	assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12225	"Dense Math support required.");
12226	assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12227
12228	EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12229	if (IsV1024i1) {
12230	SDValue Lo(DAG.getMachineNode(
12231	Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
12232	Op1: Op.getOperand(i: `1`),
12233	Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
12234	`0`);
12235	SDValue Hi(DAG.getMachineNode(
12236	Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
12237	Op1: Op.getOperand(i: `1`),
12238	Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
12239	`0`);
12240	MachineSDNode *ExtNode =
12241	DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Lo);
12242	Values.push_back(Elt: SDValue (ExtNode, `0`));
12243	Values.push_back(Elt: SDValue (ExtNode, `1`));
12244	ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Hi);
12245	Values.push_back(Elt: SDValue (ExtNode, `0`));
12246	Values.push_back(Elt: SDValue (ExtNode, `1`));
12247	} else {
12248	// This corresponds to v2048i1 which represents a dmr pair.
12249	SDValue Dmr0(
12250	DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
12251	Op1: Op.getOperand(i: `1`),
12252	Op2: DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32)),
12253	`0`);
12254
12255	SDValue Dmr1(
12256	DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
12257	Op1: Op.getOperand(i: `1`),
12258	Op2: DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32)),
12259	`0`);
12260
12261	SDValue Dmr0Lo(DAG.getMachineNode(
12262	Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
12263	Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
12264	`0`);
12265
12266	SDValue Dmr0Hi(DAG.getMachineNode(
12267	Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
12268	Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
12269	`0`);
12270
12271	SDValue Dmr1Lo(DAG.getMachineNode(
12272	Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
12273	Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
12274	`0`);
12275
12276	SDValue Dmr1Hi(DAG.getMachineNode(
12277	Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
12278	Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
12279	`0`);
12280
12281	MachineSDNode *ExtNode =
12282	DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr0Lo);
12283	Values.push_back(Elt: SDValue (ExtNode, `0`));
12284	Values.push_back(Elt: SDValue (ExtNode, `1`));
12285	ExtNode =
12286	DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr0Hi);
12287	Values.push_back(Elt: SDValue (ExtNode, `0`));
12288	Values.push_back(Elt: SDValue (ExtNode, `1`));
12289	ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr1Lo);
12290	Values.push_back(Elt: SDValue (ExtNode, `0`));
12291	Values.push_back(Elt: SDValue (ExtNode, `1`));
12292	ExtNode =
12293	DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr1Hi);
12294	Values.push_back(Elt: SDValue (ExtNode, `0`));
12295	Values.push_back(Elt: SDValue (ExtNode, `1`));
12296	}
12297
12298	if (Subtarget.isLittleEndian())
12299	std::reverse(first: Values.begin(), last: Values.end());
12300
12301	SDVTList Tys = DAG.getVTList(VT: MVT::Other);
12302	SmallVector<SDValue, `4`> Ops{
12303	StoreChain, DAG.getConstant(Val: Intrinsic::ppc_vsx_stxvp, DL: dl, VT: MVT::i32),
12304	Values [`0`], BasePtr};
12305	MachineMemOperand *MMO = SN->getMemOperand();
12306	unsigned NumVecs = VT.getSizeInBits() / `256`;
12307	for (unsigned Idx = `0`; Idx < NumVecs; ++Idx) {
12308	MachineMemOperand *NewMMO =
12309	DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * `32`, Size: `32`);
12310	if (Idx > `0`) {
12311	BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12312	N2: DAG.getConstant(Val: `32`, DL: dl, VT: BasePtr.getValueType()));
12313	Ops [`3`] = BasePtr;
12314	}
12315	Ops [`2`] = Values [Idx];
12316	SDValue St = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops,
12317	MemVT: MVT::v256i1, MMO: NewMMO);
12318	Stores.push_back(Elt: St);
12319	}
12320
12321	SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
12322	return TF;
12323	}
12324
12325	SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12326	SelectionDAG &DAG) const {
12327	SDLoc dl(Op);
12328	StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
12329	SDValue StoreChain = SN->getChain();
12330	SDValue BasePtr = SN->getBasePtr();
12331	SDValue Value = SN->getValue();
12332	SDValue Value2 = SN->getValue();
12333	EVT StoreVT = Value.getValueType();
12334
12335	if (StoreVT == MVT::v1024i1 \|\| StoreVT == MVT::v2048i1)
12336	return LowerDMFVectorStore(Op, DAG);
12337
12338	if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12339	return Op;
12340
12341	// Type v256i1 is used for pairs and v512i1 is used for accumulators.
12342	assert((StoreVT != MVT::v512i1 \|\| Subtarget.hasMMA()) &&
12343	"Type unsupported without MMA");
12344	assert((StoreVT != MVT::v256i1 \|\| Subtarget.pairedVectorMemops()) &&
12345	"Type unsupported without paired vector support");
12346
12347	// For v256i1 on ISA Future, let the store go through to instruction selection
12348	// where it will be matched to stxvp/pstxvp by the instruction patterns.
12349	if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
12350	!DisableAutoPairedVecSt)
12351	return Op;
12352
12353	// For other cases, create 2 or 4 v16i8 stores to store the pair or
12354	// accumulator underlying registers individually.
12355	Align Alignment = SN->getAlign();
12356	SmallVector<SDValue, `4`> Stores;
12357	unsigned NumVecs = `2`;
12358	if (StoreVT == MVT::v512i1) {
12359	if (Subtarget.isISAFuture()) {
12360	EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12361	MachineSDNode *ExtNode = DAG.getMachineNode(
12362	Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Op.getOperand(i: `1`));
12363
12364	Value = SDValue (ExtNode, `0`);
12365	Value2 = SDValue (ExtNode, `1`);
12366	} else
12367	Value = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: Value);
12368	NumVecs = `4`;
12369	}
12370	for (unsigned Idx = `0`; Idx < NumVecs; ++Idx) {
12371	unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - `1` - Idx : Idx;
12372	SDValue Elt;
12373	if (Subtarget.isISAFuture()) {
12374	VecNum = Subtarget.isLittleEndian() ? `1` - (Idx % `2`) : (Idx % `2`);
12375	Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
12376	N1: Idx > `1` ? Value2 : Value,
12377	N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
12378	} else
12379	Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: Value,
12380	N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
12381
12382	SDValue Store =
12383	DAG.getStore(Chain: StoreChain, dl, Val: Elt, Ptr: BasePtr,
12384	PtrInfo: SN->getPointerInfo().getWithOffset(O: Idx * `16`),
12385	Alignment: commonAlignment(A: Alignment, Offset: Idx * `16`),
12386	MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());
12387	BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12388	N2: DAG.getConstant(Val: `16`, DL: dl, VT: BasePtr.getValueType()));
12389	Stores.push_back(Elt: Store);
12390	}
12391	SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
12392	return TF;
12393	}
12394
12395	SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12396	SDLoc dl(Op);
12397	if (Op.getValueType() == MVT::v4i32) {
12398	SDValue LHS = Op.getOperand(i: `0`), RHS = Op.getOperand(i: `1`);
12399
12400	SDValue Zero = getCanonicalConstSplat(Val: `0`, SplatSize: `1`, VT: MVT::v4i32, DAG, dl);
12401	// +16 as shift amt.
12402	SDValue Neg16 = getCanonicalConstSplat(Val: -`16`, SplatSize: `4`, VT: MVT::v4i32, DAG, dl);
12403	SDValue RHSSwap = // = vrlw RHS, 16
12404	BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vrlw, LHS: RHS, RHS: Neg16, DAG, dl);
12405
12406	// Shrinkify inputs to v8i16.
12407	LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: LHS);
12408	RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHS);
12409	RHSSwap = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHSSwap);
12410
12411	// Low parts multiplied together, generating 32-bit results (we ignore the
12412	// top parts).
12413	SDValue LoProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmulouh,
12414	LHS, RHS, DAG, dl, DestVT: MVT::v4i32);
12415
12416	SDValue HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmsumuhm,
12417	Op0: LHS, Op1: RHSSwap, Op2: Zero, DAG, dl, DestVT: MVT::v4i32);
12418	// Shift the high parts up 16 bits.
12419	HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: HiProd,
12420	RHS: Neg16, DAG, dl);
12421	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::v4i32, N1: LoProd, N2: HiProd);
12422	} else if (Op.getValueType() == MVT::v16i8) {
12423	SDValue LHS = Op.getOperand(i: `0`), RHS = Op.getOperand(i: `1`);
12424	bool isLittleEndian = Subtarget.isLittleEndian();
12425
12426	// Multiply the even 8-bit parts, producing 16-bit sums.
12427	SDValue EvenParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuleub,
12428	LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
12429	EvenParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: EvenParts);
12430
12431	// Multiply the odd 8-bit parts, producing 16-bit sums.
12432	SDValue OddParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuloub,
12433	LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
12434	OddParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OddParts);
12435
12436	// Merge the results together. Because vmuleub and vmuloub are
12437	// instructions with a big-endian bias, we must reverse the
12438	// element numbering and reverse the meaning of "odd" and "even"
12439	// when generating little endian code.
12440	int Ops[`16`];
12441	for (unsigned i = `0`; i != `8`; ++i) {
12442	if (isLittleEndian) {
12443	Ops[i`2` ] = `2`i;
12444	Ops[i`2`+`1`] = `2`i+`16`;
12445	} else {
12446	Ops[i`2` ] = `2`i+`1`;
12447	Ops[i`2`+`1`] = `2`i+`1`+`16`;
12448	}
12449	}
12450	if (isLittleEndian)
12451	return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OddParts, N2: EvenParts, Mask: Ops);
12452	else
12453	return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: EvenParts, N2: OddParts, Mask: Ops);
12454	} else {
12455	llvm_unreachable("Unknown mul to lower!");
12456	}
12457	}
12458
12459	SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12460	bool IsStrict = Op ->isStrictFPOpcode();
12461	if (Op.getOperand(i: IsStrict ? `1` : `0`).getValueType() == MVT::f128 &&
12462	!Subtarget.hasP9Vector())
12463	return SDValue ();
12464
12465	return Op;
12466	}
12467
12468	// Custom lowering for fpext vf32 to v2f64
12469	SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12470
12471	assert(Op.getOpcode() == ISD::FP_EXTEND &&
12472	"Should only be called for ISD::FP_EXTEND");
12473
12474	// FIXME: handle extends from half precision float vectors on P9.
12475	// We only want to custom lower an extend from v2f32 to v2f64.
12476	if (Op.getValueType() != MVT::v2f64 \|\|
12477	Op.getOperand(i: `0`).getValueType() != MVT::v2f32)
12478	return SDValue ();
12479
12480	SDLoc dl(Op);
12481	SDValue Op0 = Op.getOperand(i: `0`);
12482
12483	switch (Op0.getOpcode()) {
12484	default:
12485	return SDValue ();
12486	case ISD::EXTRACT_SUBVECTOR: {
12487	assert(Op0.getNumOperands() == `2` &&
12488	isa<ConstantSDNode>(Op0->getOperand(`1`)) &&
12489	"Node should have 2 operands with second one being a constant!");
12490
12491	if (Op0.getOperand(i: `0`).getValueType() != MVT::v4f32)
12492	return SDValue ();
12493
12494	// Custom lower is only done for high or low doubleword.
12495	int Idx = Op0.getConstantOperandVal(i: `1`);
12496	if (Idx % `2` != `0`)
12497	return SDValue ();
12498
12499	// Since input is v4f32, at this point Idx is either 0 or 2.
12500	// Shift to get the doubleword position we want.
12501	int DWord = Idx >> `1`;
12502
12503	// High and low word positions are different on little endian.
12504	if (Subtarget.isLittleEndian())
12505	DWord ^= `0x1`;
12506
12507	return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64,
12508	N1: Op0.getOperand(i: `0`), N2: DAG.getConstant(Val: DWord, DL: dl, VT: MVT::i32));
12509	}
12510	case ISD::FADD:
12511	case ISD::FMUL:
12512	case ISD::FSUB: {
12513	SDValue NewLoad[`2`];
12514	for (unsigned i = `0`, ie = Op0.getNumOperands(); i != ie; ++i) {
12515	// Ensure both input are loads.
12516	SDValue LdOp = Op0.getOperand(i);
12517	if (LdOp.getOpcode() != ISD::LOAD)
12518	return SDValue ();
12519	// Generate new load node.
12520	LoadSDNode *LD = cast<LoadSDNode>(Val&: LdOp);
12521	SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12522	NewLoad[i] = DAG.getMemIntrinsicNode(
12523	Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12524	MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12525	}
12526	SDValue NewOp =
12527	DAG.getNode(Opcode: Op0.getOpcode(), DL: SDLoc (Op0), VT: MVT::v4f32, N1: NewLoad[`0`],
12528	N2: NewLoad[`1`], Flags: Op0.getNode()->getFlags());
12529	return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewOp,
12530	N2: DAG.getConstant(Val: `0`, DL: dl, VT: MVT::i32));
12531	}
12532	case ISD::LOAD: {
12533	LoadSDNode *LD = cast<LoadSDNode>(Val&: Op0);
12534	SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12535	SDValue NewLd = DAG.getMemIntrinsicNode(
12536	Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12537	MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12538	return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewLd,
12539	N2: DAG.getConstant(Val: `0`, DL: dl, VT: MVT::i32));
12540	}
12541	}
12542	llvm_unreachable("ERROR:Should return for all cases within swtich.");
12543	}
12544
12545	static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12546	SelectionDAG &DAG,
12547	const PPCSubtarget &STI) {
12548	SDLoc DL(Value);
12549	if (STI.useCRBits())
12550	Value = DAG.getNode(Opcode: ISD::SELECT, DL, VT: SumType, N1: Value,
12551	N2: DAG.getConstant(Val: `1`, DL, VT: SumType),
12552	N3: DAG.getConstant(Val: `0`, DL, VT: SumType));
12553	else
12554	Value = DAG.getZExtOrTrunc(Op: Value, DL, VT: SumType);
12555	SDValue Sum = DAG.getNode(Opcode: PPCISD::ADDC, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32),
12556	N1: Value, N2: DAG.getAllOnesConstant(DL, VT: SumType));
12557	return Sum.getValue(R: `1`);
12558	}
12559
12560	static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12561	EVT CarryType, SelectionDAG &DAG,
12562	const PPCSubtarget &STI) {
12563	SDLoc DL(Flag);
12564	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: SumType);
12565	SDValue Carry = DAG.getNode(
12566	Opcode: PPCISD::ADDE, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32), N1: Zero, N2: Zero, N3: Flag);
12567	if (STI.useCRBits())
12568	return DAG.getSetCC(DL, VT: CarryType, LHS: Carry, RHS: Zero, Cond: ISD::SETNE);
12569	return DAG.getZExtOrTrunc(Op: Carry, DL, VT: CarryType);
12570	}
12571
12572	SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12573
12574	SDLoc DL(Op);
12575	SDNode *N = Op.getNode();
12576	EVT VT = N->getValueType(ResNo: `0`);
12577	EVT CarryType = N->getValueType(ResNo: `1`);
12578	unsigned Opc = N->getOpcode();
12579	bool IsAdd = Opc == ISD::UADDO;
12580	Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12581	SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
12582	N1: N->getOperand(Num: `0`), N2: N->getOperand(Num: `1`));
12583	SDValue Carry = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: `1`), CarryType,
12584	DAG, STI: Subtarget);
12585	if (!IsAdd)
12586	Carry = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Carry,
12587	N2: DAG.getConstant(Val: `1UL`, DL, VT: CarryType));
12588	return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: Carry);
12589	}
12590
12591	SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12592	SelectionDAG &DAG) const {
12593	SDLoc DL(Op);
12594	SDNode *N = Op.getNode();
12595	unsigned Opc = N->getOpcode();
12596	EVT VT = N->getValueType(ResNo: `0`);
12597	EVT CarryType = N->getValueType(ResNo: `1`);
12598	SDValue CarryOp = N->getOperand(Num: `2`);
12599	bool IsAdd = Opc == ISD::UADDO_CARRY;
12600	Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12601	if (!IsAdd)
12602	CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
12603	N2: DAG.getConstant(Val: `1UL`, DL, VT: CarryOp.getValueType()));
12604	CarryOp = ConvertCarryValueToCarryFlag(SumType: VT, Value: CarryOp, DAG, STI: Subtarget);
12605	SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
12606	N1: Op.getOperand(i: `0`), N2: Op.getOperand(i: `1`), N3: CarryOp);
12607	CarryOp = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: `1`), CarryType, DAG,
12608	STI: Subtarget);
12609	if (!IsAdd)
12610	CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
12611	N2: DAG.getConstant(Val: `1UL`, DL, VT: CarryOp.getValueType()));
12612	return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: CarryOp);
12613	}
12614
12615	SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12616
12617	SDLoc dl(Op);
12618	SDValue LHS = Op.getOperand(i: `0`);
12619	SDValue RHS = Op.getOperand(i: `1`);
12620	EVT VT = Op.getNode()->getValueType(ResNo: `0`);
12621
12622	SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
12623
12624	SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: RHS, N2: LHS);
12625	SDValue Xor2 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Sub, N2: LHS);
12626
12627	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Xor1, N2: Xor2);
12628
12629	SDValue Overflow =
12630	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: And,
12631	N2: DAG.getConstant(Val: VT.getSizeInBits() - `1`, DL: dl, VT: MVT::i32));
12632
12633	SDValue OverflowTrunc =
12634	DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: Op.getNode()->getValueType(ResNo: `1`), Operand: Overflow);
12635
12636	return DAG.getMergeValues(Ops: {Sub, OverflowTrunc}, dl);
12637	}
12638
12639	/// Implements signed add with overflow detection using the rule:
12640	/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12641	SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12642
12643	SDLoc dl(Op);
12644	SDValue LHS = Op.getOperand(i: `0`);
12645	SDValue RHS = Op.getOperand(i: `1`);
12646	EVT VT = Op.getNode()->getValueType(ResNo: `0`);
12647
12648	SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
12649
12650	// Compute ~(x xor y)
12651	SDValue XorXY = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
12652	SDValue EqvXY = DAG.getNOT(DL: dl, Val: XorXY, VT);
12653	// Compute (s xor x)
12654	SDValue SumXorX = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Sum, N2: LHS);
12655
12656	// overflow = (x eqv y) & (s xor x)
12657	SDValue OverflowInSign = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: EqvXY, N2: SumXorX);
12658
12659	// Shift sign bit down to LSB
12660	SDValue Overflow =
12661	DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: OverflowInSign,
12662	N2: DAG.getConstant(Val: VT.getSizeInBits() - `1`, DL: dl, VT: MVT::i32));
12663	// Truncate to the overflow type (i1)
12664	SDValue OverflowTrunc =
12665	DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: Op.getNode()->getValueType(ResNo: `1`), Operand: Overflow);
12666
12667	return DAG.getMergeValues(Ops: {Sum, OverflowTrunc}, dl);
12668	}
12669
12670	// Lower unsigned 3-way compare producing -1/0/1.
12671	SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12672	SDLoc DL(Op);
12673	SDValue A = DAG.getFreeze(V: Op.getOperand(i: `0`));
12674	SDValue B = DAG.getFreeze(V: Op.getOperand(i: `1`));
12675	EVT OpVT = A.getValueType();
12676	EVT ResVT = Op.getValueType();
12677
12678	// On PPC64, i32 carries are affected by the upper 32 bits of the registers.
12679	// We must zero-extend to i64 to ensure the carry reflects the 32-bit unsigned
12680	// comparison.
12681	if (Subtarget.isPPC64() && OpVT == MVT::i32) {
12682	A = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: A);
12683	B = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: B);
12684	OpVT = MVT::i64;
12685	}
12686
12687	// First compute diff = A - B.
12688	SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: OpVT, N1: A, N2: B);
12689
12690	// Generate B - A using SUBC to capture carry.
12691	SDVTList VTs = DAG.getVTList(VT1: OpVT, VT2: MVT::i32);
12692	SDValue SubC = DAG.getNode(Opcode: PPCISD::SUBC, DL, VTList: VTs, N1: B, N2: A);
12693	SDValue CA0 = SubC.getValue(R: `1`);
12694
12695	// t2 = A - B + CA0 using SUBE.
12696	SDValue SubE1 = DAG.getNode(Opcode: PPCISD::SUBE, DL, VTList: VTs, N1: A, N2: B, N3: CA0);
12697	SDValue CA1 = SubE1.getValue(R: `1`);
12698
12699	// res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12700	SDValue ResPair = DAG.getNode(Opcode: PPCISD::SUBE, DL, VTList: VTs, N1: Diff, N2: SubE1, N3: CA1);
12701
12702	// Extract the first result and truncate to result type if needed.
12703	return DAG.getSExtOrTrunc(Op: ResPair.getValue(R: `0`), DL, VT: ResVT);
12704	}
12705
12706	/// LowerOperation - Provide custom lowering hooks for some operations.
12707	///
12708	SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12709	switch (Op.getOpcode()) {
12710	default:
12711	llvm_unreachable("Wasn't expecting to be able to lower this!");
12712	case ISD::FPOW: return lowerPow(Op, DAG);
12713	case ISD::FSIN: return lowerSin(Op, DAG);
12714	case ISD::FCOS: return lowerCos(Op, DAG);
12715	case ISD::FLOG: return lowerLog(Op, DAG);
12716	case ISD::FLOG10: return lowerLog10(Op, DAG);
12717	case ISD::FEXP: return lowerExp(Op, DAG);
12718	case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12719	case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12720	case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12721	case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12722	case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12723	case ISD::STRICT_FSETCC:
12724	case ISD::STRICT_FSETCCS:
12725	case ISD::SETCC: return LowerSETCC(Op, DAG);
12726	case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12727	case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12728	case ISD::SSUBO:
12729	return LowerSSUBO(Op, DAG);
12730	case ISD::SADDO:
12731	return LowerSADDO(Op, DAG);
12732
12733	case ISD::INLINEASM:
12734	case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12735	// Variable argument lowering.
12736	case ISD::VASTART: return LowerVASTART(Op, DAG);
12737	case ISD::VAARG: return LowerVAARG(Op, DAG);
12738	case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12739
12740	case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12741	case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12742	case ISD::GET_DYNAMIC_AREA_OFFSET:
12743	return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12744
12745	// Exception handling lowering.
12746	case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12747	case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12748	case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12749
12750	case ISD::LOAD: return LowerLOAD(Op, DAG);
12751	case ISD::STORE: return LowerSTORE(Op, DAG);
12752	case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12753	case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12754	case ISD::STRICT_FP_TO_UINT:
12755	case ISD::STRICT_FP_TO_SINT:
12756	case ISD::FP_TO_UINT:
12757	case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, dl: SDLoc (Op));
12758	case ISD::STRICT_UINT_TO_FP:
12759	case ISD::STRICT_SINT_TO_FP:
12760	case ISD::UINT_TO_FP:
12761	case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12762	case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12763	case ISD::SET_ROUNDING:
12764	return LowerSET_ROUNDING(Op, DAG);
12765
12766	// Lower 64-bit shifts.
12767	case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12768	case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12769	case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12770
12771	case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12772	case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12773
12774	// Vector-related lowering.
12775	case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12776	case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12777	case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12778	case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12779	case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12780	case ISD::MUL: return LowerMUL(Op, DAG);
12781	case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12782	case ISD::STRICT_FP_ROUND:
12783	case ISD::FP_ROUND:
12784	return LowerFP_ROUND(Op, DAG);
12785	case ISD::ROTL: return LowerROTL(Op, DAG);
12786
12787	// For counter-based loop handling.
12788	case ISD::INTRINSIC_W_CHAIN:
12789	return LowerINTRINSIC_W_CHAIN(Op, DAG);
12790
12791	case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12792
12793	// Frame & Return address.
12794	case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12795	case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12796
12797	case ISD::INTRINSIC_VOID:
12798	return LowerINTRINSIC_VOID(Op, DAG);
12799	case ISD::BSWAP:
12800	return LowerBSWAP(Op, DAG);
12801	case ISD::ATOMIC_CMP_SWAP:
12802	return LowerATOMIC_CMP_SWAP(Op, DAG);
12803	case ISD::ATOMIC_STORE:
12804	return LowerATOMIC_LOAD_STORE(Op, DAG);
12805	case ISD::IS_FPCLASS:
12806	return LowerIS_FPCLASS(Op, DAG);
12807	case ISD::UADDO:
12808	case ISD::USUBO:
12809	return LowerADDSUBO(Op, DAG);
12810	case ISD::UADDO_CARRY:
12811	case ISD::USUBO_CARRY:
12812	return LowerADDSUBO_CARRY(Op, DAG);
12813	case ISD::UCMP:
12814	return LowerUCMP(Op, DAG);
12815	case ISD::STRICT_LRINT:
12816	case ISD::STRICT_LLRINT:
12817	case ISD::STRICT_LROUND:
12818	case ISD::STRICT_LLROUND:
12819	case ISD::STRICT_FNEARBYINT:
12820	if (Op ->getFlags().hasNoFPExcept())
12821	return Op;
12822	return SDValue ();
12823	case ISD::VP_LOAD:
12824	return LowerVP_LOAD(Op, DAG);
12825	case ISD::VP_STORE:
12826	return LowerVP_STORE(Op, DAG);
12827	}
12828	}
12829
12830	void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12831	SmallVectorImpl<SDValue>&Results,
12832	SelectionDAG &DAG) const {
12833	SDLoc dl(N);
12834	switch (N->getOpcode()) {
12835	default:
12836	llvm_unreachable("Do not know how to custom type legalize this operation!");
12837	case ISD::ATOMIC_LOAD: {
12838	SDValue Res = LowerATOMIC_LOAD_STORE(Op: SDValue (N, `0`), DAG);
12839	Results.push_back(Elt: Res);
12840	Results.push_back(Elt: Res.getValue(R: `1`));
12841	break;
12842	}
12843	case ISD::READCYCLECOUNTER: {
12844	SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other);
12845	SDValue RTB = DAG.getNode(Opcode: PPCISD::READ_TIME_BASE, DL: dl, VTList: VTs, N: N->getOperand(Num: `0`));
12846
12847	Results.push_back(
12848	Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: RTB, N2: RTB.getValue(R: `1`)));
12849	Results.push_back(Elt: RTB.getValue(R: `2`));
12850	break;
12851	}
12852	case ISD::INTRINSIC_W_CHAIN: {
12853	if (N->getConstantOperandVal(Num: `1`) != Intrinsic::loop_decrement)
12854	break;
12855
12856	assert(N->getValueType(`0`) == MVT::i1 &&
12857	"Unexpected result type for CTR decrement intrinsic");
12858	EVT SVT = getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(),
12859	VT: N->getValueType(ResNo: `0`));
12860	SDVTList VTs = DAG.getVTList(VT1: SVT, VT2: MVT::Other);
12861	SDValue NewInt = DAG.getNode(Opcode: N->getOpcode(), DL: dl, VTList: VTs, N1: N->getOperand(Num: `0`),
12862	N2: N->getOperand(Num: `1`));
12863
12864	Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewInt));
12865	Results.push_back(Elt: NewInt.getValue(R: `1`));
12866	break;
12867	}
12868	case ISD::INTRINSIC_WO_CHAIN: {
12869	switch (N->getConstantOperandVal(Num: `0`)) {
12870	case Intrinsic::ppc_pack_longdouble:
12871	Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::ppcf128,
12872	N1: N->getOperand(Num: `2`), N2: N->getOperand(Num: `1`)));
12873	break;
12874	case Intrinsic::ppc_maxfe:
12875	case Intrinsic::ppc_minfe:
12876	case Intrinsic::ppc_fnmsub:
12877	case Intrinsic::ppc_convert_f128_to_ppcf128:
12878	Results.push_back(Elt: LowerINTRINSIC_WO_CHAIN(Op: SDValue (N, `0`), DAG));
12879	break;
12880	}
12881	break;
12882	}
12883	case ISD::VAARG: {
12884	if (!Subtarget.isSVR4ABI() \|\| Subtarget.isPPC64())
12885	return;
12886
12887	EVT VT = N->getValueType(ResNo: `0`);
12888
12889	if (VT == MVT::i64) {
12890	SDValue NewNode = LowerVAARG(Op: SDValue (N, `1`), DAG);
12891
12892	Results.push_back(Elt: NewNode);
12893	Results.push_back(Elt: NewNode.getValue(R: `1`));
12894	}
12895	return;
12896	}
12897	case ISD::STRICT_FP_TO_SINT:
12898	case ISD::STRICT_FP_TO_UINT:
12899	case ISD::FP_TO_SINT:
12900	case ISD::FP_TO_UINT: {
12901	// LowerFP_TO_INT() can only handle f32 and f64.
12902	if (N->getOperand(Num: N->isStrictFPOpcode() ? `1` : `0`).getValueType() ==
12903	MVT::ppcf128)
12904	return;
12905	SDValue LoweredValue = LowerFP_TO_INT(Op: SDValue (N, `0`), DAG, dl);
12906	Results.push_back(Elt: LoweredValue);
12907	if (N->isStrictFPOpcode())
12908	Results.push_back(Elt: LoweredValue.getValue(R: `1`));
12909	return;
12910	}
12911	case ISD::TRUNCATE: {
12912	if (!N->getValueType(ResNo: `0`).isVector())
12913	return;
12914	SDValue Lowered = LowerTRUNCATEVector(Op: SDValue (N, `0`), DAG);
12915	if (Lowered)
12916	Results.push_back(Elt: Lowered);
12917	return;
12918	}
12919	case ISD::SCALAR_TO_VECTOR: {
12920	SDValue Lowered = LowerSCALAR_TO_VECTOR(Op: SDValue (N, `0`), DAG);
12921	if (Lowered)
12922	Results.push_back(Elt: Lowered);
12923	return;
12924	}
12925	case ISD::FSHL:
12926	case ISD::FSHR:
12927	// Don't handle funnel shifts here.
12928	return;
12929	case ISD::BITCAST:
12930	// Don't handle bitcast here.
12931	return;
12932	case ISD::FP_EXTEND:
12933	SDValue Lowered = LowerFP_EXTEND(Op: SDValue (N, `0`), DAG);
12934	if (Lowered)
12935	Results.push_back(Elt: Lowered);
12936	return;
12937	}
12938	}
12939
12940	//===----------------------------------------------------------------------===//
12941	// Other Lowering Code
12942	//===----------------------------------------------------------------------===//
12943
12944	static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12945	return Builder.CreateIntrinsic(ID: Id, Args: {});
12946	}
12947
12948	Value PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type ValueTy,
12949	Value *Addr,
12950	AtomicOrdering Ord) const {
12951	unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12952
12953	assert((SZ == `8` \|\| SZ == `16` \|\| SZ == `32` \|\| SZ == `64`) &&
12954	"Only 8/16/32/64-bit atomic loads supported");
12955	Intrinsic::ID IntID;
12956	switch (SZ) {
12957	default:
12958	llvm_unreachable("Unexpected PrimitiveSize");
12959	case `8`:
12960	IntID = Intrinsic::ppc_lbarx;
12961	assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12962	break;
12963	case `16`:
12964	IntID = Intrinsic::ppc_lharx;
12965	assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12966	break;
12967	case `32`:
12968	IntID = Intrinsic::ppc_lwarx;
12969	break;
12970	case `64`:
12971	IntID = Intrinsic::ppc_ldarx;
12972	break;
12973	}
12974	Value *Call =
12975	Builder.CreateIntrinsic(ID: IntID, Args: Addr, /FMFSource=/nullptr, Name: "larx");
12976
12977	return Builder.CreateTruncOrBitCast(V: Call, DestTy: ValueTy);
12978	}
12979
12980	// Perform a store-conditional operation to Addr. Return the status of the
12981	// store. This should be 0 if the store succeeded, non-zero otherwise.
12982	Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12983	Value Val, Value Addr,
12984	AtomicOrdering Ord) const {
12985	Type *Ty = Val->getType();
12986	unsigned SZ = Ty->getPrimitiveSizeInBits();
12987
12988	assert((SZ == `8` \|\| SZ == `16` \|\| SZ == `32` \|\| SZ == `64`) &&
12989	"Only 8/16/32/64-bit atomic loads supported");
12990	Intrinsic::ID IntID;
12991	switch (SZ) {
12992	default:
12993	llvm_unreachable("Unexpected PrimitiveSize");
12994	case `8`:
12995	IntID = Intrinsic::ppc_stbcx;
12996	assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12997	break;
12998	case `16`:
12999	IntID = Intrinsic::ppc_sthcx;
13000	assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13001	break;
13002	case `32`:
13003	IntID = Intrinsic::ppc_stwcx;
13004	break;
13005	case `64`:
13006	IntID = Intrinsic::ppc_stdcx;
13007	break;
13008	}
13009
13010	if (SZ == `8` \|\| SZ == `16`)
13011	Val = Builder.CreateZExt(V: Val, DestTy: Builder.getInt32Ty());
13012
13013	Value *Call = Builder.CreateIntrinsic(ID: IntID, Args: {Addr, Val},
13014	/FMFSource=/nullptr, Name: "stcx");
13015	return Builder.CreateXor(LHS: Call, RHS: Builder.getInt32(C: `1`));
13016	}
13017
13018	// The mappings for emitLeading/TrailingFence is taken from
13019	// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
13020	Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
13021	Instruction *Inst,
13022	AtomicOrdering Ord) const {
13023	if (Ord == AtomicOrdering::SequentiallyConsistent)
13024	return callIntrinsic(Builder, Id: Intrinsic::ppc_sync);
13025	if (isReleaseOrStronger(AO: Ord))
13026	return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
13027	return nullptr;
13028	}
13029
13030	Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
13031	Instruction *Inst,
13032	AtomicOrdering Ord) const {
13033	if (Inst->hasAtomicLoad() && isAcquireOrStronger(AO: Ord)) {
13034	// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
13035	// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
13036	// and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
13037	if (isa<LoadInst>(Val: Inst))
13038	return Builder.CreateIntrinsic(ID: Intrinsic::ppc_cfence, Types: {Inst->getType()},
13039	Args: {Inst});
13040	// FIXME: Can use isync for rmw operation.
13041	return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
13042	}
13043	return nullptr;
13044	}
13045
13046	MachineBasicBlock *
13047	PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
13048	unsigned AtomicSize,
13049	unsigned BinOpcode,
13050	unsigned CmpOpcode,
13051	unsigned CmpPred) const {
13052	// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13053	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13054
13055	auto LoadMnemonic = PPC::LDARX;
13056	auto StoreMnemonic = PPC::STDCX;
13057	switch (AtomicSize) {
13058	default:
13059	llvm_unreachable("Unexpected size of atomic entity");
13060	case `1`:
13061	LoadMnemonic = PPC::LBARX;
13062	StoreMnemonic = PPC::STBCX;
13063	assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13064	break;
13065	case `2`:
13066	LoadMnemonic = PPC::LHARX;
13067	StoreMnemonic = PPC::STHCX;
13068	assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13069	break;
13070	case `4`:
13071	LoadMnemonic = PPC::LWARX;
13072	StoreMnemonic = PPC::STWCX;
13073	break;
13074	case `8`:
13075	LoadMnemonic = PPC::LDARX;
13076	StoreMnemonic = PPC::STDCX;
13077	break;
13078	}
13079
13080	const BasicBlock *LLVM_BB = BB->getBasicBlock();
13081	MachineFunction *F = BB->getParent();
13082	MachineFunction::iterator It = ++BB->getIterator();
13083
13084	Register dest = MI.getOperand(i: `0`).getReg();
13085	Register ptrA = MI.getOperand(i: `1`).getReg();
13086	Register ptrB = MI.getOperand(i: `2`).getReg();
13087	Register incr = MI.getOperand(i: `3`).getReg();
13088	DebugLoc dl = MI.getDebugLoc();
13089
13090	MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13091	MachineBasicBlock *loop2MBB =
13092	CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
13093	MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13094	F->insert(MBBI: It, MBB: loopMBB);
13095	if (CmpOpcode)
13096	F->insert(MBBI: It, MBB: loop2MBB);
13097	F->insert(MBBI: It, MBB: exitMBB);
13098	exitMBB->splice(Where: exitMBB->begin(), Other: BB,
13099	From: std::next(x: MachineBasicBlock::iterator (MI)), To: BB->end());
13100	exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13101
13102	MachineRegisterInfo &RegInfo = F->getRegInfo();
13103	Register TmpReg = (!BinOpcode) ? incr :
13104	RegInfo.createVirtualRegister( RegClass: AtomicSize == `8` ? &PPC::G8RCRegClass
13105	: &PPC::GPRCRegClass);
13106
13107	// thisMBB:
13108	// ...
13109	// fallthrough --> loopMBB
13110	BB->addSuccessor(Succ: loopMBB);
13111
13112	// loopMBB:
13113	// l[wd]arx dest, ptr
13114	// add r0, dest, incr
13115	// st[wd]cx. r0, ptr
13116	// bne- loopMBB
13117	// fallthrough --> exitMBB
13118
13119	// For max/min...
13120	// loopMBB:
13121	// l[wd]arx dest, ptr
13122	// cmpl?[wd] dest, incr
13123	// bgt exitMBB
13124	// loop2MBB:
13125	// st[wd]cx. dest, ptr
13126	// bne- loopMBB
13127	// fallthrough --> exitMBB
13128
13129	BB = loopMBB;
13130	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest)
13131	.addReg(RegNo: ptrA).addReg(RegNo: ptrB);
13132	if (BinOpcode)
13133	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg).addReg(RegNo: incr).addReg(RegNo: dest);
13134	if (CmpOpcode) {
13135	Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13136	// Signed comparisons of byte or halfword values must be sign-extended.
13137	if (CmpOpcode == PPC::CMPW && AtomicSize < `4`) {
13138	Register ExtReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
13139	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: AtomicSize == `1` ? PPC::EXTSB : PPC::EXTSH),
13140	DestReg: ExtReg).addReg(RegNo: dest);
13141	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ExtReg).addReg(RegNo: incr);
13142	} else
13143	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: dest).addReg(RegNo: incr);
13144
13145	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13146	.addImm(Val: CmpPred)
13147	.addReg(RegNo: CrReg)
13148	.addMBB(MBB: exitMBB);
13149	BB->addSuccessor(Succ: loop2MBB);
13150	BB->addSuccessor(Succ: exitMBB);
13151	BB = loop2MBB;
13152	}
13153	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
13154	.addReg(RegNo: TmpReg).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
13155	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13156	.addImm(Val: PPC::PRED_NE_MINUS)
13157	.addReg(RegNo: PPC::CR0)
13158	.addMBB(MBB: loopMBB);
13159	BB->addSuccessor(Succ: loopMBB);
13160	BB->addSuccessor(Succ: exitMBB);
13161
13162	// exitMBB:
13163	// ...
13164	BB = exitMBB;
13165	return BB;
13166	}
13167
13168	static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13169	switch(MI.getOpcode()) {
13170	default:
13171	return false;
13172	case PPC::COPY:
13173	return TII->isSignExtended(Reg: MI.getOperand(i: `1`).getReg(),
13174	MRI: &MI.getMF()->getRegInfo());
13175	case PPC::LHA:
13176	case PPC::LHA8:
13177	case PPC::LHAU:
13178	case PPC::LHAU8:
13179	case PPC::LHAUX:
13180	case PPC::LHAUX8:
13181	case PPC::LHAX:
13182	case PPC::LHAX8:
13183	case PPC::LWA:
13184	case PPC::LWAUX:
13185	case PPC::LWAX:
13186	case PPC::LWAX_32:
13187	case PPC::LWA_32:
13188	case PPC::PLHA:
13189	case PPC::PLHA8:
13190	case PPC::PLHA8pc:
13191	case PPC::PLHApc:
13192	case PPC::PLWA:
13193	case PPC::PLWA8:
13194	case PPC::PLWA8pc:
13195	case PPC::PLWApc:
13196	case PPC::EXTSB:
13197	case PPC::EXTSB8:
13198	case PPC::EXTSB8_32_64:
13199	case PPC::EXTSB8_rec:
13200	case PPC::EXTSB_rec:
13201	case PPC::EXTSH:
13202	case PPC::EXTSH8:
13203	case PPC::EXTSH8_32_64:
13204	case PPC::EXTSH8_rec:
13205	case PPC::EXTSH_rec:
13206	case PPC::EXTSW:
13207	case PPC::EXTSWSLI:
13208	case PPC::EXTSWSLI_32_64:
13209	case PPC::EXTSWSLI_32_64_rec:
13210	case PPC::EXTSWSLI_rec:
13211	case PPC::EXTSW_32:
13212	case PPC::EXTSW_32_64:
13213	case PPC::EXTSW_32_64_rec:
13214	case PPC::EXTSW_rec:
13215	case PPC::SRAW:
13216	case PPC::SRAWI:
13217	case PPC::SRAWI_rec:
13218	case PPC::SRAW_rec:
13219	return true;
13220	}
13221	return false;
13222	}
13223
13224	MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13225	MachineInstr &MI, MachineBasicBlock *BB,
13226	bool is8bit, // operation
13227	unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13228	// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13229	const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13230
13231	// If this is a signed comparison and the value being compared is not known
13232	// to be sign extended, sign extend it here.
13233	DebugLoc dl = MI.getDebugLoc();
13234	MachineFunction *F = BB->getParent();
13235	MachineRegisterInfo &RegInfo = F->getRegInfo();
13236	Register incr = MI.getOperand(i: `3`).getReg();
13237	bool IsSignExtended =
13238	incr.isVirtual() && isSignExtended(MI&: *RegInfo.getVRegDef(Reg: incr), TII);
13239
13240	if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13241	Register ValueReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
13242	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueReg)
13243	.addReg(RegNo: MI.getOperand(i: `3`).getReg());
13244	MI.getOperand(i: `3`).setReg(ValueReg);
13245	incr = ValueReg;
13246	}
13247	// If we support part-word atomic mnemonics, just use them
13248	if (Subtarget.hasPartwordAtomics())
13249	return EmitAtomicBinary(MI, BB, AtomicSize: is8bit ? `1` : `2`, BinOpcode, CmpOpcode,
13250	CmpPred);
13251
13252	// In 64 bit mode we have to use 64 bits for addresses, even though the
13253	// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13254	// registers without caring whether they're 32 or 64, but here we're
13255	// doing actual arithmetic on the addresses.
13256	bool is64bit = Subtarget.isPPC64();
13257	bool isLittleEndian = Subtarget.isLittleEndian();
13258	unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13259
13260	const BasicBlock *LLVM_BB = BB->getBasicBlock();
13261	MachineFunction::iterator It = ++BB->getIterator();
13262
13263	Register dest = MI.getOperand(i: `0`).getReg();
13264	Register ptrA = MI.getOperand(i: `1`).getReg();
13265	Register ptrB = MI.getOperand(i: `2`).getReg();
13266
13267	MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13268	MachineBasicBlock *loop2MBB =
13269	CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
13270	MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13271	F->insert(MBBI: It, MBB: loopMBB);
13272	if (CmpOpcode)
13273	F->insert(MBBI: It, MBB: loop2MBB);
13274	F->insert(MBBI: It, MBB: exitMBB);
13275	exitMBB->splice(Where: exitMBB->begin(), Other: BB,
13276	From: std::next(x: MachineBasicBlock::iterator (MI)), To: BB->end());
13277	exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13278
13279	const TargetRegisterClass *RC =
13280	is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13281	const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13282
13283	Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
13284	Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13285	Register ShiftReg =
13286	isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
13287	Register Incr2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13288	Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13289	Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13290	Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13291	Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13292	Register Tmp3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13293	Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13294	Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13295	Register SrwDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13296	Register Ptr1Reg;
13297	Register TmpReg =
13298	(!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
13299
13300	// thisMBB:
13301	// ...
13302	// fallthrough --> loopMBB
13303	BB->addSuccessor(Succ: loopMBB);
13304
13305	// The 4-byte load must be aligned, while a char or short may be
13306	// anywhere in the word. Hence all this nasty bookkeeping code.
13307	// add ptr1, ptrA, ptrB [copy if ptrA==0]
13308	// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13309	// xori shift, shift1, 24 [16]
13310	// rlwinm ptr, ptr1, 0, 0, 29
13311	// slw incr2, incr, shift
13312	// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13313	// slw mask, mask2, shift
13314	// loopMBB:
13315	// lwarx tmpDest, ptr
13316	// add tmp, tmpDest, incr2
13317	// andc tmp2, tmpDest, mask
13318	// and tmp3, tmp, mask
13319	// or tmp4, tmp3, tmp2
13320	// stwcx. tmp4, ptr
13321	// bne- loopMBB
13322	// fallthrough --> exitMBB
13323	// srw SrwDest, tmpDest, shift
13324	// rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13325	if (ptrA != ZeroReg) {
13326	Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
13327	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
13328	.addReg(RegNo: ptrA)
13329	.addReg(RegNo: ptrB);
13330	} else {
13331	Ptr1Reg = ptrB;
13332	}
13333	// We need use 32-bit subregister to avoid mismatch register class in 64-bit
13334	// mode.
13335	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
13336	.addReg(RegNo: Ptr1Reg, Flags: {}, SubReg: is64bit ? PPC::sub_32 : `0`)
13337	.addImm(Val: `3`)
13338	.addImm(Val: `27`)
13339	.addImm(Val: is8bit ? `28` : `27`);
13340	if (!isLittleEndian)
13341	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
13342	.addReg(RegNo: Shift1Reg)
13343	.addImm(Val: is8bit ? `24` : `16`);
13344	if (is64bit)
13345	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
13346	.addReg(RegNo: Ptr1Reg)
13347	.addImm(Val: `0`)
13348	.addImm(Val: `61`);
13349	else
13350	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
13351	.addReg(RegNo: Ptr1Reg)
13352	.addImm(Val: `0`)
13353	.addImm(Val: `0`)
13354	.addImm(Val: `29`);
13355	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: Incr2Reg).addReg(RegNo: incr).addReg(RegNo: ShiftReg);
13356	if (is8bit)
13357	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: `255`);
13358	else {
13359	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: `0`);
13360	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
13361	.addReg(RegNo: Mask3Reg)
13362	.addImm(Val: `65535`);
13363	}
13364	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
13365	.addReg(RegNo: Mask2Reg)
13366	.addReg(RegNo: ShiftReg);
13367
13368	BB = loopMBB;
13369	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
13370	.addReg(RegNo: ZeroReg)
13371	.addReg(RegNo: PtrReg);
13372	if (BinOpcode)
13373	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg)
13374	.addReg(RegNo: Incr2Reg)
13375	.addReg(RegNo: TmpDestReg);
13376	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
13377	.addReg(RegNo: TmpDestReg)
13378	.addReg(RegNo: MaskReg);
13379	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: Tmp3Reg).addReg(RegNo: TmpReg).addReg(RegNo: MaskReg);
13380	if (CmpOpcode) {
13381	// For unsigned comparisons, we can directly compare the shifted values.
13382	// For signed comparisons we shift and sign extend.
13383	Register SReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13384	Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13385	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: SReg)
13386	.addReg(RegNo: TmpDestReg)
13387	.addReg(RegNo: MaskReg);
13388	unsigned ValueReg = SReg;
13389	unsigned CmpReg = Incr2Reg;
13390	if (CmpOpcode == PPC::CMPW) {
13391	ValueReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13392	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: ValueReg)
13393	.addReg(RegNo: SReg)
13394	.addReg(RegNo: ShiftReg);
13395	Register ValueSReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13396	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueSReg)
13397	.addReg(RegNo: ValueReg);
13398	ValueReg = ValueSReg;
13399	CmpReg = incr;
13400	}
13401	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ValueReg).addReg(RegNo: CmpReg);
13402	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13403	.addImm(Val: CmpPred)
13404	.addReg(RegNo: CrReg)
13405	.addMBB(MBB: exitMBB);
13406	BB->addSuccessor(Succ: loop2MBB);
13407	BB->addSuccessor(Succ: exitMBB);
13408	BB = loop2MBB;
13409	}
13410	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg).addReg(RegNo: Tmp3Reg).addReg(RegNo: Tmp2Reg);
13411	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
13412	.addReg(RegNo: Tmp4Reg)
13413	.addReg(RegNo: ZeroReg)
13414	.addReg(RegNo: PtrReg);
13415	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13416	.addImm(Val: PPC::PRED_NE_MINUS)
13417	.addReg(RegNo: PPC::CR0)
13418	.addMBB(MBB: loopMBB);
13419	BB->addSuccessor(Succ: loopMBB);
13420	BB->addSuccessor(Succ: exitMBB);
13421
13422	// exitMBB:
13423	// ...
13424	BB = exitMBB;
13425	// Since the shift amount is not a constant, we need to clear
13426	// the upper bits with a separate RLWINM.
13427	BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: dest)
13428	.addReg(RegNo: SrwDestReg)
13429	.addImm(Val: `0`)
13430	.addImm(Val: is8bit ? `24` : `16`)
13431	.addImm(Val: `31`);
13432	BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: SrwDestReg)
13433	.addReg(RegNo: TmpDestReg)
13434	.addReg(RegNo: ShiftReg);
13435	return BB;
13436	}
13437
13438	llvm::MachineBasicBlock *
13439	PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13440	MachineBasicBlock MBB) const* {
13441	DebugLoc DL = MI.getDebugLoc();
13442	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13443	const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13444
13445	MachineFunction *MF = MBB->getParent();
13446	MachineRegisterInfo &MRI = MF->getRegInfo();
13447
13448	const BasicBlock *BB = MBB->getBasicBlock();
13449	MachineFunction::iterator I = ++MBB->getIterator();
13450
13451	Register DstReg = MI.getOperand(i: `0`).getReg();
13452	const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
13453	assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13454	Register mainDstReg = MRI.createVirtualRegister(RegClass: RC);
13455	Register restoreDstReg = MRI.createVirtualRegister(RegClass: RC);
13456
13457	MVT PVT = getPointerTy(DL: MF->getDataLayout());
13458	assert((PVT == MVT::i64 \|\| PVT == MVT::i32) &&
13459	"Invalid Pointer Size!");
13460	// For v = setjmp(buf), we generate
13461	//
13462	// thisMBB:
13463	// SjLjSetup mainMBB
13464	// bl mainMBB
13465	// v_restore = 1
13466	// b sinkMBB
13467	//
13468	// mainMBB:
13469	// buf[LabelOffset] = LR
13470	// v_main = 0
13471	//
13472	// sinkMBB:
13473	// v = phi(main, restore)
13474	//
13475
13476	MachineBasicBlock *thisMBB = MBB;
13477	MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13478	MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13479	MF->insert(MBBI: I, MBB: mainMBB);
13480	MF->insert(MBBI: I, MBB: sinkMBB);
13481
13482	MachineInstrBuilder MIB;
13483
13484	// Transfer the remainder of BB and its successor edges to sinkMBB.
13485	sinkMBB->splice(Where: sinkMBB->begin(), Other: MBB,
13486	From: std::next(x: MachineBasicBlock::iterator (MI)), To: MBB->end());
13487	sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
13488
13489	// Note that the structure of the jmp_buf used here is not compatible
13490	// with that used by libc, and is not designed to be. Specifically, it
13491	// stores only those 'reserved' registers that LLVM does not otherwise
13492	// understand how to spill. Also, by convention, by the time this
13493	// intrinsic is called, Clang has already stored the frame address in the
13494	// first slot of the buffer and stack address in the third. Following the
13495	// X86 target code, we'll store the jump address in the second slot. We also
13496	// need to save the TOC pointer (R2) to handle jumps between shared
13497	// libraries, and that will be stored in the fourth slot. The thread
13498	// identifier (R13) is not affected.
13499
13500	// thisMBB:
13501	const int64_t LabelOffset = `1` * PVT.getStoreSize();
13502	const int64_t TOCOffset = `3` * PVT.getStoreSize();
13503	const int64_t BPOffset = `4` * PVT.getStoreSize();
13504
13505	// Prepare IP either in reg.
13506	const TargetRegisterClass *PtrRC = getRegClassFor(VT: PVT);
13507	Register LabelReg = MRI.createVirtualRegister(RegClass: PtrRC);
13508	Register BufReg = MI.getOperand(i: `1`).getReg();
13509
13510	if (Subtarget.is64BitELFABI()) {
13511	setUsesTOCBasePtr(*MBB->getParent());
13512	MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
13513	.addReg(RegNo: PPC::X2)
13514	.addImm(Val: TOCOffset)
13515	.addReg(RegNo: BufReg)
13516	.cloneMemRefs(OtherMI: MI);
13517	}
13518
13519	// Naked functions never have a base pointer, and so we use r1. For all
13520	// other functions, this decision must be delayed until during PEI.
13521	unsigned BaseReg;
13522	if (MF->getFunction().hasFnAttribute(Kind: Attribute::Naked))
13523	BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13524	else
13525	BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13526
13527	MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL,
13528	MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13529	.addReg(RegNo: BaseReg)
13530	.addImm(Val: BPOffset)
13531	.addReg(RegNo: BufReg)
13532	.cloneMemRefs(OtherMI: MI);
13533
13534	// Setup
13535	MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::BCLalways)).addMBB(MBB: mainMBB);
13536	MIB.addRegMask(Mask: TRI->getNoPreservedMask());
13537
13538	BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: restoreDstReg).addImm(Val: `1`);
13539
13540	MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::EH_SjLj_Setup))
13541	.addMBB(MBB: mainMBB);
13542	MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: sinkMBB);
13543
13544	thisMBB->addSuccessor(Succ: mainMBB, Prob: BranchProbability::getZero());
13545	thisMBB->addSuccessor(Succ: sinkMBB, Prob: BranchProbability::getOne());
13546
13547	// mainMBB:
13548	// mainDstReg = 0
13549	MIB =
13550	BuildMI(BB: mainMBB, MIMD: DL,
13551	MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), DestReg: LabelReg);
13552
13553	// Store IP
13554	if (Subtarget.isPPC64()) {
13555	MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
13556	.addReg(RegNo: LabelReg)
13557	.addImm(Val: LabelOffset)
13558	.addReg(RegNo: BufReg);
13559	} else {
13560	MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STW))
13561	.addReg(RegNo: LabelReg)
13562	.addImm(Val: LabelOffset)
13563	.addReg(RegNo: BufReg);
13564	}
13565	MIB.cloneMemRefs(OtherMI: MI);
13566
13567	BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: mainDstReg).addImm(Val: `0`);
13568	mainMBB->addSuccessor(Succ: sinkMBB);
13569
13570	// sinkMBB:
13571	BuildMI(BB&: *sinkMBB, I: sinkMBB->begin(), MIMD: DL,
13572	MCID: TII->get(Opcode: PPC::PHI), DestReg: DstReg)
13573	.addReg(RegNo: mainDstReg).addMBB(MBB: mainMBB)
13574	.addReg(RegNo: restoreDstReg).addMBB(MBB: thisMBB);
13575
13576	MI.eraseFromParent();
13577	return sinkMBB;
13578	}
13579
13580	MachineBasicBlock *
13581	PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13582	MachineBasicBlock MBB) const* {
13583	DebugLoc DL = MI.getDebugLoc();
13584	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13585
13586	MachineFunction *MF = MBB->getParent();
13587	MachineRegisterInfo &MRI = MF->getRegInfo();
13588
13589	MVT PVT = getPointerTy(DL: MF->getDataLayout());
13590	assert((PVT == MVT::i64 \|\| PVT == MVT::i32) &&
13591	"Invalid Pointer Size!");
13592
13593	const TargetRegisterClass *RC =
13594	(PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13595	Register Tmp = MRI.createVirtualRegister(RegClass: RC);
13596	// Since FP is only updated here but NOT referenced, it's treated as GPR.
13597	unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13598	unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13599	unsigned BP =
13600	(PVT == MVT::i64)
13601	? PPC::X30
13602	: (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13603	: PPC::R30);
13604
13605	MachineInstrBuilder MIB;
13606
13607	const int64_t LabelOffset = `1` * PVT.getStoreSize();
13608	const int64_t SPOffset = `2` * PVT.getStoreSize();
13609	const int64_t TOCOffset = `3` * PVT.getStoreSize();
13610	const int64_t BPOffset = `4` * PVT.getStoreSize();
13611
13612	Register BufReg = MI.getOperand(i: `0`).getReg();
13613
13614	// Reload FP (the jumped-to function may not have had a
13615	// frame pointer, and if so, then its r31 will be restored
13616	// as necessary).
13617	if (PVT == MVT::i64) {
13618	MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: FP)
13619	.addImm(Val: `0`)
13620	.addReg(RegNo: BufReg);
13621	} else {
13622	MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: FP)
13623	.addImm(Val: `0`)
13624	.addReg(RegNo: BufReg);
13625	}
13626	MIB.cloneMemRefs(OtherMI: MI);
13627
13628	// Reload IP
13629	if (PVT == MVT::i64) {
13630	MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: Tmp)
13631	.addImm(Val: LabelOffset)
13632	.addReg(RegNo: BufReg);
13633	} else {
13634	MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: Tmp)
13635	.addImm(Val: LabelOffset)
13636	.addReg(RegNo: BufReg);
13637	}
13638	MIB.cloneMemRefs(OtherMI: MI);
13639
13640	// Reload SP
13641	if (PVT == MVT::i64) {
13642	MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: SP)
13643	.addImm(Val: SPOffset)
13644	.addReg(RegNo: BufReg);
13645	} else {
13646	MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: SP)
13647	.addImm(Val: SPOffset)
13648	.addReg(RegNo: BufReg);
13649	}
13650	MIB.cloneMemRefs(OtherMI: MI);
13651
13652	// Reload BP
13653	if (PVT == MVT::i64) {
13654	MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: BP)
13655	.addImm(Val: BPOffset)
13656	.addReg(RegNo: BufReg);
13657	} else {
13658	MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: BP)
13659	.addImm(Val: BPOffset)
13660	.addReg(RegNo: BufReg);
13661	}
13662	MIB.cloneMemRefs(OtherMI: MI);
13663
13664	// Reload TOC
13665	if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13666	setUsesTOCBasePtr(*MBB->getParent());
13667	MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: PPC::X2)
13668	.addImm(Val: TOCOffset)
13669	.addReg(RegNo: BufReg)
13670	.cloneMemRefs(OtherMI: MI);
13671	}
13672
13673	// Jump
13674	BuildMI(BB&: *MBB, I&: MI, MIMD: DL,
13675	MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(RegNo: Tmp);
13676	BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13677
13678	MI.eraseFromParent();
13679	return MBB;
13680	}
13681
13682	bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13683	// If the function specifically requests inline stack probes, emit them.
13684	if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
13685	return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
13686	"inline-asm";
13687	return false;
13688	}
13689
13690	unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13691	const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13692	unsigned StackAlign = TFI->getStackAlignment();
13693	assert(StackAlign >= `1` && isPowerOf2_32(StackAlign) &&
13694	"Unexpected stack alignment");
13695	// The default stack probe size is 4096 if the function has no
13696	// stack-probe-size attribute.
13697	const Function &Fn = MF.getFunction();
13698	unsigned StackProbeSize =
13699	Fn.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: `4096`);
13700	// Round down to the stack alignment.
13701	StackProbeSize &= ~(StackAlign - `1`);
13702	return StackProbeSize ? StackProbeSize : StackAlign;
13703	}
13704
13705	// Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
13706	// into three phases. In the first phase, it uses pseudo instruction
13707	// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
13708	// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
13709	// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
13710	// MaxCallFrameSize so that it can calculate correct data area pointer.
13711	MachineBasicBlock *
13712	PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13713	MachineBasicBlock MBB) const* {
13714	const bool isPPC64 = Subtarget.isPPC64();
13715	MachineFunction *MF = MBB->getParent();
13716	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13717	DebugLoc DL = MI.getDebugLoc();
13718	const unsigned ProbeSize = getStackProbeSize(MF: *MF);
13719	const BasicBlock *ProbedBB = MBB->getBasicBlock();
13720	MachineRegisterInfo &MRI = MF->getRegInfo();
13721	// The CFG of probing stack looks as
13722	// +-----+
13723	// \| MBB \|
13724	// +--+--+
13725	// \|
13726	// +----v----+
13727	// +--->+ TestMBB +---+
13728	// \| +----+----+ \|
13729	// \| \| \|
13730	// \| +-----v----+ \|
13731	// +---+ BlockMBB \| \|
13732	// +----------+ \|
13733	// \|
13734	// +---------+ \|
13735	// \| TailMBB +<--+
13736	// +---------+
13737	// In MBB, calculate previous frame pointer and final stack pointer.
13738	// In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13739	// TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13740	// TailMBB is spliced via \p MI.
13741	MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13742	MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13743	MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13744
13745	MachineFunction::iterator MBBIter = ++MBB->getIterator();
13746	MF->insert(MBBI: MBBIter, MBB: TestMBB);
13747	MF->insert(MBBI: MBBIter, MBB: BlockMBB);
13748	MF->insert(MBBI: MBBIter, MBB: TailMBB);
13749
13750	const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13751	const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13752
13753	Register DstReg = MI.getOperand(i: `0`).getReg();
13754	Register NegSizeReg = MI.getOperand(i: `1`).getReg();
13755	Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13756	Register FinalStackPtr = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13757	Register FramePointer = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13758	Register ActualNegSizeReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13759
13760	// Since value of NegSizeReg might be realigned in prologepilog, insert a
13761	// PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
13762	// NegSize.
13763	unsigned ProbeOpc;
13764	if (!MRI.hasOneNonDBGUse(RegNo: NegSizeReg))
13765	ProbeOpc =
13766	isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13767	else
13768	// By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
13769	// and NegSizeReg will be allocated in the same phyreg to avoid
13770	// redundant copy when NegSizeReg has only one use which is current MI and
13771	// will be replaced by PREPARE_PROBED_ALLOCA then.
13772	ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13773	: PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13774	BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: ProbeOpc), DestReg: FramePointer)
13775	.addDef(RegNo: ActualNegSizeReg)
13776	.addReg(RegNo: NegSizeReg)
13777	.add(MO: MI.getOperand(i: `2`))
13778	.add(MO: MI.getOperand(i: `3`));
13779
13780	// Calculate final stack pointer, which equals to SP + ActualNegSize.
13781	BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4),
13782	DestReg: FinalStackPtr)
13783	.addReg(RegNo: SPReg)
13784	.addReg(RegNo: ActualNegSizeReg);
13785
13786	// Materialize a scratch register for update.
13787	int64_t NegProbeSize = -(int64_t)ProbeSize;
13788	assert(isInt<`32`>(NegProbeSize) && "Unhandled probe size!");
13789	Register ScratchReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13790	if (!isInt<`16`>(x: NegProbeSize)) {
13791	Register TempReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13792	BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LIS8 : PPC::LIS), DestReg: TempReg)
13793	.addImm(Val: NegProbeSize >> `16`);
13794	BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ORI8 : PPC::ORI),
13795	DestReg: ScratchReg)
13796	.addReg(RegNo: TempReg)
13797	.addImm(Val: NegProbeSize & `0xFFFF`);
13798	} else
13799	BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LI8 : PPC::LI), DestReg: ScratchReg)
13800	.addImm(Val: NegProbeSize);
13801
13802	{
13803	// Probing leading residual part.
13804	Register Div = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13805	BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::DIVD : PPC::DIVW), DestReg: Div)
13806	.addReg(RegNo: ActualNegSizeReg)
13807	.addReg(RegNo: ScratchReg);
13808	Register Mul = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13809	BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::MULLD : PPC::MULLW), DestReg: Mul)
13810	.addReg(RegNo: Div)
13811	.addReg(RegNo: ScratchReg);
13812	Register NegMod = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13813	BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::SUBF8 : PPC::SUBF), DestReg: NegMod)
13814	.addReg(RegNo: Mul)
13815	.addReg(RegNo: ActualNegSizeReg);
13816	BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13817	.addReg(RegNo: FramePointer)
13818	.addReg(RegNo: SPReg)
13819	.addReg(RegNo: NegMod);
13820	}
13821
13822	{
13823	// Remaining part should be multiple of ProbeSize.
13824	Register CmpResult = MRI.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13825	BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::CMPD : PPC::CMPW), DestReg: CmpResult)
13826	.addReg(RegNo: SPReg)
13827	.addReg(RegNo: FinalStackPtr);
13828	BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC))
13829	.addImm(Val: PPC::PRED_EQ)
13830	.addReg(RegNo: CmpResult)
13831	.addMBB(MBB: TailMBB);
13832	TestMBB->addSuccessor(Succ: BlockMBB);
13833	TestMBB->addSuccessor(Succ: TailMBB);
13834	}
13835
13836	{
13837	// Touch the block.
13838	// \|P...\|P...\|P...
13839	BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13840	.addReg(RegNo: FramePointer)
13841	.addReg(RegNo: SPReg)
13842	.addReg(RegNo: ScratchReg);
13843	BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: TestMBB);
13844	BlockMBB->addSuccessor(Succ: TestMBB);
13845	}
13846
13847	// Calculation of MaxCallFrameSize is deferred to prologepilog, use
13848	// DYNAREAOFFSET pseudo instruction to get the future result.
13849	Register MaxCallFrameSizeReg =
13850	MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13851	BuildMI(BB: TailMBB, MIMD: DL,
13852	MCID: TII->get(Opcode: isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13853	DestReg: MaxCallFrameSizeReg)
13854	.add(MO: MI.getOperand(i: `2`))
13855	.add(MO: MI.getOperand(i: `3`));
13856	BuildMI(BB: TailMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4), DestReg: DstReg)
13857	.addReg(RegNo: SPReg)
13858	.addReg(RegNo: MaxCallFrameSizeReg);
13859
13860	// Splice instructions after MI to TailMBB.
13861	TailMBB->splice(Where: TailMBB->end(), Other: MBB,
13862	From: std::next(x: MachineBasicBlock::iterator (MI)), To: MBB->end());
13863	TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
13864	MBB->addSuccessor(Succ: TestMBB);
13865
13866	// Delete the pseudo instruction.
13867	MI.eraseFromParent();
13868
13869	++NumDynamicAllocaProbed;
13870	return TailMBB;
13871	}
13872
13873	static bool IsSelectCC(MachineInstr &MI) {
13874	switch (MI.getOpcode()) {
13875	case PPC::SELECT_CC_I4:
13876	case PPC::SELECT_CC_I8:
13877	case PPC::SELECT_CC_F4:
13878	case PPC::SELECT_CC_F8:
13879	case PPC::SELECT_CC_F16:
13880	case PPC::SELECT_CC_VRRC:
13881	case PPC::SELECT_CC_VSFRC:
13882	case PPC::SELECT_CC_VSSRC:
13883	case PPC::SELECT_CC_VSRC:
13884	case PPC::SELECT_CC_SPE4:
13885	case PPC::SELECT_CC_SPE:
13886	return true;
13887	default:
13888	return false;
13889	}
13890	}
13891
13892	static bool IsSelect(MachineInstr &MI) {
13893	switch (MI.getOpcode()) {
13894	case PPC::SELECT_I4:
13895	case PPC::SELECT_I8:
13896	case PPC::SELECT_F4:
13897	case PPC::SELECT_F8:
13898	case PPC::SELECT_F16:
13899	case PPC::SELECT_SPE:
13900	case PPC::SELECT_SPE4:
13901	case PPC::SELECT_VRRC:
13902	case PPC::SELECT_VSFRC:
13903	case PPC::SELECT_VSSRC:
13904	case PPC::SELECT_VSRC:
13905	return true;
13906	default:
13907	return false;
13908	}
13909	}
13910
13911	MachineBasicBlock *
13912	PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13913	MachineBasicBlock BB) const* {
13914	if (MI.getOpcode() == TargetOpcode::STACKMAP \|\|
13915	MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13916	if (Subtarget.is64BitELFABI() &&
13917	MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13918	!Subtarget.isUsingPCRelativeCalls()) {
13919	// Call lowering should have added an r2 operand to indicate a dependence
13920	// on the TOC base pointer value. It can't however, because there is no
13921	// way to mark the dependence as implicit there, and so the stackmap code
13922	// will confuse it with a regular operand. Instead, add the dependence
13923	// here.
13924	MI.addOperand(Op: MachineOperand::CreateReg(Reg: PPC::X2, isDef: false, isImp: true));
13925	}
13926
13927	return emitPatchPoint(MI, MBB: BB);
13928	}
13929
13930	if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 \|\|
13931	MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13932	return emitEHSjLjSetJmp(MI, MBB: BB);
13933	} else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 \|\|
13934	MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13935	return emitEHSjLjLongJmp(MI, MBB: BB);
13936	}
13937
13938	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13939
13940	// To "insert" these instructions we actually have to insert their
13941	// control-flow patterns.
13942	const BasicBlock *LLVM_BB = BB->getBasicBlock();
13943	MachineFunction::iterator It = ++BB->getIterator();
13944
13945	MachineFunction *F = BB->getParent();
13946	MachineRegisterInfo &MRI = F->getRegInfo();
13947
13948	if (Subtarget.hasISEL() &&
13949	(MI.getOpcode() == PPC::SELECT_CC_I4 \|\|
13950	MI.getOpcode() == PPC::SELECT_CC_I8 \|\|
13951	MI.getOpcode() == PPC::SELECT_I4 \|\| MI.getOpcode() == PPC::SELECT_I8)) {
13952	SmallVector<MachineOperand, `2`> Cond;
13953	if (MI.getOpcode() == PPC::SELECT_CC_I4 \|\|
13954	MI.getOpcode() == PPC::SELECT_CC_I8)
13955	Cond.push_back(Elt: MI.getOperand(i: `4`));
13956	else
13957	Cond.push_back(Elt: MachineOperand::CreateImm(Val: PPC::PRED_BIT_SET));
13958	Cond.push_back(Elt: MI.getOperand(i: `1`));
13959
13960	DebugLoc dl = MI.getDebugLoc();
13961	TII->insertSelect(MBB&: *BB, I: MI, DL: dl, DstReg: MI.getOperand(i: `0`).getReg(), Cond,
13962	TrueReg: MI.getOperand(i: `2`).getReg(), FalseReg: MI.getOperand(i: `3`).getReg());
13963	} else if (IsSelectCC(MI) \|\| IsSelect(MI)) {
13964	// The incoming instruction knows the destination vreg to set, the
13965	// condition code register to branch on, the true/false values to
13966	// select between, and a branch opcode to use.
13967
13968	// thisMBB:
13969	// ...
13970	// TrueVal = ...
13971	// cmpTY ccX, r1, r2
13972	// bCC sinkMBB
13973	// fallthrough --> copy0MBB
13974	MachineBasicBlock *thisMBB = BB;
13975	MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13976	MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13977	DebugLoc dl = MI.getDebugLoc();
13978	F->insert(MBBI: It, MBB: copy0MBB);
13979	F->insert(MBBI: It, MBB: sinkMBB);
13980
13981	if (isPhysRegUsedAfter(Reg: PPC::CARRY, MBI: MI.getIterator())) {
13982	copy0MBB->addLiveIn(PhysReg: PPC::CARRY);
13983	sinkMBB->addLiveIn(PhysReg: PPC::CARRY);
13984	}
13985
13986	// Set the call frame size on entry to the new basic blocks.
13987	// See https://reviews.llvm.org/D156113.
13988	unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13989	copy0MBB->setCallFrameSize(CallFrameSize);
13990	sinkMBB->setCallFrameSize(CallFrameSize);
13991
13992	// Transfer the remainder of BB and its successor edges to sinkMBB.
13993	sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
13994	From: std::next(x: MachineBasicBlock::iterator (MI)), To: BB->end());
13995	sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13996
13997	// Next, add the true and fallthrough blocks as its successors.
13998	BB->addSuccessor(Succ: copy0MBB);
13999	BB->addSuccessor(Succ: sinkMBB);
14000
14001	if (IsSelect(MI)) {
14002	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BC))
14003	.addReg(RegNo: MI.getOperand(i: `1`).getReg())
14004	.addMBB(MBB: sinkMBB);
14005	} else {
14006	unsigned SelectPred = MI.getOperand(i: `4`).getImm();
14007	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14008	.addImm(Val: SelectPred)
14009	.addReg(RegNo: MI.getOperand(i: `1`).getReg())
14010	.addMBB(MBB: sinkMBB);
14011	}
14012
14013	// copy0MBB:
14014	// %FalseValue = ...
14015	// # fallthrough to sinkMBB
14016	BB = copy0MBB;
14017
14018	// Update machine-CFG edges
14019	BB->addSuccessor(Succ: sinkMBB);
14020
14021	// sinkMBB:
14022	// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
14023	// ...
14024	BB = sinkMBB;
14025	BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::PHI), DestReg: MI.getOperand(i: `0`).getReg())
14026	.addReg(RegNo: MI.getOperand(i: `3`).getReg())
14027	.addMBB(MBB: copy0MBB)
14028	.addReg(RegNo: MI.getOperand(i: `2`).getReg())
14029	.addMBB(MBB: thisMBB);
14030	} else if (MI.getOpcode() == PPC::ReadTB) {
14031	// To read the 64-bit time-base register on a 32-bit target, we read the
14032	// two halves. Should the counter have wrapped while it was being read, we
14033	// need to try again.
14034	// ...
14035	// readLoop:
14036	// mfspr Rx,TBU # load from TBU
14037	// mfspr Ry,TB # load from TB
14038	// mfspr Rz,TBU # load from TBU
14039	// cmpw crX,Rx,Rz # check if 'old'='new'
14040	// bne readLoop # branch if they're not equal
14041	// ...
14042
14043	MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14044	MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14045	DebugLoc dl = MI.getDebugLoc();
14046	F->insert(MBBI: It, MBB: readMBB);
14047	F->insert(MBBI: It, MBB: sinkMBB);
14048
14049	// Transfer the remainder of BB and its successor edges to sinkMBB.
14050	sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
14051	From: std::next(x: MachineBasicBlock::iterator (MI)), To: BB->end());
14052	sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14053
14054	BB->addSuccessor(Succ: readMBB);
14055	BB = readMBB;
14056
14057	MachineRegisterInfo &RegInfo = F->getRegInfo();
14058	Register ReadAgainReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
14059	Register LoReg = MI.getOperand(i: `0`).getReg();
14060	Register HiReg = MI.getOperand(i: `1`).getReg();
14061
14062	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: HiReg).addImm(Val: `269`);
14063	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: LoReg).addImm(Val: `268`);
14064	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: ReadAgainReg).addImm(Val: `269`);
14065
14066	Register CmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14067
14068	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CmpReg)
14069	.addReg(RegNo: HiReg)
14070	.addReg(RegNo: ReadAgainReg);
14071	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14072	.addImm(Val: PPC::PRED_NE)
14073	.addReg(RegNo: CmpReg)
14074	.addMBB(MBB: readMBB);
14075
14076	BB->addSuccessor(Succ: readMBB);
14077	BB->addSuccessor(Succ: sinkMBB);
14078	} else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
14079	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::ADD4);
14080	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
14081	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::ADD4);
14082	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
14083	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: PPC::ADD4);
14084	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
14085	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: PPC::ADD8);
14086
14087	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
14088	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::AND);
14089	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
14090	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::AND);
14091	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
14092	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: PPC::AND);
14093	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
14094	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: PPC::AND8);
14095
14096	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
14097	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::OR);
14098	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
14099	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::OR);
14100	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14101	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: PPC::OR);
14102	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14103	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: PPC::OR8);
14104
14105	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14106	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::XOR);
14107	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14108	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::XOR);
14109	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14110	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: PPC::XOR);
14111	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14112	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: PPC::XOR8);
14113
14114	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14115	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::NAND);
14116	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14117	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::NAND);
14118	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14119	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: PPC::NAND);
14120	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14121	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: PPC::NAND8);
14122
14123	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14124	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::SUBF);
14125	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14126	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::SUBF);
14127	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14128	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: PPC::SUBF);
14129	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14130	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: PPC::SUBF8);
14131
14132	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14133	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: `0`, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
14134	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14135	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: `0`, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
14136	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14137	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: `0`, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
14138	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14139	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: `0`, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_LT);
14140
14141	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14142	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: `0`, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
14143	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14144	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: `0`, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
14145	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14146	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: `0`, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
14147	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14148	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: `0`, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_GT);
14149
14150	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14151	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: `0`, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
14152	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14153	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: `0`, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
14154	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14155	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: `0`, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
14156	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14157	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: `0`, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_LT);
14158
14159	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14160	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: `0`, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
14161	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14162	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: `0`, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
14163	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14164	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: `0`, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
14165	else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14166	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: `0`, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_GT);
14167
14168	else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14169	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: `0`);
14170	else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14171	BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: `0`);
14172	else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14173	BB = EmitAtomicBinary(MI, BB, AtomicSize: `4`, BinOpcode: `0`);
14174	else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14175	BB = EmitAtomicBinary(MI, BB, AtomicSize: `8`, BinOpcode: `0`);
14176	else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 \|\|
14177	MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 \|\|
14178	(Subtarget.hasPartwordAtomics() &&
14179	MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) \|\|
14180	(Subtarget.hasPartwordAtomics() &&
14181	MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14182	bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14183
14184	auto LoadMnemonic = PPC::LDARX;
14185	auto StoreMnemonic = PPC::STDCX;
14186	switch (MI.getOpcode()) {
14187	default:
14188	llvm_unreachable("Compare and swap of unknown size");
14189	case PPC::ATOMIC_CMP_SWAP_I8:
14190	LoadMnemonic = PPC::LBARX;
14191	StoreMnemonic = PPC::STBCX;
14192	assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14193	break;
14194	case PPC::ATOMIC_CMP_SWAP_I16:
14195	LoadMnemonic = PPC::LHARX;
14196	StoreMnemonic = PPC::STHCX;
14197	assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14198	break;
14199	case PPC::ATOMIC_CMP_SWAP_I32:
14200	LoadMnemonic = PPC::LWARX;
14201	StoreMnemonic = PPC::STWCX;
14202	break;
14203	case PPC::ATOMIC_CMP_SWAP_I64:
14204	LoadMnemonic = PPC::LDARX;
14205	StoreMnemonic = PPC::STDCX;
14206	break;
14207	}
14208	MachineRegisterInfo &RegInfo = F->getRegInfo();
14209	Register dest = MI.getOperand(i: `0`).getReg();
14210	Register ptrA = MI.getOperand(i: `1`).getReg();
14211	Register ptrB = MI.getOperand(i: `2`).getReg();
14212	Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14213	Register oldval = MI.getOperand(i: `3`).getReg();
14214	Register newval = MI.getOperand(i: `4`).getReg();
14215	DebugLoc dl = MI.getDebugLoc();
14216
14217	MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14218	MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14219	MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14220	F->insert(MBBI: It, MBB: loop1MBB);
14221	F->insert(MBBI: It, MBB: loop2MBB);
14222	F->insert(MBBI: It, MBB: exitMBB);
14223	exitMBB->splice(Where: exitMBB->begin(), Other: BB,
14224	From: std::next(x: MachineBasicBlock::iterator (MI)), To: BB->end());
14225	exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14226
14227	// thisMBB:
14228	// ...
14229	// fallthrough --> loopMBB
14230	BB->addSuccessor(Succ: loop1MBB);
14231
14232	// loop1MBB:
14233	// l[bhwd]arx dest, ptr
14234	// cmp[wd] dest, oldval
14235	// bne- exitBB
14236	// loop2MBB:
14237	// st[bhwd]cx. newval, ptr
14238	// bne- loopMBB
14239	// b exitBB
14240	// exitBB:
14241	BB = loop1MBB;
14242	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
14243	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::CMPD : PPC::CMPW), DestReg: CrReg)
14244	.addReg(RegNo: dest)
14245	.addReg(RegNo: oldval);
14246	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14247	.addImm(Val: PPC::PRED_NE_MINUS)
14248	.addReg(RegNo: CrReg)
14249	.addMBB(MBB: exitMBB);
14250	BB->addSuccessor(Succ: loop2MBB);
14251	BB->addSuccessor(Succ: exitMBB);
14252
14253	BB = loop2MBB;
14254	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
14255	.addReg(RegNo: newval)
14256	.addReg(RegNo: ptrA)
14257	.addReg(RegNo: ptrB);
14258	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14259	.addImm(Val: PPC::PRED_NE_MINUS)
14260	.addReg(RegNo: PPC::CR0)
14261	.addMBB(MBB: loop1MBB);
14262	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14263	BB->addSuccessor(Succ: loop1MBB);
14264	BB->addSuccessor(Succ: exitMBB);
14265
14266	// exitMBB:
14267	// ...
14268	BB = exitMBB;
14269	} else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 \|\|
14270	MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14271	// We must use 64-bit registers for addresses when targeting 64-bit,
14272	// since we're actually doing arithmetic on them. Other registers
14273	// can be 32-bit.
14274	bool is64bit = Subtarget.isPPC64();
14275	bool isLittleEndian = Subtarget.isLittleEndian();
14276	bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14277
14278	Register dest = MI.getOperand(i: `0`).getReg();
14279	Register ptrA = MI.getOperand(i: `1`).getReg();
14280	Register ptrB = MI.getOperand(i: `2`).getReg();
14281	Register oldval = MI.getOperand(i: `3`).getReg();
14282	Register newval = MI.getOperand(i: `4`).getReg();
14283	DebugLoc dl = MI.getDebugLoc();
14284
14285	MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14286	MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14287	MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14288	F->insert(MBBI: It, MBB: loop1MBB);
14289	F->insert(MBBI: It, MBB: loop2MBB);
14290	F->insert(MBBI: It, MBB: exitMBB);
14291	exitMBB->splice(Where: exitMBB->begin(), Other: BB,
14292	From: std::next(x: MachineBasicBlock::iterator (MI)), To: BB->end());
14293	exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14294
14295	MachineRegisterInfo &RegInfo = F->getRegInfo();
14296	const TargetRegisterClass *RC =
14297	is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14298	const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14299
14300	Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
14301	Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14302	Register ShiftReg =
14303	isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
14304	Register NewVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14305	Register NewVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14306	Register OldVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14307	Register OldVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14308	Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14309	Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14310	Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14311	Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14312	Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14313	Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14314	Register Ptr1Reg;
14315	Register TmpReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14316	Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14317	Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14318	// thisMBB:
14319	// ...
14320	// fallthrough --> loopMBB
14321	BB->addSuccessor(Succ: loop1MBB);
14322
14323	// The 4-byte load must be aligned, while a char or short may be
14324	// anywhere in the word. Hence all this nasty bookkeeping code.
14325	// add ptr1, ptrA, ptrB [copy if ptrA==0]
14326	// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14327	// xori shift, shift1, 24 [16]
14328	// rlwinm ptr, ptr1, 0, 0, 29
14329	// slw newval2, newval, shift
14330	// slw oldval2, oldval,shift
14331	// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14332	// slw mask, mask2, shift
14333	// and newval3, newval2, mask
14334	// and oldval3, oldval2, mask
14335	// loop1MBB:
14336	// lwarx tmpDest, ptr
14337	// and tmp, tmpDest, mask
14338	// cmpw tmp, oldval3
14339	// bne- exitBB
14340	// loop2MBB:
14341	// andc tmp2, tmpDest, mask
14342	// or tmp4, tmp2, newval3
14343	// stwcx. tmp4, ptr
14344	// bne- loop1MBB
14345	// b exitBB
14346	// exitBB:
14347	// srw dest, tmpDest, shift
14348	if (ptrA != ZeroReg) {
14349	Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
14350	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
14351	.addReg(RegNo: ptrA)
14352	.addReg(RegNo: ptrB);
14353	} else {
14354	Ptr1Reg = ptrB;
14355	}
14356
14357	// We need use 32-bit subregister to avoid mismatch register class in 64-bit
14358	// mode.
14359	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
14360	.addReg(RegNo: Ptr1Reg, Flags: {}, SubReg: is64bit ? PPC::sub_32 : `0`)
14361	.addImm(Val: `3`)
14362	.addImm(Val: `27`)
14363	.addImm(Val: is8bit ? `28` : `27`);
14364	if (!isLittleEndian)
14365	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
14366	.addReg(RegNo: Shift1Reg)
14367	.addImm(Val: is8bit ? `24` : `16`);
14368	if (is64bit)
14369	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
14370	.addReg(RegNo: Ptr1Reg)
14371	.addImm(Val: `0`)
14372	.addImm(Val: `61`);
14373	else
14374	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
14375	.addReg(RegNo: Ptr1Reg)
14376	.addImm(Val: `0`)
14377	.addImm(Val: `0`)
14378	.addImm(Val: `29`);
14379	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: NewVal2Reg)
14380	.addReg(RegNo: newval)
14381	.addReg(RegNo: ShiftReg);
14382	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: OldVal2Reg)
14383	.addReg(RegNo: oldval)
14384	.addReg(RegNo: ShiftReg);
14385	if (is8bit)
14386	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: `255`);
14387	else {
14388	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: `0`);
14389	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
14390	.addReg(RegNo: Mask3Reg)
14391	.addImm(Val: `65535`);
14392	}
14393	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
14394	.addReg(RegNo: Mask2Reg)
14395	.addReg(RegNo: ShiftReg);
14396	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: NewVal3Reg)
14397	.addReg(RegNo: NewVal2Reg)
14398	.addReg(RegNo: MaskReg);
14399	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: OldVal3Reg)
14400	.addReg(RegNo: OldVal2Reg)
14401	.addReg(RegNo: MaskReg);
14402
14403	BB = loop1MBB;
14404	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
14405	.addReg(RegNo: ZeroReg)
14406	.addReg(RegNo: PtrReg);
14407	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: TmpReg)
14408	.addReg(RegNo: TmpDestReg)
14409	.addReg(RegNo: MaskReg);
14410	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CrReg)
14411	.addReg(RegNo: TmpReg)
14412	.addReg(RegNo: OldVal3Reg);
14413	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14414	.addImm(Val: PPC::PRED_NE)
14415	.addReg(RegNo: CrReg)
14416	.addMBB(MBB: exitMBB);
14417	BB->addSuccessor(Succ: loop2MBB);
14418	BB->addSuccessor(Succ: exitMBB);
14419
14420	BB = loop2MBB;
14421	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
14422	.addReg(RegNo: TmpDestReg)
14423	.addReg(RegNo: MaskReg);
14424	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg)
14425	.addReg(RegNo: Tmp2Reg)
14426	.addReg(RegNo: NewVal3Reg);
14427	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
14428	.addReg(RegNo: Tmp4Reg)
14429	.addReg(RegNo: ZeroReg)
14430	.addReg(RegNo: PtrReg);
14431	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14432	.addImm(Val: PPC::PRED_NE)
14433	.addReg(RegNo: PPC::CR0)
14434	.addMBB(MBB: loop1MBB);
14435	BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14436	BB->addSuccessor(Succ: loop1MBB);
14437	BB->addSuccessor(Succ: exitMBB);
14438
14439	// exitMBB:
14440	// ...
14441	BB = exitMBB;
14442	BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: dest)
14443	.addReg(RegNo: TmpReg)
14444	.addReg(RegNo: ShiftReg);
14445	} else if (MI.getOpcode() == PPC::FADDrtz) {
14446	// This pseudo performs an FADD with rounding mode temporarily forced
14447	// to round-to-zero. We emit this via custom inserter since the FPSCR
14448	// is not modeled at the SelectionDAG level.
14449	Register Dest = MI.getOperand(i: `0`).getReg();
14450	Register Src1 = MI.getOperand(i: `1`).getReg();
14451	Register Src2 = MI.getOperand(i: `2`).getReg();
14452	DebugLoc dl = MI.getDebugLoc();
14453
14454	MachineRegisterInfo &RegInfo = F->getRegInfo();
14455	Register MFFSReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14456
14457	// Save FPSCR value.
14458	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: MFFSReg);
14459
14460	// Set rounding mode to round-to-zero.
14461	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB1))
14462	.addImm(Val: `31`)
14463	.addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14464
14465	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB0))
14466	.addImm(Val: `30`)
14467	.addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14468
14469	// Perform addition.
14470	auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::FADD), DestReg: Dest)
14471	.addReg(RegNo: Src1)
14472	.addReg(RegNo: Src2);
14473	if (MI.getFlag(Flag: MachineInstr::NoFPExcept))
14474	MIB.setMIFlag(MachineInstr::NoFPExcept);
14475
14476	// Restore FPSCR value.
14477	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSFb)).addImm(Val: `1`).addReg(RegNo: MFFSReg);
14478	} else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT \|\|
14479	MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT \|\|
14480	MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 \|\|
14481	MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14482	unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 \|\|
14483	MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14484	? PPC::ANDI8_rec
14485	: PPC::ANDI_rec;
14486	bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT \|\|
14487	MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14488
14489	MachineRegisterInfo &RegInfo = F->getRegInfo();
14490	Register Dest = RegInfo.createVirtualRegister(
14491	RegClass: Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14492
14493	DebugLoc Dl = MI.getDebugLoc();
14494	BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode), DestReg: Dest)
14495	.addReg(RegNo: MI.getOperand(i: `1`).getReg())
14496	.addImm(Val: `1`);
14497	BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14498	DestReg: MI.getOperand(i: `0`).getReg())
14499	.addReg(RegNo: IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14500	} else if (MI.getOpcode() == PPC::TCHECK_RET) {
14501	DebugLoc Dl = MI.getDebugLoc();
14502	MachineRegisterInfo &RegInfo = F->getRegInfo();
14503	Register CRReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14504	BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TCHECK), DestReg: CRReg);
14505	BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14506	DestReg: MI.getOperand(i: `0`).getReg())
14507	.addReg(RegNo: CRReg);
14508	} else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14509	DebugLoc Dl = MI.getDebugLoc();
14510	unsigned Imm = MI.getOperand(i: `1`).getImm();
14511	BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TBEGIN)).addImm(Val: Imm);
14512	BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14513	DestReg: MI.getOperand(i: `0`).getReg())
14514	.addReg(RegNo: PPC::CR0EQ);
14515	} else if (MI.getOpcode() == PPC::SETRNDi) {
14516	DebugLoc dl = MI.getDebugLoc();
14517	Register OldFPSCRReg = MI.getOperand(i: `0`).getReg();
14518
14519	// Save FPSCR value.
14520	if (MRI.use_empty(RegNo: OldFPSCRReg))
14521	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14522	else
14523	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14524
// The floating point rounding mode is in bits 62:63 of the FPSCR, and has
// the following settings:
14527	// 00 Round to nearest
14528	// 01 Round to 0
14529	// 10 Round to +inf
14530	// 11 Round to -inf
14531
14532	// When the operand is immediate, using the two least significant bits of
14533	// the immediate to set the bits 62:63 of FPSCR.
14534	unsigned Mode = MI.getOperand(i: `1`).getImm();
14535	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & `1`) ? PPC::MTFSB1 : PPC::MTFSB0))
14536	.addImm(Val: `31`)
14537	.addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14538
14539	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & `2`) ? PPC::MTFSB1 : PPC::MTFSB0))
14540	.addImm(Val: `30`)
14541	.addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14542	} else if (MI.getOpcode() == PPC::SETRND) {
14543	DebugLoc dl = MI.getDebugLoc();
14544
14545	// Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14546	// or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14547	// If the target doesn't have DirectMove, we should use stack to do the
14548	// conversion, because the target doesn't have the instructions like mtvsrd
14549	// or mfvsrd to do this conversion directly.
14550	auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14551	if (Subtarget.hasDirectMove()) {
14552	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg)
14553	.addReg(RegNo: SrcReg);
14554	} else {
14555	// Use stack to do the register copy.
14556	unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14557	MachineRegisterInfo &RegInfo = F->getRegInfo();
14558	const TargetRegisterClass *RC = RegInfo.getRegClass(Reg: SrcReg);
14559	if (RC == &PPC::F8RCRegClass) {
14560	// Copy register from F8RCRegClass to G8RCRegclass.
14561	assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14562	"Unsupported RegClass.");
14563
14564	StoreOp = PPC::STFD;
14565	LoadOp = PPC::LD;
14566	} else {
14567	// Copy register from G8RCRegClass to F8RCRegclass.
14568	assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14569	(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14570	"Unsupported RegClass.");
14571	}
14572
14573	MachineFrameInfo &MFI = F->getFrameInfo();
14574	int FrameIdx = MFI.CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
14575
14576	MachineMemOperand *MMOStore = F->getMachineMemOperand(
14577	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: `0`),
14578	F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14579	BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14580
14581	// Store the SrcReg into the stack.
14582	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: StoreOp))
14583	.addReg(RegNo: SrcReg)
14584	.addImm(Val: `0`)
14585	.addFrameIndex(Idx: FrameIdx)
14586	.addMemOperand(MMO: MMOStore);
14587
14588	MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14589	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: `0`),
14590	F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14591	BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14592
14593	// Load from the stack where SrcReg is stored, and save to DestReg,
14594	// so we have done the RegClass conversion from RegClass::SrcReg to
14595	// RegClass::DestReg.
14596	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: LoadOp), DestReg)
14597	.addImm(Val: `0`)
14598	.addFrameIndex(Idx: FrameIdx)
14599	.addMemOperand(MMO: MMOLoad);
14600	}
14601	};
14602
14603	Register OldFPSCRReg = MI.getOperand(i: `0`).getReg();
14604
14605	// Save FPSCR value.
14606	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14607
14608	// When the operand is gprc register, use two least significant bits of the
14609	// register and mtfsf instruction to set the bits 62:63 of FPSCR.
14610	//
14611	// copy OldFPSCRTmpReg, OldFPSCRReg
14612	// (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14613	// rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14614	// copy NewFPSCRReg, NewFPSCRTmpReg
14615	// mtfsf 255, NewFPSCRReg
14616	MachineOperand SrcOp = MI.getOperand(i: `1`);
14617	MachineRegisterInfo &RegInfo = F->getRegInfo();
14618	Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14619
14620	copyRegFromG8RCOrF8RC (OldFPSCRTmpReg, OldFPSCRReg);
14621
14622	Register ImDefReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14623	Register ExtSrcReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14624
14625	// The first operand of INSERT_SUBREG should be a register which has
14626	// subregisters, we only care about its RegClass, so we should use an
14627	// IMPLICIT_DEF register.
14628	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: ImDefReg);
14629	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::INSERT_SUBREG), DestReg: ExtSrcReg)
14630	.addReg(RegNo: ImDefReg)
14631	.add(MO: SrcOp)
14632	.addImm(Val: `1`);
14633
14634	Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14635	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDIMI), DestReg: NewFPSCRTmpReg)
14636	.addReg(RegNo: OldFPSCRTmpReg)
14637	.addReg(RegNo: ExtSrcReg)
14638	.addImm(Val: `0`)
14639	.addImm(Val: `62`);
14640
14641	Register NewFPSCRReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14642	copyRegFromG8RCOrF8RC (NewFPSCRReg, NewFPSCRTmpReg);
14643
14644	// The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
14645	// bits of FPSCR.
14646	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSF))
14647	.addImm(Val: `255`)
14648	.addReg(RegNo: NewFPSCRReg)
14649	.addImm(Val: `0`)
14650	.addImm(Val: `0`);
14651	} else if (MI.getOpcode() == PPC::SETFLM) {
14652	DebugLoc Dl = MI.getDebugLoc();
14653
14654	// Result of setflm is previous FPSCR content, so we need to save it first.
14655	Register OldFPSCRReg = MI.getOperand(i: `0`).getReg();
14656	if (MRI.use_empty(RegNo: OldFPSCRReg))
14657	BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14658	else
14659	BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14660
14661	// Put bits in 32:63 to FPSCR.
14662	Register NewFPSCRReg = MI.getOperand(i: `1`).getReg();
14663	BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MTFSF))
14664	.addImm(Val: `255`)
14665	.addReg(RegNo: NewFPSCRReg)
14666	.addImm(Val: `0`)
14667	.addImm(Val: `0`);
14668	} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 \|\|
14669	MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14670	return emitProbedAlloca(MI, MBB: BB);
14671	} else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14672	DebugLoc DL = MI.getDebugLoc();
14673	Register Src = MI.getOperand(i: `2`).getReg();
14674	Register Lo = MI.getOperand(i: `0`).getReg();
14675	Register Hi = MI.getOperand(i: `1`).getReg();
14676	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14677	.addDef(RegNo: Lo)
14678	.addUse(RegNo: Src, Flags: {}, SubReg: PPC::sub_gp8_x1);
14679	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14680	.addDef(RegNo: Hi)
14681	.addUse(RegNo: Src, Flags: {}, SubReg: PPC::sub_gp8_x0);
14682	} else if (MI.getOpcode() == PPC::LQX_PSEUDO \|\|
14683	MI.getOpcode() == PPC::STQX_PSEUDO) {
14684	DebugLoc DL = MI.getDebugLoc();
14685	// Ptr is used as the ptr_rc_no_r0 part
14686	// of LQ/STQ's memory operand and adding result of RA and RB,
14687	// so it has to be g8rc_and_g8rc_nox0.
14688	Register Ptr =
14689	F->getRegInfo().createVirtualRegister(RegClass: &PPC::G8RC_and_G8RC_NOX0RegClass);
14690	Register Val = MI.getOperand(i: `0`).getReg();
14691	Register RA = MI.getOperand(i: `1`).getReg();
14692	Register RB = MI.getOperand(i: `2`).getReg();
14693	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::ADD8), DestReg: Ptr).addReg(RegNo: RA).addReg(RegNo: RB);
14694	BuildMI(BB&: *BB, I&: MI, MIMD: DL,
14695	MCID: MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(Opcode: PPC::LQ)
14696	: TII->get(Opcode: PPC::STQ))
14697	.addReg(RegNo: Val, Flags: getDefRegState(B: MI.getOpcode() == PPC::LQX_PSEUDO))
14698	.addImm(Val: `0`)
14699	.addReg(RegNo: Ptr);
14700	} else if (MI.getOpcode() == PPC::LWAT_PSEUDO \|\|
14701	MI.getOpcode() == PPC::LDAT_PSEUDO) {
14702	DebugLoc DL = MI.getDebugLoc();
14703	Register DstReg = MI.getOperand(i: `0`).getReg();
14704	Register PtrReg = MI.getOperand(i: `1`).getReg();
14705	Register ValReg = MI.getOperand(i: `2`).getReg();
14706	unsigned FC = MI.getOperand(i: `3`).getImm();
14707	bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14708	Register Val64 = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14709	if (IsLwat)
14710	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::SUBREG_TO_REG), DestReg: Val64)
14711	.addReg(RegNo: ValReg)
14712	.addImm(Val: PPC::sub_32);
14713	else
14714	Val64 = ValReg;
14715
14716	Register G8rPair = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14717	Register UndefG8r = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14718	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: UndefG8r);
14719	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::REG_SEQUENCE), DestReg: G8rPair)
14720	.addReg(RegNo: UndefG8r)
14721	.addImm(Val: PPC::sub_gp8_x0)
14722	.addReg(RegNo: Val64)
14723	.addImm(Val: PPC::sub_gp8_x1);
14724
14725	Register PairResult = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14726	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: IsLwat ? PPC::LWAT : PPC::LDAT), DestReg: PairResult)
14727	.addReg(RegNo: G8rPair)
14728	.addReg(RegNo: PtrReg)
14729	.addImm(Val: FC);
14730	Register Result64 = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14731	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: Result64)
14732	.addReg(RegNo: PairResult, Flags: {}, SubReg: PPC::sub_gp8_x0);
14733	if (IsLwat)
14734	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14735	.addReg(RegNo: Result64, Flags: {}, SubReg: PPC::sub_32);
14736	else
14737	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14738	.addReg(RegNo: Result64);
14739	} else if (MI.getOpcode() == PPC::LWAT_COND_PSEUDO \|\|
14740	MI.getOpcode() == PPC::LDAT_COND_PSEUDO) {
14741	DebugLoc DL = MI.getDebugLoc();
14742	Register DstReg = MI.getOperand(i: `0`).getReg();
14743	Register PtrReg = MI.getOperand(i: `1`).getReg();
14744	unsigned FC = MI.getOperand(i: `2`).getImm();
14745	bool IsLwat_Cond = MI.getOpcode() == PPC::LWAT_COND_PSEUDO;
14746
14747	Register Pair = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14748	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: Pair);
14749
14750	Register PairResult = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14751	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: IsLwat_Cond ? PPC::LWAT : PPC::LDAT),
14752	DestReg: PairResult)
14753	.addReg(RegNo: Pair)
14754	.addReg(RegNo: PtrReg)
14755	.addImm(Val: FC);
14756	Register Result64 = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14757	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: Result64)
14758	.addReg(RegNo: PairResult, Flags: {}, SubReg: PPC::sub_gp8_x0);
14759	if (IsLwat_Cond)
14760	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14761	.addReg(RegNo: Result64, Flags: {}, SubReg: PPC::sub_32);
14762	else
14763	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14764	.addReg(RegNo: Result64);
14765	} else {
14766	llvm_unreachable("Unexpected instr type to insert");
14767	}
14768
14769	MI.eraseFromParent(); // The pseudo instruction is gone now.
14770	return BB;
14771	}
14772
14773	//===----------------------------------------------------------------------===//
14774	// Target Optimization Hooks
14775	//===----------------------------------------------------------------------===//
14776
14777	static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14778	// For the estimates, convergence is quadratic, so we essentially double the
14779	// number of digits correct after every iteration. For both FRE and FRSQRTE,
14780	// the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14781	// this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14782	int RefinementSteps = Subtarget.hasRecipPrec() ? `1` : `3`;
14783	if (VT.getScalarType() == MVT::f64)
14784	RefinementSteps++;
14785	return RefinementSteps;
14786	}
14787
14788	SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14789	const DenormalMode &Mode,
14790	SDNodeFlags Flags) const {
14791	// We only have VSX Vector Test for software Square Root.
14792	EVT VT = Op.getValueType();
14793	if (!isTypeLegal(VT: MVT::i1) \|\|
14794	(VT != MVT::f64 &&
14795	((VT != MVT::v2f64 && VT != MVT::v4f32) \|\| !Subtarget.hasVSX())))
14796	return TargetLowering::getSqrtInputTest(Operand: Op, DAG, Mode, Flags);
14797
14798	SDLoc DL(Op);
14799	// The output register of FTSQRT is CR field.
14800	SDValue FTSQRT = DAG.getNode(Opcode: PPCISD::FTSQRT, DL, VT: MVT::i32, Operand: Op, Flags);
14801	// ftsqrt BF,FRB
14802	// Let e_b be the unbiased exponent of the double-precision
14803	// floating-point operand in register FRB.
14804	// fe_flag is set to 1 if either of the following conditions occurs.
14805	// - The double-precision floating-point operand in register FRB is a zero,
14806	// a NaN, or an infinity, or a negative value.
14807	// - e_b is less than or equal to -970.
14808	// Otherwise fe_flag is set to 0.
14809	// Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14810	// not eligible for iteration. (zero/negative/infinity/nan or unbiased
14811	// exponent is less than -970)
14812	SDValue SRIdxVal = DAG.getTargetConstant(Val: PPC::sub_eq, DL, VT: MVT::i32);
14813	return SDValue (DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i1,
14814	Op1: FTSQRT, Op2: SRIdxVal),
14815	`0`);
14816	}
14817
14818	SDValue
14819	PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14820	SelectionDAG &DAG) const {
14821	// We only have VSX Vector Square Root.
14822	EVT VT = Op.getValueType();
14823	if (VT != MVT::f64 &&
14824	((VT != MVT::v2f64 && VT != MVT::v4f32) \|\| !Subtarget.hasVSX()))
14825	return TargetLowering::getSqrtResultForDenormInput(Operand: Op, DAG);
14826
14827	return DAG.getNode(Opcode: PPCISD::FSQRT, DL: SDLoc (Op), VT, Operand: Op);
14828	}
14829
14830	SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14831	int Enabled, int &RefinementSteps,
14832	bool &UseOneConstNR,
14833	bool Reciprocal) const {
14834	EVT VT = Operand.getValueType();
14835	if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) \|\|
14836	(VT == MVT::f64 && Subtarget.hasFRSQRTE()) \|\|
14837	(VT == MVT::v4f32 && Subtarget.hasAltivec()) \|\|
14838	(VT == MVT::v2f64 && Subtarget.hasVSX())) {
14839	if (RefinementSteps == ReciprocalEstimate::Unspecified)
14840	RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14841
14842	// The Newton-Raphson computation with a single constant does not provide
14843	// enough accuracy on some CPUs.
14844	UseOneConstNR = !Subtarget.needsTwoConstNR();
14845	return DAG.getNode(Opcode: PPCISD::FRSQRTE, DL: SDLoc (Operand), VT, Operand);
14846	}
14847	return SDValue ();
14848	}
14849
14850	SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14851	int Enabled,
14852	int &RefinementSteps) const {
14853	EVT VT = Operand.getValueType();
14854	if ((VT == MVT::f32 && Subtarget.hasFRES()) \|\|
14855	(VT == MVT::f64 && Subtarget.hasFRE()) \|\|
14856	(VT == MVT::v4f32 && Subtarget.hasAltivec()) \|\|
14857	(VT == MVT::v2f64 && Subtarget.hasVSX())) {
14858	if (RefinementSteps == ReciprocalEstimate::Unspecified)
14859	RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14860	return DAG.getNode(Opcode: PPCISD::FRE, DL: SDLoc (Operand), VT, Operand);
14861	}
14862	return SDValue ();
14863	}
14864
14865	unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14866	// Note: This functionality is used only when arcp is enabled, and
14867	// on cores with reciprocal estimates (which are used when arcp is
14868	// enabled for division), this functionality is redundant with the default
14869	// combiner logic (once the division -> reciprocal/multiply transformation
14870	// has taken place). As a result, this matters more for older cores than for
14871	// newer ones.
14872
14873	// Combine multiple FDIVs with the same divisor into multiple FMULs by the
14874	// reciprocal if there are two or more FDIVs (for embedded cores with only
14875	// one FP pipeline) for three or more FDIVs (for generic OOO cores).
14876	switch (Subtarget.getCPUDirective()) {
14877	default:
14878	return `3`;
14879	case PPC::DIR_440:
14880	case PPC::DIR_A2:
14881	case PPC::DIR_E500:
14882	case PPC::DIR_E500mc:
14883	case PPC::DIR_E5500:
14884	return `2`;
14885	}
14886	}
14887
14888	// isConsecutiveLSLoc needs to work even if all adds have not yet been
14889	// collapsed, and so we need to look through chains of them.
14890	static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14891	int64_t& Offset, SelectionDAG &DAG) {
14892	if (DAG.isBaseWithConstantOffset(Op: Loc)) {
14893	Base = Loc.getOperand(i: `0`);
14894	Offset += cast<ConstantSDNode>(Val: Loc.getOperand(i: `1`))->getSExtValue();
14895
14896	// The base might itself be a base plus an offset, and if so, accumulate
14897	// that as well.
14898	getBaseWithConstantOffset(Loc: Loc.getOperand(i: `0`), Base, Offset, DAG);
14899	}
14900	}
14901
14902	static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14903	unsigned Bytes, int Dist,
14904	SelectionDAG &DAG) {
14905	if (VT.getSizeInBits() / `8` != Bytes)
14906	return false;
14907
14908	SDValue BaseLoc = Base->getBasePtr();
14909	if (Loc.getOpcode() == ISD::FrameIndex) {
14910	if (BaseLoc.getOpcode() != ISD::FrameIndex)
14911	return false;
14912	const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14913	int FI = cast<FrameIndexSDNode>(Val&: Loc)->getIndex();
14914	int BFI = cast<FrameIndexSDNode>(Val&: BaseLoc)->getIndex();
14915	int FS = MFI.getObjectSize(ObjectIdx: FI);
14916	int BFS = MFI.getObjectSize(ObjectIdx: BFI);
14917	if (FS != BFS \|\| FS != (int)Bytes) return false;
14918	return MFI.getObjectOffset(ObjectIdx: FI) == (MFI.getObjectOffset(ObjectIdx: BFI) + Dist*Bytes);
14919	}
14920
14921	SDValue Base1 = Loc, Base2 = BaseLoc;
14922	int64_t Offset1 = `0`, Offset2 = `0`;
14923	getBaseWithConstantOffset(Loc, Base&: Base1, Offset&: Offset1, DAG);
14924	getBaseWithConstantOffset(Loc: BaseLoc, Base&: Base2, Offset&: Offset2, DAG);
14925	if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14926	return true;
14927
14928	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14929	const GlobalValue GV1 = nullptr*;
14930	const GlobalValue GV2 = nullptr*;
14931	Offset1 = `0`;
14932	Offset2 = `0`;
14933	bool isGA1 = TLI.isGAPlusOffset(N: Loc.getNode(), GA&: GV1, Offset&: Offset1);
14934	bool isGA2 = TLI.isGAPlusOffset(N: BaseLoc.getNode(), GA&: GV2, Offset&: Offset2);
14935	if (isGA1 && isGA2 && GV1 == GV2)
14936	return Offset1 == (Offset2 + Dist*Bytes);
14937	return false;
14938	}
14939
14940	// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14941	// not enforce equality of the chain operands.
14942	static bool isConsecutiveLS(SDNode N, LSBaseSDNode Base,
14943	unsigned Bytes, int Dist,
14944	SelectionDAG &DAG) {
14945	if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Val: N)) {
14946	EVT VT = LS->getMemoryVT();
14947	SDValue Loc = LS->getBasePtr();
14948	return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14949	}
14950
14951	if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14952	EVT VT;
14953	switch (N->getConstantOperandVal(Num: `1`)) {
14954	default: return false;
14955	case Intrinsic::ppc_altivec_lvx:
14956	case Intrinsic::ppc_altivec_lvxl:
14957	case Intrinsic::ppc_vsx_lxvw4x:
14958	case Intrinsic::ppc_vsx_lxvw4x_be:
14959	VT = MVT::v4i32;
14960	break;
14961	case Intrinsic::ppc_vsx_lxvd2x:
14962	case Intrinsic::ppc_vsx_lxvd2x_be:
14963	VT = MVT::v2f64;
14964	break;
14965	case Intrinsic::ppc_altivec_lvebx:
14966	VT = MVT::i8;
14967	break;
14968	case Intrinsic::ppc_altivec_lvehx:
14969	VT = MVT::i16;
14970	break;
14971	case Intrinsic::ppc_altivec_lvewx:
14972	VT = MVT::i32;
14973	break;
14974	}
14975
14976	return isConsecutiveLSLoc(Loc: N->getOperand(Num: `2`), VT, Base, Bytes, Dist, DAG);
14977	}
14978
14979	if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14980	EVT VT;
14981	switch (N->getConstantOperandVal(Num: `1`)) {
14982	default: return false;
14983	case Intrinsic::ppc_altivec_stvx:
14984	case Intrinsic::ppc_altivec_stvxl:
14985	case Intrinsic::ppc_vsx_stxvw4x:
14986	VT = MVT::v4i32;
14987	break;
14988	case Intrinsic::ppc_vsx_stxvd2x:
14989	VT = MVT::v2f64;
14990	break;
14991	case Intrinsic::ppc_vsx_stxvw4x_be:
14992	VT = MVT::v4i32;
14993	break;
14994	case Intrinsic::ppc_vsx_stxvd2x_be:
14995	VT = MVT::v2f64;
14996	break;
14997	case Intrinsic::ppc_altivec_stvebx:
14998	VT = MVT::i8;
14999	break;
15000	case Intrinsic::ppc_altivec_stvehx:
15001	VT = MVT::i16;
15002	break;
15003	case Intrinsic::ppc_altivec_stvewx:
15004	VT = MVT::i32;
15005	break;
15006	}
15007
15008	return isConsecutiveLSLoc(Loc: N->getOperand(Num: `3`), VT, Base, Bytes, Dist, DAG);
15009	}
15010
15011	return false;
15012	}
15013
15014	// Return true is there is a nearyby consecutive load to the one provided
15015	// (regardless of alignment). We search up and down the chain, looking though
15016	// token factors and other loads (but nothing else). As a result, a true result
15017	// indicates that it is safe to create a new consecutive load adjacent to the
15018	// load provided.
15019	static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
15020	SDValue Chain = LD->getChain();
15021	EVT VT = LD->getMemoryVT();
15022
15023	SmallPtrSet<SDNode *, `16`> LoadRoots;
15024	SmallVector<SDNode *, `8`> Queue(`1`, Chain.getNode());
15025	SmallPtrSet<SDNode *, `16`> Visited;
15026
15027	// First, search up the chain, branching to follow all token-factor operands.
15028	// If we find a consecutive load, then we're done, otherwise, record all
15029	// nodes just above the top-level loads and token factors.
15030	while (!Queue.empty()) {
15031	SDNode *ChainNext = Queue.pop_back_val();
15032	if (!Visited.insert(Ptr: ChainNext).second)
15033	continue;
15034
15035	if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: ChainNext)) {
15036	if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: `1`, DAG))
15037	return true;
15038
15039	if (!Visited.count(Ptr: ChainLD->getChain().getNode()))
15040	Queue.push_back(Elt: ChainLD->getChain().getNode());
15041	} else if (ChainNext->getOpcode() == ISD::TokenFactor) {
15042	for (const SDUse &O : ChainNext->ops())
15043	if (!Visited.count(Ptr: O.getNode()))
15044	Queue.push_back(Elt: O.getNode());
15045	} else
15046	LoadRoots.insert(Ptr: ChainNext);
15047	}
15048
15049	// Second, search down the chain, starting from the top-level nodes recorded
15050	// in the first phase. These top-level nodes are the nodes just above all
15051	// loads and token factors. Starting with their uses, recursively look though
15052	// all loads (just the chain uses) and token factors to find a consecutive
15053	// load.
15054	Visited.clear();
15055	Queue.clear();
15056
15057	for (SDNode *I : LoadRoots) {
15058	Queue.push_back(Elt: I);
15059
15060	while (!Queue.empty()) {
15061	SDNode *LoadRoot = Queue.pop_back_val();
15062	if (!Visited.insert(Ptr: LoadRoot).second)
15063	continue;
15064
15065	if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: LoadRoot))
15066	if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: `1`, DAG))
15067	return true;
15068
15069	for (SDNode *U : LoadRoot->users())
15070	if (((isa<MemSDNode>(Val: U) &&
15071	cast<MemSDNode>(Val: U)->getChain().getNode() == LoadRoot) \|\|
15072	U->getOpcode() == ISD::TokenFactor) &&
15073	!Visited.count(Ptr: U))
15074	Queue.push_back(Elt: U);
15075	}
15076	}
15077
15078	return false;
15079	}
15080
15081	/// This function is called when we have proved that a SETCC node can be replaced
15082	/// by subtraction (and other supporting instructions) so that the result of
15083	/// comparison is kept in a GPR instead of CR. This function is purely for
15084	/// codegen purposes and has some flags to guide the codegen process.
15085	static SDValue generateEquivalentSub(SDNode N, int* Size, bool Complement,
15086	bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15087	assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15088
15089	// Zero extend the operands to the largest legal integer. Originally, they
15090	// must be of a strictly smaller size.
15091	auto Op0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: `0`),
15092	N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));
15093	auto Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: `1`),
15094	N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));
15095
15096	// Swap if needed. Depends on the condition code.
15097	if (Swap)
15098	std::swap(a&: Op0, b&: Op1);
15099
15100	// Subtract extended integers.
15101	auto SubNode = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: Op0, N2: Op1);
15102
15103	// Move the sign bit to the least significant position and zero out the rest.
15104	// Now the least significant bit carries the result of original comparison.
15105	auto Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SubNode,
15106	N2: DAG.getConstant(Val: Size - `1`, DL, VT: MVT::i32));
15107	auto Final = Shifted;
15108
15109	// Complement the result if needed. Based on the condition code.
15110	if (Complement)
15111	Final = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, N1: Shifted,
15112	N2: DAG.getConstant(Val: `1`, DL, VT: MVT::i64));
15113
15114	return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: Final);
15115	}
15116
15117	SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15118	DAGCombinerInfo &DCI) const {
15119	assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15120
15121	SelectionDAG &DAG = DCI.DAG;
15122	SDLoc DL(N);
15123
15124	// Size of integers being compared has a critical role in the following
15125	// analysis, so we prefer to do this when all types are legal.
15126	if (!DCI.isAfterLegalizeDAG())
15127	return SDValue ();
15128
15129	// If all users of SETCC extend its value to a legal integer type
15130	// then we replace SETCC with a subtraction
15131	for (const SDNode *U : N->users())
15132	if (U->getOpcode() != ISD::ZERO_EXTEND)
15133	return SDValue ();
15134
15135	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `2`))->get();
15136	auto OpSize = N->getOperand(Num: `0`).getValueSizeInBits();
15137
15138	unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
15139
15140	if (OpSize < Size) {
15141	switch (CC) {
15142	default: break;
15143	case ISD::SETULT:
15144	return generateEquivalentSub(N, Size, Complement: false, Swap: false, DL, DAG);
15145	case ISD::SETULE:
15146	return generateEquivalentSub(N, Size, Complement: true, Swap: true, DL, DAG);
15147	case ISD::SETUGT:
15148	return generateEquivalentSub(N, Size, Complement: false, Swap: true, DL, DAG);
15149	case ISD::SETUGE:
15150	return generateEquivalentSub(N, Size, Complement: true, Swap: false, DL, DAG);
15151	}
15152	}
15153
15154	return SDValue ();
15155	}
15156
15157	SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15158	DAGCombinerInfo &DCI) const {
15159	SelectionDAG &DAG = DCI.DAG;
15160	SDLoc dl(N);
15161
15162	assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15163	// If we're tracking CR bits, we need to be careful that we don't have:
15164	// trunc(binary-ops(zext(x), zext(y)))
15165	// or
15166	// trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15167	// such that we're unnecessarily moving things into GPRs when it would be
15168	// better to keep them in CR bits.
15169
15170	// Note that trunc here can be an actual i1 trunc, or can be the effective
15171	// truncation that comes from a setcc or select_cc.
15172	if (N->getOpcode() == ISD::TRUNCATE &&
15173	N->getValueType(ResNo: `0`) != MVT::i1)
15174	return SDValue ();
15175
15176	if (N->getOperand(Num: `0`).getValueType() != MVT::i32 &&
15177	N->getOperand(Num: `0`).getValueType() != MVT::i64)
15178	return SDValue ();
15179
15180	if (N->getOpcode() == ISD::SETCC \|\|
15181	N->getOpcode() == ISD::SELECT_CC) {
15182	// If we're looking at a comparison, then we need to make sure that the
15183	// high bits (all except for the first) don't matter the result.
15184	ISD::CondCode CC =
15185	cast<CondCodeSDNode>(Val: N->getOperand(
15186	Num: N->getOpcode() == ISD::SETCC ? `2` : `4`))->get();
15187	unsigned OpBits = N->getOperand(Num: `0`).getValueSizeInBits();
15188
15189	if (ISD::isSignedIntSetCC(Code: CC)) {
15190	if (DAG.ComputeNumSignBits(Op: N->getOperand(Num: `0`)) != OpBits \|\|
15191	DAG.ComputeNumSignBits(Op: N->getOperand(Num: `1`)) != OpBits)
15192	return SDValue ();
15193	} else if (ISD::isUnsignedIntSetCC(Code: CC)) {
15194	if (!DAG.MaskedValueIsZero(Op: N->getOperand(Num: `0`),
15195	Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-`1`)) \|\|
15196	!DAG.MaskedValueIsZero(Op: N->getOperand(Num: `1`),
15197	Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-`1`)))
15198	return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15199	: SDValue ());
15200	} else {
15201	// This is neither a signed nor an unsigned comparison, just make sure
15202	// that the high bits are equal.
15203	KnownBits Op1Known = DAG.computeKnownBits(Op: N->getOperand(Num: `0`));
15204	KnownBits Op2Known = DAG.computeKnownBits(Op: N->getOperand(Num: `1`));
15205
15206	// We don't really care about what is known about the first bit (if
15207	// anything), so pretend that it is known zero for both to ensure they can
15208	// be compared as constants.
15209	Op1Known.Zero.setBit(`0`); Op1Known.One.clearBit(BitPosition: `0`);
15210	Op2Known.Zero.setBit(`0`); Op2Known.One.clearBit(BitPosition: `0`);
15211
15212	if (!Op1Known.isConstant() \|\| !Op2Known.isConstant() \|\|
15213	Op1Known.getConstant() != Op2Known.getConstant())
15214	return SDValue ();
15215	}
15216	}
15217
15218	// We now know that the higher-order bits are irrelevant, we just need to
15219	// make sure that all of the intermediate operations are bit operations, and
15220	// all inputs are extensions.
15221	if (N->getOperand(Num: `0`).getOpcode() != ISD::AND &&
15222	N->getOperand(Num: `0`).getOpcode() != ISD::OR &&
15223	N->getOperand(Num: `0`).getOpcode() != ISD::XOR &&
15224	N->getOperand(Num: `0`).getOpcode() != ISD::SELECT &&
15225	N->getOperand(Num: `0`).getOpcode() != ISD::SELECT_CC &&
15226	N->getOperand(Num: `0`).getOpcode() != ISD::TRUNCATE &&
15227	N->getOperand(Num: `0`).getOpcode() != ISD::SIGN_EXTEND &&
15228	N->getOperand(Num: `0`).getOpcode() != ISD::ZERO_EXTEND &&
15229	N->getOperand(Num: `0`).getOpcode() != ISD::ANY_EXTEND)
15230	return SDValue ();
15231
15232	if ((N->getOpcode() == ISD::SETCC \|\| N->getOpcode() == ISD::SELECT_CC) &&
15233	N->getOperand(Num: `1`).getOpcode() != ISD::AND &&
15234	N->getOperand(Num: `1`).getOpcode() != ISD::OR &&
15235	N->getOperand(Num: `1`).getOpcode() != ISD::XOR &&
15236	N->getOperand(Num: `1`).getOpcode() != ISD::SELECT &&
15237	N->getOperand(Num: `1`).getOpcode() != ISD::SELECT_CC &&
15238	N->getOperand(Num: `1`).getOpcode() != ISD::TRUNCATE &&
15239	N->getOperand(Num: `1`).getOpcode() != ISD::SIGN_EXTEND &&
15240	N->getOperand(Num: `1`).getOpcode() != ISD::ZERO_EXTEND &&
15241	N->getOperand(Num: `1`).getOpcode() != ISD::ANY_EXTEND)
15242	return SDValue ();
15243
15244	SmallVector<SDValue, `4`> Inputs;
15245	SmallVector<SDValue, `8`> BinOps, PromOps;
15246	SmallPtrSet<SDNode *, `16`> Visited;
15247
15248	for (unsigned i = `0`; i < `2`; ++i) {
15249	if (((N->getOperand(Num: i).getOpcode() == ISD::SIGN_EXTEND \|\|
15250	N->getOperand(Num: i).getOpcode() == ISD::ZERO_EXTEND \|\|
15251	N->getOperand(Num: i).getOpcode() == ISD::ANY_EXTEND) &&
15252	N->getOperand(Num: i).getOperand(i: `0`).getValueType() == MVT::i1) \|\|
15253	isa<ConstantSDNode>(Val: N->getOperand(Num: i)))
15254	Inputs.push_back(Elt: N->getOperand(Num: i));
15255	else
15256	BinOps.push_back(Elt: N->getOperand(Num: i));
15257
15258	if (N->getOpcode() == ISD::TRUNCATE)
15259	break;
15260	}
15261
15262	// Visit all inputs, collect all binary operations (and, or, xor and
15263	// select) that are all fed by extensions.
15264	while (!BinOps.empty()) {
15265	SDValue BinOp = BinOps.pop_back_val();
15266
15267	if (!Visited.insert(Ptr: BinOp.getNode()).second)
15268	continue;
15269
15270	PromOps.push_back(Elt: BinOp);
15271
15272	for (unsigned i = `0`, ie = BinOp.getNumOperands(); i != ie; ++i) {
15273	// The condition of the select is not promoted.
15274	if (BinOp.getOpcode() == ISD::SELECT && i == `0`)
15275	continue;
15276	if (BinOp.getOpcode() == ISD::SELECT_CC && i != `2` && i != `3`)
15277	continue;
15278
15279	if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND \|\|
15280	BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND \|\|
15281	BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15282	BinOp.getOperand(i).getOperand(i: `0`).getValueType() == MVT::i1) \|\|
15283	isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
15284	Inputs.push_back(Elt: BinOp.getOperand(i));
15285	} else if (BinOp.getOperand(i).getOpcode() == ISD::AND \|\|
15286	BinOp.getOperand(i).getOpcode() == ISD::OR \|\|
15287	BinOp.getOperand(i).getOpcode() == ISD::XOR \|\|
15288	BinOp.getOperand(i).getOpcode() == ISD::SELECT \|\|
15289	BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC \|\|
15290	BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE \|\|
15291	BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND \|\|
15292	BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND \|\|
15293	BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15294	BinOps.push_back(Elt: BinOp.getOperand(i));
15295	} else {
15296	// We have an input that is not an extension or another binary
15297	// operation; we'll abort this transformation.
15298	return SDValue ();
15299	}
15300	}
15301	}
15302
15303	// Make sure that this is a self-contained cluster of operations (which
15304	// is not quite the same thing as saying that everything has only one
15305	// use).
15306	for (unsigned i = `0`, ie = Inputs.size(); i != ie; ++i) {
15307	if (isa<ConstantSDNode>(Val: Inputs [i]))
15308	continue;
15309
15310	for (const SDNode *User : Inputs [i].getNode()->users()) {
15311	if (User != N && !Visited.count(Ptr: User))
15312	return SDValue ();
15313
15314	// Make sure that we're not going to promote the non-output-value
15315	// operand(s) or SELECT or SELECT_CC.
15316	// FIXME: Although we could sometimes handle this, and it does occur in
15317	// practice that one of the condition inputs to the select is also one of
15318	// the outputs, we currently can't deal with this.
15319	if (User->getOpcode() == ISD::SELECT) {
15320	if (User->getOperand(Num: `0`) == Inputs [i])
15321	return SDValue ();
15322	} else if (User->getOpcode() == ISD::SELECT_CC) {
15323	if (User->getOperand(Num: `0`) == Inputs [i] \|\|
15324	User->getOperand(Num: `1`) == Inputs [i])
15325	return SDValue ();
15326	}
15327	}
15328	}
15329
15330	for (unsigned i = `0`, ie = PromOps.size(); i != ie; ++i) {
15331	for (const SDNode *User : PromOps [i].getNode()->users()) {
15332	if (User != N && !Visited.count(Ptr: User))
15333	return SDValue ();
15334
15335	// Make sure that we're not going to promote the non-output-value
15336	// operand(s) or SELECT or SELECT_CC.
15337	// FIXME: Although we could sometimes handle this, and it does occur in
15338	// practice that one of the condition inputs to the select is also one of
15339	// the outputs, we currently can't deal with this.
15340	if (User->getOpcode() == ISD::SELECT) {
15341	if (User->getOperand(Num: `0`) == PromOps [i])
15342	return SDValue ();
15343	} else if (User->getOpcode() == ISD::SELECT_CC) {
15344	if (User->getOperand(Num: `0`) == PromOps [i] \|\|
15345	User->getOperand(Num: `1`) == PromOps [i])
15346	return SDValue ();
15347	}
15348	}
15349	}
15350
15351	// Replace all inputs with the extension operand.
15352	for (unsigned i = `0`, ie = Inputs.size(); i != ie; ++i) {
15353	// Constants may have users outside the cluster of to-be-promoted nodes,
15354	// and so we need to replace those as we do the promotions.
15355	if (isa<ConstantSDNode>(Val: Inputs [i]))
15356	continue;
15357	else
15358	DAG.ReplaceAllUsesOfValueWith(From: Inputs [i], To: Inputs [i].getOperand(i: `0`));
15359	}
15360
15361	std::list<HandleSDNode> PromOpHandles;
15362	for (auto &PromOp : PromOps)
15363	PromOpHandles.emplace_back(args&: PromOp);
15364
15365	// Replace all operations (these are all the same, but have a different
15366	// (i1) return type). DAG.getNode will validate that the types of
15367	// a binary operator match, so go through the list in reverse so that
15368	// we've likely promoted both operands first. Any intermediate truncations or
15369	// extensions disappear.
15370	while (!PromOpHandles.empty()) {
15371	SDValue PromOp = PromOpHandles.back().getValue();
15372	PromOpHandles.pop_back();
15373
15374	if (PromOp.getOpcode() == ISD::TRUNCATE \|\|
15375	PromOp.getOpcode() == ISD::SIGN_EXTEND \|\|
15376	PromOp.getOpcode() == ISD::ZERO_EXTEND \|\|
15377	PromOp.getOpcode() == ISD::ANY_EXTEND) {
15378	if (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: `0`)) &&
15379	PromOp.getOperand(i: `0`).getValueType() != MVT::i1) {
15380	// The operand is not yet ready (see comment below).
15381	PromOpHandles.emplace_front(args&: PromOp);
15382	continue;
15383	}
15384
15385	SDValue RepValue = PromOp.getOperand(i: `0`);
15386	if (isa<ConstantSDNode>(Val: RepValue))
15387	RepValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: RepValue);
15388
15389	DAG.ReplaceAllUsesOfValueWith(From: PromOp, To: RepValue);
15390	continue;
15391	}
15392
15393	unsigned C;
15394	switch (PromOp.getOpcode()) {
15395	default: C = `0`; break;
15396	case ISD::SELECT: C = `1`; break;
15397	case ISD::SELECT_CC: C = `2`; break;
15398	}
15399
15400	if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
15401	PromOp.getOperand(i: C).getValueType() != MVT::i1) \|\|
15402	(!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+`1`)) &&
15403	PromOp.getOperand(i: C+`1`).getValueType() != MVT::i1)) {
15404	// The to-be-promoted operands of this node have not yet been
15405	// promoted (this should be rare because we're going through the
15406	// list backward, but if one of the operands has several users in
15407	// this cluster of to-be-promoted nodes, it is possible).
15408	PromOpHandles.emplace_front(args&: PromOp);
15409	continue;
15410	}
15411
15412	SmallVector<SDValue, `3`> Ops(PromOp.getNode()->ops());
15413
15414	// If there are any constant inputs, make sure they're replaced now.
15415	for (unsigned i = `0`; i < `2`; ++i)
15416	if (isa<ConstantSDNode>(Val: Ops [C+i]))
15417	Ops [C+i] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: Ops [C+i]);
15418
15419	DAG.ReplaceAllUsesOfValueWith(From: PromOp,
15420	To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: MVT::i1, Ops));
15421	}
15422
15423	// Now we're left with the initial truncation itself.
15424	if (N->getOpcode() == ISD::TRUNCATE)
15425	return N->getOperand(Num: `0`);
15426
15427	// Otherwise, this is a comparison. The operands to be compared have just
15428	// changed type (to i1), but everything else is the same.
15429	return SDValue (N, `0`);
15430	}
15431
15432	SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15433	DAGCombinerInfo &DCI) const {
15434	SelectionDAG &DAG = DCI.DAG;
15435	SDLoc dl(N);
15436
15437	// If we're tracking CR bits, we need to be careful that we don't have:
15438	// zext(binary-ops(trunc(x), trunc(y)))
15439	// or
15440	// zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15441	// such that we're unnecessarily moving things into CR bits that can more
15442	// efficiently stay in GPRs. Note that if we're not certain that the high
15443	// bits are set as required by the final extension, we still may need to do
15444	// some masking to get the proper behavior.
15445
15446	// This same functionality is important on PPC64 when dealing with
15447	// 32-to-64-bit extensions; these occur often when 32-bit values are used as
15448	// the return values of functions. Because it is so similar, it is handled
15449	// here as well.
15450
15451	if (N->getValueType(ResNo: `0`) != MVT::i32 &&
15452	N->getValueType(ResNo: `0`) != MVT::i64)
15453	return SDValue ();
15454
15455	if (!((N->getOperand(Num: `0`).getValueType() == MVT::i1 && Subtarget.useCRBits()) \|\|
15456	(N->getOperand(Num: `0`).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15457	return SDValue ();
15458
15459	if (N->getOperand(Num: `0`).getOpcode() != ISD::AND &&
15460	N->getOperand(Num: `0`).getOpcode() != ISD::OR &&
15461	N->getOperand(Num: `0`).getOpcode() != ISD::XOR &&
15462	N->getOperand(Num: `0`).getOpcode() != ISD::SELECT &&
15463	N->getOperand(Num: `0`).getOpcode() != ISD::SELECT_CC)
15464	return SDValue ();
15465
15466	SmallVector<SDValue, `4`> Inputs;
15467	SmallVector<SDValue, `8`> BinOps(`1`, N->getOperand(Num: `0`)), PromOps;
15468	SmallPtrSet<SDNode *, `16`> Visited;
15469
15470	// Visit all inputs, collect all binary operations (and, or, xor and
15471	// select) that are all fed by truncations.
15472	while (!BinOps.empty()) {
15473	SDValue BinOp = BinOps.pop_back_val();
15474
15475	if (!Visited.insert(Ptr: BinOp.getNode()).second)
15476	continue;
15477
15478	PromOps.push_back(Elt: BinOp);
15479
15480	for (unsigned i = `0`, ie = BinOp.getNumOperands(); i != ie; ++i) {
15481	// The condition of the select is not promoted.
15482	if (BinOp.getOpcode() == ISD::SELECT && i == `0`)
15483	continue;
15484	if (BinOp.getOpcode() == ISD::SELECT_CC && i != `2` && i != `3`)
15485	continue;
15486
15487	if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE \|\|
15488	isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
15489	Inputs.push_back(Elt: BinOp.getOperand(i));
15490	} else if (BinOp.getOperand(i).getOpcode() == ISD::AND \|\|
15491	BinOp.getOperand(i).getOpcode() == ISD::OR \|\|
15492	BinOp.getOperand(i).getOpcode() == ISD::XOR \|\|
15493	BinOp.getOperand(i).getOpcode() == ISD::SELECT \|\|
15494	BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15495	BinOps.push_back(Elt: BinOp.getOperand(i));
15496	} else {
15497	// We have an input that is not a truncation or another binary
15498	// operation; we'll abort this transformation.
15499	return SDValue ();
15500	}
15501	}
15502	}
15503
15504	// The operands of a select that must be truncated when the select is
15505	// promoted because the operand is actually part of the to-be-promoted set.
15506	DenseMap<SDNode *, EVT> SelectTruncOp[`2`];
15507
15508	// Make sure that this is a self-contained cluster of operations (which
15509	// is not quite the same thing as saying that everything has only one
15510	// use).
15511	for (unsigned i = `0`, ie = Inputs.size(); i != ie; ++i) {
15512	if (isa<ConstantSDNode>(Val: Inputs [i]))
15513	continue;
15514
15515	for (SDNode *User : Inputs [i].getNode()->users()) {
15516	if (User != N && !Visited.count(Ptr: User))
15517	return SDValue ();
15518
15519	// If we're going to promote the non-output-value operand(s) or SELECT or
15520	// SELECT_CC, record them for truncation.
15521	if (User->getOpcode() == ISD::SELECT) {
15522	if (User->getOperand(Num: `0`) == Inputs [i])
15523	SelectTruncOp[`0`].insert(KV: std::make_pair(x&: User,
15524	y: User->getOperand(Num: `0`).getValueType()));
15525	} else if (User->getOpcode() == ISD::SELECT_CC) {
15526	if (User->getOperand(Num: `0`) == Inputs [i])
15527	SelectTruncOp[`0`].insert(KV: std::make_pair(x&: User,
15528	y: User->getOperand(Num: `0`).getValueType()));
15529	if (User->getOperand(Num: `1`) == Inputs [i])
15530	SelectTruncOp[`1`].insert(KV: std::make_pair(x&: User,
15531	y: User->getOperand(Num: `1`).getValueType()));
15532	}
15533	}
15534	}
15535
15536	for (unsigned i = `0`, ie = PromOps.size(); i != ie; ++i) {
15537	for (SDNode *User : PromOps [i].getNode()->users()) {
15538	if (User != N && !Visited.count(Ptr: User))
15539	return SDValue ();
15540
15541	// If we're going to promote the non-output-value operand(s) or SELECT or
15542	// SELECT_CC, record them for truncation.
15543	if (User->getOpcode() == ISD::SELECT) {
15544	if (User->getOperand(Num: `0`) == PromOps [i])
15545	SelectTruncOp[`0`].insert(KV: std::make_pair(x&: User,
15546	y: User->getOperand(Num: `0`).getValueType()));
15547	} else if (User->getOpcode() == ISD::SELECT_CC) {
15548	if (User->getOperand(Num: `0`) == PromOps [i])
15549	SelectTruncOp[`0`].insert(KV: std::make_pair(x&: User,
15550	y: User->getOperand(Num: `0`).getValueType()));
15551	if (User->getOperand(Num: `1`) == PromOps [i])
15552	SelectTruncOp[`1`].insert(KV: std::make_pair(x&: User,
15553	y: User->getOperand(Num: `1`).getValueType()));
15554	}
15555	}
15556	}
15557
15558	unsigned PromBits = N->getOperand(Num: `0`).getValueSizeInBits();
15559	bool ReallyNeedsExt = false;
15560	if (N->getOpcode() != ISD::ANY_EXTEND) {
15561	// If all of the inputs are not already sign/zero extended, then
15562	// we'll still need to do that at the end.
15563	for (unsigned i = `0`, ie = Inputs.size(); i != ie; ++i) {
15564	if (isa<ConstantSDNode>(Val: Inputs [i]))
15565	continue;
15566
15567	unsigned OpBits =
15568	Inputs [i].getOperand(i: `0`).getValueSizeInBits();
15569	assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15570
15571	if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15572	!DAG.MaskedValueIsZero(Op: Inputs [i].getOperand(i: `0`),
15573	Mask: APInt::getHighBitsSet(numBits: OpBits,
15574	hiBitsSet: OpBits-PromBits))) \|\|
15575	(N->getOpcode() == ISD::SIGN_EXTEND &&
15576	DAG.ComputeNumSignBits(Op: Inputs [i].getOperand(i: `0`)) <
15577	(OpBits-(PromBits-`1`)))) {
15578	ReallyNeedsExt = true;
15579	break;
15580	}
15581	}
15582	}
15583
15584	// Convert PromOps to handles before doing any RAUW operations, as these
15585	// may CSE with existing nodes, deleting the originals.
15586	std::list<HandleSDNode> PromOpHandles;
15587	for (auto &PromOp : PromOps)
15588	PromOpHandles.emplace_back(args&: PromOp);
15589
15590	// Replace all inputs, either with the truncation operand, or a
15591	// truncation or extension to the final output type.
15592	for (unsigned i = `0`, ie = Inputs.size(); i != ie; ++i) {
15593	// Constant inputs need to be replaced with the to-be-promoted nodes that
15594	// use them because they might have users outside of the cluster of
15595	// promoted nodes.
15596	if (isa<ConstantSDNode>(Val: Inputs [i]))
15597	continue;
15598
15599	SDValue InSrc = Inputs [i].getOperand(i: `0`);
15600	if (Inputs [i].getValueType() == N->getValueType(ResNo: `0`))
15601	DAG.ReplaceAllUsesOfValueWith(From: Inputs [i], To: InSrc);
15602	else if (N->getOpcode() == ISD::SIGN_EXTEND)
15603	DAG.ReplaceAllUsesOfValueWith(From: Inputs [i],
15604	To: DAG.getSExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: `0`)));
15605	else if (N->getOpcode() == ISD::ZERO_EXTEND)
15606	DAG.ReplaceAllUsesOfValueWith(From: Inputs [i],
15607	To: DAG.getZExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: `0`)));
15608	else
15609	DAG.ReplaceAllUsesOfValueWith(From: Inputs [i],
15610	To: DAG.getAnyExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: `0`)));
15611	}
15612
15613	// Replace all operations (these are all the same, but have a different
15614	// (promoted) return type). DAG.getNode will validate that the types of
15615	// a binary operator match, so go through the list in reverse so that
15616	// we've likely promoted both operands first.
15617	while (!PromOpHandles.empty()) {
15618	SDValue PromOp = PromOpHandles.back().getValue();
15619	PromOpHandles.pop_back();
15620
15621	unsigned C;
15622	switch (PromOp.getOpcode()) {
15623	default: C = `0`; break;
15624	case ISD::SELECT: C = `1`; break;
15625	case ISD::SELECT_CC: C = `2`; break;
15626	}
15627
15628	if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
15629	PromOp.getOperand(i: C).getValueType() != N->getValueType(ResNo: `0`)) \|\|
15630	(!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+`1`)) &&
15631	PromOp.getOperand(i: C+`1`).getValueType() != N->getValueType(ResNo: `0`))) {
15632	// The to-be-promoted operands of this node have not yet been
15633	// promoted (this should be rare because we're going through the
15634	// list backward, but if one of the operands has several users in
15635	// this cluster of to-be-promoted nodes, it is possible).
15636	PromOpHandles.emplace_front(args&: PromOp);
15637	continue;
15638	}
15639
15640	// For SELECT and SELECT_CC nodes, we do a similar check for any
15641	// to-be-promoted comparison inputs.
15642	if (PromOp.getOpcode() == ISD::SELECT \|\|
15643	PromOp.getOpcode() == ISD::SELECT_CC) {
15644	if ((SelectTruncOp[`0`].count(Val: PromOp.getNode()) &&
15645	PromOp.getOperand(i: `0`).getValueType() != N->getValueType(ResNo: `0`)) \|\|
15646	(SelectTruncOp[`1`].count(Val: PromOp.getNode()) &&
15647	PromOp.getOperand(i: `1`).getValueType() != N->getValueType(ResNo: `0`))) {
15648	PromOpHandles.emplace_front(args&: PromOp);
15649	continue;
15650	}
15651	}
15652
15653	SmallVector<SDValue, `3`> Ops(PromOp.getNode()->ops());
15654
15655	// If this node has constant inputs, then they'll need to be promoted here.
15656	for (unsigned i = `0`; i < `2`; ++i) {
15657	if (!isa<ConstantSDNode>(Val: Ops [C+i]))
15658	continue;
15659	if (Ops [C+i].getValueType() == N->getValueType(ResNo: `0`))
15660	continue;
15661
15662	if (N->getOpcode() == ISD::SIGN_EXTEND)
15663	Ops [C+i] = DAG.getSExtOrTrunc(Op: Ops [C+i], DL: dl, VT: N->getValueType(ResNo: `0`));
15664	else if (N->getOpcode() == ISD::ZERO_EXTEND)
15665	Ops [C+i] = DAG.getZExtOrTrunc(Op: Ops [C+i], DL: dl, VT: N->getValueType(ResNo: `0`));
15666	else
15667	Ops [C+i] = DAG.getAnyExtOrTrunc(Op: Ops [C+i], DL: dl, VT: N->getValueType(ResNo: `0`));
15668	}
15669
15670	// If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15671	// truncate them again to the original value type.
15672	if (PromOp.getOpcode() == ISD::SELECT \|\|
15673	PromOp.getOpcode() == ISD::SELECT_CC) {
15674	auto SI0 = SelectTruncOp[`0`].find(Val: PromOp.getNode());
15675	if (SI0 != SelectTruncOp[`0`].end())
15676	Ops [`0`] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI0 ->second, Operand: Ops [`0`]);
15677	auto SI1 = SelectTruncOp[`1`].find(Val: PromOp.getNode());
15678	if (SI1 != SelectTruncOp[`1`].end())
15679	Ops [`1`] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI1 ->second, Operand: Ops [`1`]);
15680	}
15681
15682	DAG.ReplaceAllUsesOfValueWith(From: PromOp,
15683	To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: N->getValueType(ResNo: `0`), Ops));
15684	}
15685
15686	// Now we're left with the initial extension itself.
15687	if (!ReallyNeedsExt)
15688	return N->getOperand(Num: `0`);
15689
15690	// To zero extend, just mask off everything except for the first bit (in the
15691	// i1 case).
15692	if (N->getOpcode() == ISD::ZERO_EXTEND)
15693	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `0`),
15694	N2: DAG.getConstant(Val: APInt::getLowBitsSet(
15695	numBits: N->getValueSizeInBits(ResNo: `0`), loBitsSet: PromBits),
15696	DL: dl, VT: N->getValueType(ResNo: `0`)));
15697
15698	assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15699	"Invalid extension type");
15700	EVT ShiftAmountTy = getShiftAmountTy(LHSTy: N->getValueType(ResNo: `0`), DL: DAG.getDataLayout());
15701	SDValue ShiftCst =
15702	DAG.getConstant(Val: N->getValueSizeInBits(ResNo: `0`) - PromBits, DL: dl, VT: ShiftAmountTy);
15703	return DAG.getNode(
15704	Opcode: ISD::SRA, DL: dl, VT: N->getValueType(ResNo: `0`),
15705	N1: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `0`), N2: ShiftCst),
15706	N2: ShiftCst);
15707	}
15708
15709	// The function check a i128 load can convert to 16i8 load for Vcmpequb.
15710	static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) {
15711
15712	auto isValidForConvert = [](SDValue &Operand) {
15713	if (!Operand.hasOneUse())
15714	return false;
15715
15716	if (Operand.getValueType() != MVT::i128)
15717	return false;
15718
15719	if (Operand.getOpcode() == ISD::Constant)
15720	return true;
15721
15722	auto *LoadNode = dyn_cast<LoadSDNode>(Val&: Operand);
15723	if (!LoadNode)
15724	return false;
15725
15726	// If memory operation is volatile, do not perform any
15727	// optimization or transformation. Volatile operations must be preserved
15728	// as written to ensure correct program behavior, so we return an empty
15729	// SDValue to indicate no action.
15730
15731	if (LoadNode->isVolatile())
15732	return false;
15733
15734	// Only combine loads if both use the unindexed addressing mode.
15735	// PowerPC AltiVec/VMX does not support vector loads or stores with
15736	// pre/post-increment addressing. Indexed modes may imply implicit
15737	// pointer updates, which are not compatible with AltiVec vector
15738	// instructions.
15739	if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15740	return false;
15741
15742	// Only combine loads if both are non-extending loads
15743	// (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15744	// ISD::SEXTLOAD) perform zero or sign extension, which may change the
15745	// loaded value's semantics and are not compatible with vector loads.
15746	if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15747	return false;
15748
15749	return true;
15750	};
15751
15752	return (isValidForConvert (LHS) && isValidForConvert (RHS));
15753	}
15754
15755	SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
15756	const SDLoc &DL) {
15757
15758	assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15759
15760	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `2`))->get();
15761	assert((CC == ISD::SETNE \|\| CC == ISD::SETEQ) &&
15762	"CC mus be ISD::SETNE or ISD::SETEQ");
15763
15764	auto getV16i8Load = [&](const SDValue &Operand) {
15765	if (Operand.getOpcode() == ISD::Constant)
15766	return DAG.getBitcast(VT: MVT::v16i8, V: Operand);
15767
15768	assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15769
15770	auto *LoadNode = cast<LoadSDNode>(Val: Operand);
15771	return DAG.getLoad(VT: MVT::v16i8, dl: DL, Chain: LoadNode->getChain(),
15772	Ptr: LoadNode->getBasePtr(), MMO: LoadNode->getMemOperand());
15773	};
15774
15775	// Following code transforms the DAG
15776	// t0: ch,glue = EntryToken
15777	// t2: i64,ch = CopyFromReg t0, Register:i64 %0
15778	// t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15779	// undef:i64
15780	// t4: i64,ch = CopyFromReg t0, Register:i64 %1
15781	// t5: i128,ch =
15782	// load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15783	// setcc t3, t5, setne:ch
15784	//
15785	// ---->
15786	//
15787	// t0: ch,glue = EntryToken
15788	// t2: i64,ch = CopyFromReg t0, Register:i64 %0
15789	// t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15790	// undef:i64
15791	// t4: i64,ch = CopyFromReg t0, Register:i64 %1
15792	// t5: v16i8,ch =
15793	// load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15794	// t6: i32 =
15795	// llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15796	// Constant:i32<2>, t3, t5
15797	// t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15798
15799	// Or transforms the DAG
15800	// t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15801	// t8: i1 =
15802	// setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15803	//
15804	// --->
15805	//
15806	// t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15807	// t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15808	// t7: i32 =
15809	// llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15810
15811	SDValue LHSVec = getV16i8Load (N->getOperand(Num: `0`));
15812	SDValue RHSVec = getV16i8Load (N->getOperand(Num: `1`));
15813
15814	SDValue IntrID =
15815	DAG.getConstant(Val: Intrinsic::ppc_altivec_vcmpequb_p, DL, VT: MVT::i32);
15816	SDValue CRSel = DAG.getConstant(Val: `2`, DL, VT: MVT::i32); // which CR6 predicate field
15817	SDValue PredResult = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
15818	N1: IntrID, N2: CRSel, N3: LHSVec, N4: RHSVec);
15819	// ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15820	// so we need to invert the CC opcode.
15821	return DAG.getSetCC(DL, VT: N->getValueType(ResNo: `0`), LHS: PredResult,
15822	RHS: DAG.getConstant(Val: `0`, DL, VT: MVT::i32),
15823	Cond: CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15824	}
15825
15826	// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15827	// If it is , return true; otherwise return false.
15828	static bool canConvertSETCCToXori(SDNode *N) {
15829	assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15830
15831	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `2`))->get();
15832	if (CC != ISD::SETEQ)
15833	return false;
15834
15835	SDValue LHS = N->getOperand(Num: `0`);
15836	SDValue RHS = N->getOperand(Num: `1`);
15837
15838	// Check the `SDValue &V` is from `and` with `1`.
15839	auto IsAndWithOne = [](SDValue &V) {
15840	if (V.getOpcode() == ISD::AND) {
15841	for (const SDValue &Op : V ->ops())
15842	if (auto *C = dyn_cast<ConstantSDNode>(Val: Op))
15843	if (C->isOne())
15844	return true;
15845	}
15846	return false;
15847	};
15848
15849	// Check whether the SETCC compare with zero.
15850	auto IsCompareWithZero = [](SDValue &V) {
15851	if (auto *C = dyn_cast<ConstantSDNode>(Val&: V))
15852	if (C->isZero())
15853	return true;
15854	return false;
15855	};
15856
15857	return (IsAndWithOne (LHS) && IsCompareWithZero (RHS)) \|\|
15858	(IsAndWithOne (RHS) && IsCompareWithZero (LHS));
15859	}
15860
15861	// You must check whether the `SDNode N` can be converted to Xori using*
15862	// the function `static bool canConvertSETCCToXori(SDNode N)`*
15863	// before calling the function; otherwise, it may produce incorrect results.
15864	static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG) {
15865
15866	assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15867	SDValue LHS = N->getOperand(Num: `0`);
15868	SDValue RHS = N->getOperand(Num: `1`);
15869	SDLoc DL(N);
15870
15871	[[maybe_unused]] ISD::CondCode CC =
15872	cast<CondCodeSDNode>(Val: N->getOperand(Num: `2`))->get();
15873	assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15874	// Rewrite it as XORI (and X, 1), 1.
15875	auto MakeXor1 = [&](SDValue V) {
15876	EVT VT = V.getValueType();
15877	SDValue One = DAG.getConstant(Val: `1`, DL, VT);
15878	SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: V, N2: One);
15879	return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: Xor);
15880	};
15881
15882	if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15883	return MakeXor1 (LHS);
15884
15885	if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15886	return MakeXor1 (RHS);
15887
15888	llvm_unreachable("Should not reach here.");
15889	}
15890
15891	SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15892	DAGCombinerInfo &DCI) const {
15893	assert(N->getOpcode() == ISD::SETCC &&
15894	"Should be called with a SETCC node");
15895
15896	// Check if the pattern (setcc (and X, 1), 0, eq) is present.
15897	// If it is, rewrite it as XORI (and X, 1), 1.
15898	if (canConvertSETCCToXori(N))
15899	return ConvertSETCCToXori(N, DAG&: DCI.DAG);
15900
15901	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `2`))->get();
15902	if (CC == ISD::SETNE \|\| CC == ISD::SETEQ) {
15903	SDValue LHS = N->getOperand(Num: `0`);
15904	SDValue RHS = N->getOperand(Num: `1`);
15905
15906	// If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15907	if (LHS.getOpcode() == ISD::SUB && isNullConstant(V: LHS.getOperand(i: `0`)) &&
15908	LHS.hasOneUse())
15909	std::swap(a&: LHS, b&: RHS);
15910
15911	// x == 0-y --> x+y == 0
15912	// x != 0-y --> x+y != 0
15913	if (RHS.getOpcode() == ISD::SUB && isNullConstant(V: RHS.getOperand(i: `0`)) &&
15914	RHS.hasOneUse()) {
15915	SDLoc DL(N);
15916	SelectionDAG &DAG = DCI.DAG;
15917	EVT VT = N->getValueType(ResNo: `0`);
15918	EVT OpVT = LHS.getValueType();
15919	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: OpVT, N1: LHS, N2: RHS.getOperand(i: `1`));
15920	return DAG.getSetCC(DL, VT, LHS: Add, RHS: DAG.getConstant(Val: `0`, DL, VT: OpVT), Cond: CC);
15921	}
15922
15923	// Optimization: Fold i128 equality/inequality compares of two loads into a
15924	// vectorized compare using vcmpequb.p when Altivec is available.
15925	//
15926	// Rationale:
15927	// A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15928	// On VSX-capable subtargets, we can instead reinterpret the i128 loads
15929	// as v16i8 vectors and use the Altive vcmpequb.p instruction to
15930	// perform a full 128-bit equality check in a single vector compare.
15931	//
15932	// Example Result:
15933	// This transformation replaces memcmp(a, b, 16) with two vector loads
15934	// and one vector compare instruction.
15935
15936	if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15937	return convertTwoLoadsAndCmpToVCMPEQUB(DAG&: DCI.DAG, N, DL: SDLoc (N));
15938	}
15939
15940	return DAGCombineTruncBoolExt(N, DCI);
15941	}
15942
15943	// Is this an extending load from an f32 to an f64?
15944	static bool isFPExtLoad(SDValue Op) {
15945	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: Op.getNode()))
15946	return LD->getExtensionType() == ISD::EXTLOAD &&
15947	Op.getValueType() == MVT::f64;
15948	return false;
15949	}
15950
15951	/// Reduces the number of fp-to-int conversion when building a vector.
15952	///
15953	/// If this vector is built out of floating to integer conversions,
15954	/// transform it to a vector built out of floating point values followed by a
15955	/// single floating to integer conversion of the vector.
15956	/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15957	/// becomes (fptosi (build_vector ($A, $B, ...)))
15958	SDValue PPCTargetLowering::
15959	combineElementTruncationToVectorTruncation(SDNode *N,
15960	DAGCombinerInfo &DCI) const {
15961	assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15962	"Should be called with a BUILD_VECTOR node");
15963
15964	SelectionDAG &DAG = DCI.DAG;
15965	SDLoc dl(N);
15966
15967	SDValue FirstInput = N->getOperand(Num: `0`);
15968	assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15969	"The input operand must be an fp-to-int conversion.");
15970
15971	// This combine happens after legalization so the fp_to_[su]i nodes are
15972	// already converted to PPCSISD nodes.
15973	unsigned FirstConversion = FirstInput.getOperand(i: `0`).getOpcode();
15974	if (FirstConversion == PPCISD::FCTIDZ \|\|
15975	FirstConversion == PPCISD::FCTIDUZ \|\|
15976	FirstConversion == PPCISD::FCTIWZ \|\|
15977	FirstConversion == PPCISD::FCTIWUZ) {
15978	bool IsSplat = true;
15979	bool Is32Bit = FirstConversion == PPCISD::FCTIWZ \|\|
15980	FirstConversion == PPCISD::FCTIWUZ;
15981	EVT SrcVT = FirstInput.getOperand(i: `0`).getValueType();
15982	SmallVector<SDValue, `4`> Ops;
15983	EVT TargetVT = N->getValueType(ResNo: `0`);
15984	for (int i = `0`, e = N->getNumOperands(); i < e; ++i) {
15985	SDValue NextOp = N->getOperand(Num: i);
15986	if (NextOp.getOpcode() != PPCISD::MFVSR)
15987	return SDValue ();
15988	unsigned NextConversion = NextOp.getOperand(i: `0`).getOpcode();
15989	if (NextConversion != FirstConversion)
15990	return SDValue ();
15991	// If we are converting to 32-bit integers, we need to add an FP_ROUND.
15992	// This is not valid if the input was originally double precision. It is
15993	// also not profitable to do unless this is an extending load in which
15994	// case doing this combine will allow us to combine consecutive loads.
15995	if (Is32Bit && !isFPExtLoad(Op: NextOp.getOperand(i: `0`).getOperand(i: `0`)))
15996	return SDValue ();
15997	if (N->getOperand(Num: i) != FirstInput)
15998	IsSplat = false;
15999	}
16000
16001	// If this is a splat, we leave it as-is since there will be only a single
16002	// fp-to-int conversion followed by a splat of the integer. This is better
16003	// for 32-bit and smaller ints and neutral for 64-bit ints.
16004	if (IsSplat)
16005	return SDValue ();
16006
16007	// Now that we know we have the right type of node, get its operands
16008	for (int i = `0`, e = N->getNumOperands(); i < e; ++i) {
16009	SDValue In = N->getOperand(Num: i).getOperand(i: `0`);
16010	if (Is32Bit) {
16011	// For 32-bit values, we need to add an FP_ROUND node (if we made it
16012	// here, we know that all inputs are extending loads so this is safe).
16013	if (In.isUndef())
16014	Ops.push_back(Elt: DAG.getUNDEF(VT: SrcVT));
16015	else {
16016	SDValue Trunc =
16017	DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: In.getOperand(i: `0`),
16018	N2: DAG.getIntPtrConstant(Val: `1`, DL: dl, /isTarget=/true));
16019	Ops.push_back(Elt: Trunc);
16020	}
16021	} else
16022	Ops.push_back(Elt: In.isUndef() ? DAG.getUNDEF(VT: SrcVT) : In.getOperand(i: `0`));
16023	}
16024
16025	unsigned Opcode;
16026	if (FirstConversion == PPCISD::FCTIDZ \|\|
16027	FirstConversion == PPCISD::FCTIWZ)
16028	Opcode = ISD::FP_TO_SINT;
16029	else
16030	Opcode = ISD::FP_TO_UINT;
16031
16032	EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
16033	SDValue BV = DAG.getBuildVector(VT: NewVT, DL: dl, Ops);
16034	return DAG.getNode(Opcode, DL: dl, VT: TargetVT, Operand: BV);
16035	}
16036	return SDValue ();
16037	}
16038
16039	// LXVKQ instruction load VSX vector with a special quadword value
16040	// based on an immediate value. This helper method returns the details of the
16041	// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
16042	// to help generate the LXVKQ instruction and the subsequent shift instruction
16043	// required to match the original build vector pattern.
16044
16045	// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16046	using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16047
16048	static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16049
16050	// LXVKQ instruction loads the Quadword value:
16051	// 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16052	static const APInt BasePattern = APInt (`128`, `0x8000000000000000ULL`) << `64`;
16053	static const uint32_t Uim = `16`;
16054
16055	// Check for direct LXVKQ match (no shift needed)
16056	if (FullVal == BasePattern)
16057	return std::make_tuple(args: Uim, args: uint8_t{`0`});
16058
16059	// Check if FullValue is 1 (the result of the base pattern >> 127)
16060	if (FullVal == APInt (`128`, `1`))
16061	return std::make_tuple(args: Uim, args: uint8_t{`127`});
16062
16063	return std::nullopt;
16064	}
16065
16066	/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
16067	/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
16068	/// LXVKQ instruction load VSX vector with a special quadword value based on an
16069	/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
16070	/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
16071	/// This can be used to inline the build vector constants that have the
16072	/// following patterns:
16073	///
16074	/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16075	/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
16076	/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a
16077	/// combination of splatting and right shift instructions.
16078
16079	SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
16080	SelectionDAG &DAG) const {
16081
16082	assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
16083	"Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
16084
16085	// This transformation is only supported if we are loading either a byte,
16086	// halfword, word, or doubleword.
16087	EVT VT = Op.getValueType();
16088	if (!(VT == MVT::v8i16 \|\| VT == MVT::v16i8 \|\| VT == MVT::v4i32 \|\|
16089	VT == MVT::v2i64))
16090	return SDValue ();
16091
16092	LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
16093	<< VT.getEVTString() << "): ";
16094	Op->dump());
16095
16096	unsigned NumElems = VT.getVectorNumElements();
16097	unsigned ElemBits = VT.getScalarSizeInBits();
16098
16099	bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
16100
16101	// Check for Non-constant operand in the build vector.
16102	for (const SDValue &Operand : Op.getNode()->op_values()) {
16103	if (!isa<ConstantSDNode>(Val: Operand))
16104	return SDValue ();
16105	}
16106
16107	// Assemble build vector operands as a 128-bit register value
16108	// We need to reconstruct what the 128-bit register pattern would be
16109	// that produces this vector when interpreted with the current endianness
16110	APInt FullVal = APInt::getZero(numBits: `128`);
16111
16112	for (unsigned Index = `0`; Index < NumElems; ++Index) {
16113	auto *C = cast<ConstantSDNode>(Val: Op.getOperand(i: Index));
16114
16115	// Get element value as raw bits (zero-extended)
16116	uint64_t ElemValue = C->getZExtValue();
16117
16118	// Mask to element size to ensure we only get the relevant bits
16119	if (ElemBits < `64`)
16120	ElemValue &= ((`1ULL` << ElemBits) - `1`);
16121
16122	// Calculate bit position for this element in the 128-bit register
16123	unsigned BitPos =
16124	(IsLittleEndian) ? (Index * ElemBits) : (`128` - (Index + `1`) * ElemBits);
16125
16126	// Create APInt for the element value and shift it to correct position
16127	APInt ElemAPInt(`128`, ElemValue);
16128	ElemAPInt <<= BitPos;
16129
16130	// Place the element value at the correct bit position
16131	FullVal \|= ElemAPInt;
16132	}
16133
16134	if (FullVal.isZero() \|\| FullVal.isAllOnes())
16135	return SDValue ();
16136
16137	if (auto UIMOpt = getPatternInfo(FullVal)) {
16138	const auto &[Uim, ShiftAmount] = *UIMOpt;
16139	SDLoc Dl(Op);
16140
16141	// Generate LXVKQ instruction if the shift amount is zero.
16142	if (ShiftAmount == `0`) {
16143	SDValue UimVal = DAG.getTargetConstant(Val: Uim, DL: Dl, VT: MVT::i32);
16144	SDValue LxvkqInstr =
16145	SDValue (DAG.getMachineNode(Opcode: PPC::LXVKQ, dl: Dl, VT, Op1: UimVal), `0`);
16146	LLVM_DEBUG(llvm::dbgs()
16147	<< "combineBVLoadsSpecialValue: Instruction Emitted ";
16148	LxvkqInstr.dump());
16149	return LxvkqInstr;
16150	}
16151
16152	assert(ShiftAmount == `127` && "Unexpected lxvkq shift amount value");
16153
16154	// The right shifted pattern can be constructed using a combination of
16155	// XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
16156	// 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16157	// value 255.
16158	SDValue ShiftAmountVec =
16159	SDValue (DAG.getMachineNode(Opcode: PPC::XXSPLTIB, dl: Dl, VT: MVT::v4i32,
16160	Op1: DAG.getTargetConstant(Val: `255`, DL: Dl, VT: MVT::i32)),
16161	`0`);
16162	// Generate appropriate right shift instruction
16163	SDValue ShiftVec = SDValue (
16164	DAG.getMachineNode(Opcode: PPC::VSRQ, dl: Dl, VT, Op1: ShiftAmountVec, Op2: ShiftAmountVec),
16165	`0`);
16166	LLVM_DEBUG(llvm::dbgs()
16167	<< "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16168	ShiftVec.dump());
16169	return ShiftVec;
16170	}
16171	// No patterns matched for build vectors.
16172	return SDValue ();
16173	}
16174
16175	/// Reduce the number of loads when building a vector.
16176	///
16177	/// Building a vector out of multiple loads can be converted to a load
16178	/// of the vector type if the loads are consecutive. If the loads are
16179	/// consecutive but in descending order, a shuffle is added at the end
16180	/// to reorder the vector.
16181	static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
16182	assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16183	"Should be called with a BUILD_VECTOR node");
16184
16185	SDLoc dl(N);
16186
16187	// Return early for non byte-sized type, as they can't be consecutive.
16188	if (!N->getValueType(ResNo: `0`).getVectorElementType().isByteSized())
16189	return SDValue ();
16190
16191	bool InputsAreConsecutiveLoads = true;
16192	bool InputsAreReverseConsecutive = true;
16193	unsigned ElemSize = N->getValueType(ResNo: `0`).getScalarType().getStoreSize();
16194	SDValue FirstInput = N->getOperand(Num: `0`);
16195	bool IsRoundOfExtLoad = false;
16196	LoadSDNode FirstLoad = nullptr*;
16197
16198	if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16199	FirstInput.getOperand(i: `0`).getOpcode() == ISD::LOAD) {
16200	FirstLoad = cast<LoadSDNode>(Val: FirstInput.getOperand(i: `0`));
16201	IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16202	}
16203	// Not a build vector of (possibly fp_rounded) loads.
16204	if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) \|\|
16205	N->getNumOperands() == `1`)
16206	return SDValue ();
16207
16208	if (!IsRoundOfExtLoad)
16209	FirstLoad = cast<LoadSDNode>(Val&: FirstInput);
16210
16211	SmallVector<LoadSDNode *, `4`> InputLoads;
16212	InputLoads.push_back(Elt: FirstLoad);
16213	for (int i = `1`, e = N->getNumOperands(); i < e; ++i) {
16214	// If any inputs are fp_round(extload), they all must be.
16215	if (IsRoundOfExtLoad && N->getOperand(Num: i).getOpcode() != ISD::FP_ROUND)
16216	return SDValue ();
16217
16218	SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(Num: i).getOperand(i: `0`) :
16219	N->getOperand(Num: i);
16220	if (NextInput.getOpcode() != ISD::LOAD)
16221	return SDValue ();
16222
16223	SDValue PreviousInput =
16224	IsRoundOfExtLoad ? N->getOperand(Num: i-`1`).getOperand(i: `0`) : N->getOperand(Num: i-`1`);
16225	LoadSDNode *LD1 = cast<LoadSDNode>(Val&: PreviousInput);
16226	LoadSDNode *LD2 = cast<LoadSDNode>(Val&: NextInput);
16227
16228	// If any inputs are fp_round(extload), they all must be.
16229	if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16230	return SDValue ();
16231
16232	// We only care about regular loads. The PPC-specific load intrinsics
16233	// will not lead to a merge opportunity.
16234	if (!DAG.areNonVolatileConsecutiveLoads(LD: LD2, Base: LD1, Bytes: ElemSize, Dist: `1`))
16235	InputsAreConsecutiveLoads = false;
16236	if (!DAG.areNonVolatileConsecutiveLoads(LD: LD1, Base: LD2, Bytes: ElemSize, Dist: `1`))
16237	InputsAreReverseConsecutive = false;
16238
16239	// Exit early if the loads are neither consecutive nor reverse consecutive.
16240	if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16241	return SDValue ();
16242	InputLoads.push_back(Elt: LD2);
16243	}
16244
16245	assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16246	"The loads cannot be both consecutive and reverse consecutive.");
16247
16248	SDValue WideLoad;
16249	SDValue ReturnSDVal;
16250	if (InputsAreConsecutiveLoads) {
16251	assert(FirstLoad && "Input needs to be a LoadSDNode.");
16252	WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: `0`), dl, Chain: FirstLoad->getChain(),
16253	Ptr: FirstLoad->getBasePtr(), PtrInfo: FirstLoad->getPointerInfo(),
16254	Alignment: FirstLoad->getAlign());
16255	ReturnSDVal = WideLoad;
16256	} else if (InputsAreReverseConsecutive) {
16257	LoadSDNode *LastLoad = InputLoads.back();
16258	assert(LastLoad && "Input needs to be a LoadSDNode.");
16259	WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: `0`), dl, Chain: LastLoad->getChain(),
16260	Ptr: LastLoad->getBasePtr(), PtrInfo: LastLoad->getPointerInfo(),
16261	Alignment: LastLoad->getAlign());
16262	SmallVector<int, `16`> Ops;
16263	for (int i = N->getNumOperands() - `1`; i >= `0`; i--)
16264	Ops.push_back(Elt: i);
16265
16266	ReturnSDVal = DAG.getVectorShuffle(VT: N->getValueType(ResNo: `0`), dl, N1: WideLoad,
16267	N2: DAG.getUNDEF(VT: N->getValueType(ResNo: `0`)), Mask: Ops);
16268	} else
16269	return SDValue ();
16270
16271	for (auto *LD : InputLoads)
16272	DAG.makeEquivalentMemoryOrdering(OldLoad: LD, NewMemOp: WideLoad);
16273	return ReturnSDVal;
16274	}
16275
16276	// This function adds the required vector_shuffle needed to get
16277	// the elements of the vector extract in the correct position
16278	// as specified by the CorrectElems encoding.
16279	static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
16280	SDValue Input, uint64_t Elems,
16281	uint64_t CorrectElems) {
16282	SDLoc dl(N);
16283
16284	unsigned NumElems = Input.getValueType().getVectorNumElements();
16285	SmallVector<int, `16`> ShuffleMask(NumElems, -`1`);
16286
16287	// Knowing the element indices being extracted from the original
16288	// vector and the order in which they're being inserted, just put
16289	// them at element indices required for the instruction.
16290	for (unsigned i = `0`; i < N->getNumOperands(); i++) {
16291	if (DAG.getDataLayout().isLittleEndian())
16292	ShuffleMask [CorrectElems & `0xF`] = Elems & `0xF`;
16293	else
16294	ShuffleMask [(CorrectElems & `0xF0`) >> `4`] = (Elems & `0xF0`) >> `4`;
16295	CorrectElems = CorrectElems >> `8`;
16296	Elems = Elems >> `8`;
16297	}
16298
16299	SDValue Shuffle =
16300	DAG.getVectorShuffle(VT: Input.getValueType(), dl, N1: Input,
16301	N2: DAG.getUNDEF(VT: Input.getValueType()), Mask: ShuffleMask);
16302
16303	EVT VT = N->getValueType(ResNo: `0`);
16304	SDValue Conv = DAG.getBitcast(VT, V: Shuffle);
16305
16306	EVT ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(),
16307	VT: Input.getValueType().getVectorElementType(),
16308	NumElements: VT.getVectorNumElements());
16309	return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT, N1: Conv,
16310	N2: DAG.getValueType(ExtVT));
16311	}
16312
16313	// Look for build vector patterns where input operands come from sign
16314	// extended vector_extract elements of specific indices. If the correct indices
16315	// aren't used, add a vector shuffle to fix up the indices and create
16316	// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16317	// during instruction selection.
16318	static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
16319	// This array encodes the indices that the vector sign extend instructions
16320	// extract from when extending from one type to another for both BE and LE.
16321	// The right nibble of each byte corresponds to the LE incides.
16322	// and the left nibble of each byte corresponds to the BE incides.
16323	// For example: 0x3074B8FC byte->word
16324	// For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16325	// For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16326	// For example: 0x000070F8 byte->double word
16327	// For LE: the allowed indices are: 0x0,0x8
16328	// For BE: the allowed indices are: 0x7,0xF
16329	uint64_t TargetElems[] = {
16330	`0x3074B8FC`, // b->w
16331	`0x000070F8`, // b->d
16332	`0x10325476`, // h->w
16333	`0x00003074`, // h->d
16334	`0x00001032`, // w->d
16335	};
16336
16337	uint64_t Elems = `0`;
16338	int Index;
16339	SDValue Input;
16340
16341	auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16342	if (!Op)
16343	return false;
16344	if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16345	Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16346	return false;
16347
16348	// A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16349	// of the right width.
16350	SDValue Extract = Op.getOperand(i: `0`);
16351	if (Extract.getOpcode() == ISD::ANY_EXTEND)
16352	Extract = Extract.getOperand(i: `0`);
16353	if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16354	return false;
16355
16356	ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: `1`));
16357	if (!ExtOp)
16358	return false;
16359
16360	Index = ExtOp->getZExtValue();
16361	if (Input && Input != Extract.getOperand(i: `0`))
16362	return false;
16363
16364	if (!Input)
16365	Input = Extract.getOperand(i: `0`);
16366
16367	Elems = Elems << `8`;
16368	Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << `4`;
16369	Elems \|= Index;
16370
16371	return true;
16372	};
16373
16374	// If the build vector operands aren't sign extended vector extracts,
16375	// of the same input vector, then return.
16376	for (unsigned i = `0`; i < N->getNumOperands(); i++) {
16377	if (!isSExtOfVecExtract (N->getOperand(Num: i))) {
16378	return SDValue ();
16379	}
16380	}
16381
16382	// If the vector extract indices are not correct, add the appropriate
16383	// vector_shuffle.
16384	int TgtElemArrayIdx;
16385	int InputSize = Input.getValueType().getScalarSizeInBits();
16386	int OutputSize = N->getValueType(ResNo: `0`).getScalarSizeInBits();
16387	if (InputSize + OutputSize == `40`)
16388	TgtElemArrayIdx = `0`;
16389	else if (InputSize + OutputSize == `72`)
16390	TgtElemArrayIdx = `1`;
16391	else if (InputSize + OutputSize == `48`)
16392	TgtElemArrayIdx = `2`;
16393	else if (InputSize + OutputSize == `80`)
16394	TgtElemArrayIdx = `3`;
16395	else if (InputSize + OutputSize == `96`)
16396	TgtElemArrayIdx = `4`;
16397	else
16398	return SDValue ();
16399
16400	uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16401	CorrectElems = DAG.getDataLayout().isLittleEndian()
16402	? CorrectElems & `0x0F0F0F0F0F0F0F0F`
16403	: CorrectElems & `0xF0F0F0F0F0F0F0F0`;
16404	if (Elems != CorrectElems) {
16405	return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16406	}
16407
16408	// Regular lowering will catch cases where a shuffle is not needed.
16409	return SDValue ();
16410	}
16411
16412	// Look for the pattern of a load from a narrow width to i128, feeding
16413	// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16414	// (LXVRZX). This node represents a zero extending load that will be matched
16415	// to the Load VSX Vector Rightmost instructions.
16416	static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
16417	SDLoc DL(N);
16418
16419	// This combine is only eligible for a BUILD_VECTOR of v1i128.
16420	if (N->getValueType(ResNo: `0`) != MVT::v1i128)
16421	return SDValue ();
16422
16423	SDValue Operand = N->getOperand(Num: `0`);
16424	// Proceed with the transformation if the operand to the BUILD_VECTOR
16425	// is a load instruction.
16426	if (Operand.getOpcode() != ISD::LOAD)
16427	return SDValue ();
16428
16429	auto *LD = cast<LoadSDNode>(Val&: Operand);
16430	EVT MemoryType = LD->getMemoryVT();
16431
16432	// This transformation is only valid if the we are loading either a byte,
16433	// halfword, word, or doubleword.
16434	bool ValidLDType = MemoryType == MVT::i8 \|\| MemoryType == MVT::i16 \|\|
16435	MemoryType == MVT::i32 \|\| MemoryType == MVT::i64;
16436
16437	// Ensure that the load from the narrow width is being zero extended to i128.
16438	if (!ValidLDType \|\|
16439	(LD->getExtensionType() != ISD::ZEXTLOAD &&
16440	LD->getExtensionType() != ISD::EXTLOAD))
16441	return SDValue ();
16442
16443	SDValue LoadOps[] = {
16444	LD->getChain(), LD->getBasePtr(),
16445	DAG.getIntPtrConstant(Val: MemoryType.getScalarSizeInBits(), DL)};
16446
16447	return DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVRZX, dl: DL,
16448	VTList: DAG.getVTList(VT1: MVT::v1i128, VT2: MVT::Other),
16449	Ops: LoadOps, MemVT: MemoryType, MMO: LD->getMemOperand());
16450	}
16451
16452	SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16453	DAGCombinerInfo &DCI) const {
16454	assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16455	"Should be called with a BUILD_VECTOR node");
16456
16457	SelectionDAG &DAG = DCI.DAG;
16458	SDLoc dl(N);
16459
16460	if (!Subtarget.hasVSX())
16461	return SDValue ();
16462
16463	// The target independent DAG combiner will leave a build_vector of
16464	// float-to-int conversions intact. We can generate MUCH better code for
16465	// a float-to-int conversion of a vector of floats.
16466	SDValue FirstInput = N->getOperand(Num: `0`);
16467	if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16468	SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16469	if (Reduced)
16470	return Reduced;
16471	}
16472
16473	// If we're building a vector out of consecutive loads, just load that
16474	// vector type.
16475	SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16476	if (Reduced)
16477	return Reduced;
16478
16479	// If we're building a vector out of extended elements from another vector
16480	// we have P9 vector integer extend instructions. The code assumes legal
16481	// input types (i.e. it can't handle things like v4i16) so do not run before
16482	// legalization.
16483	if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16484	Reduced = combineBVOfVecSExt(N, DAG);
16485	if (Reduced)
16486	return Reduced;
16487	}
16488
16489	// On Power10, the Load VSX Vector Rightmost instructions can be utilized
16490	// if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16491	// is a load from <valid narrow width> to i128.
16492	if (Subtarget.isISA3_1()) {
16493	SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16494	if (BVOfZLoad)
16495	return BVOfZLoad;
16496	}
16497
16498	if (N->getValueType(ResNo: `0`) != MVT::v2f64)
16499	return SDValue ();
16500
16501	// Looking for:
16502	// (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16503	if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16504	FirstInput.getOpcode() != ISD::UINT_TO_FP)
16505	return SDValue ();
16506	if (N->getOperand(Num: `1`).getOpcode() != ISD::SINT_TO_FP &&
16507	N->getOperand(Num: `1`).getOpcode() != ISD::UINT_TO_FP)
16508	return SDValue ();
16509	if (FirstInput.getOpcode() != N->getOperand(Num: `1`).getOpcode())
16510	return SDValue ();
16511
16512	SDValue Ext1 = FirstInput.getOperand(i: `0`);
16513	SDValue Ext2 = N->getOperand(Num: `1`).getOperand(i: `0`);
16514	if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
16515	Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16516	return SDValue ();
16517
16518	ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Val: Ext1.getOperand(i: `1`));
16519	ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Val: Ext2.getOperand(i: `1`));
16520	if (!Ext1Op \|\| !Ext2Op)
16521	return SDValue ();
16522	if (Ext1.getOperand(i: `0`).getValueType() != MVT::v4i32 \|\|
16523	Ext1.getOperand(i: `0`) != Ext2.getOperand(i: `0`))
16524	return SDValue ();
16525
16526	int FirstElem = Ext1Op->getZExtValue();
16527	int SecondElem = Ext2Op->getZExtValue();
16528	int SubvecIdx;
16529	if (FirstElem == `0` && SecondElem == `1`)
16530	SubvecIdx = Subtarget.isLittleEndian() ? `1` : `0`;
16531	else if (FirstElem == `2` && SecondElem == `3`)
16532	SubvecIdx = Subtarget.isLittleEndian() ? `0` : `1`;
16533	else
16534	return SDValue ();
16535
16536	SDValue SrcVec = Ext1.getOperand(i: `0`);
16537	auto NodeType = (N->getOperand(Num: `1`).getOpcode() == ISD::SINT_TO_FP) ?
16538	PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16539	return DAG.getNode(Opcode: NodeType, DL: dl, VT: MVT::v2f64,
16540	N1: SrcVec, N2: DAG.getIntPtrConstant(Val: SubvecIdx, DL: dl));
16541	}
16542
16543	SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16544	DAGCombinerInfo &DCI) const {
16545	assert((N->getOpcode() == ISD::SINT_TO_FP \|\|
16546	N->getOpcode() == ISD::UINT_TO_FP) &&
16547	"Need an int -> FP conversion node here");
16548
16549	if (useSoftFloat() \|\| !Subtarget.has64BitSupport())
16550	return SDValue ();
16551
16552	SelectionDAG &DAG = DCI.DAG;
16553	SDLoc dl(N);
16554	SDValue Op(N, `0`);
16555
16556	// Don't handle ppc_fp128 here or conversions that are out-of-range capable
16557	// from the hardware.
16558	if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16559	return SDValue ();
16560	if (!Op.getOperand(i: `0`).getValueType().isSimple())
16561	return SDValue ();
16562	if (Op.getOperand(i: `0`).getValueType().getSimpleVT() <= MVT (MVT::i1) \|\|
16563	Op.getOperand(i: `0`).getValueType().getSimpleVT() > MVT (MVT::i64))
16564	return SDValue ();
16565
16566	SDValue FirstOperand(Op.getOperand(i: `0`));
16567	bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16568	(FirstOperand.getValueType() == MVT::i8 \|\|
16569	FirstOperand.getValueType() == MVT::i16);
16570	if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16571	bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16572	bool DstDouble = Op.getValueType() == MVT::f64;
16573	unsigned ConvOp = Signed ?
16574	(DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16575	(DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16576	SDValue WidthConst =
16577	DAG.getIntPtrConstant(Val: FirstOperand.getValueType() == MVT::i8 ? `1` : `2`,
16578	DL: dl, isTarget: false);
16579	LoadSDNode *LDN = cast<LoadSDNode>(Val: FirstOperand.getNode());
16580	SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16581	SDValue Ld = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXSIZX, dl,
16582	VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
16583	Ops, MemVT: MVT::i8, MMO: LDN->getMemOperand());
16584	DAG.makeEquivalentMemoryOrdering(OldLoad: LDN, NewMemOp: Ld);
16585
16586	// For signed conversion, we need to sign-extend the value in the VSR
16587	if (Signed) {
16588	SDValue ExtOps[] = { Ld, WidthConst };
16589	SDValue Ext = DAG.getNode(Opcode: PPCISD::VEXTS, DL: dl, VT: MVT::f64, Ops: ExtOps);
16590	return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ext);
16591	} else
16592	return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ld);
16593	}
16594
16595
16596	// For i32 intermediate values, unfortunately, the conversion functions
16597	// leave the upper 32 bits of the value are undefined. Within the set of
16598	// scalar instructions, we have no method for zero- or sign-extending the
16599	// value. Thus, we cannot handle i32 intermediate values here.
16600	if (Op.getOperand(i: `0`).getValueType() == MVT::i32)
16601	return SDValue ();
16602
16603	assert((Op.getOpcode() == ISD::SINT_TO_FP \|\| Subtarget.hasFPCVT()) &&
16604	"UINT_TO_FP is supported only with FPCVT");
16605
16606	// If we have FCFIDS, then use it when converting to single-precision.
16607	// Otherwise, convert to double-precision and then round.
16608	unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16609	? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16610	: PPCISD::FCFIDS)
16611	: (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16612	: PPCISD::FCFID);
16613	MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16614	? MVT::f32
16615	: MVT::f64;
16616
16617	// If we're converting from a float, to an int, and back to a float again,
16618	// then we don't need the store/load pair at all.
16619	if ((Op.getOperand(i: `0`).getOpcode() == ISD::FP_TO_UINT &&
16620	Subtarget.hasFPCVT()) \|\|
16621	(Op.getOperand(i: `0`).getOpcode() == ISD::FP_TO_SINT)) {
16622	SDValue Src = Op.getOperand(i: `0`).getOperand(i: `0`);
16623	if (Src.getValueType() == MVT::f32) {
16624	Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
16625	DCI.AddToWorklist(N: Src.getNode());
16626	} else if (Src.getValueType() != MVT::f64) {
16627	// Make sure that we don't pick up a ppc_fp128 source value.
16628	return SDValue ();
16629	}
16630
16631	unsigned FCTOp =
16632	Op.getOperand(i: `0`).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16633	PPCISD::FCTIDUZ;
16634
16635	SDValue Tmp = DAG.getNode(Opcode: FCTOp, DL: dl, VT: MVT::f64, Operand: Src);
16636	SDValue FP = DAG.getNode(Opcode: FCFOp, DL: dl, VT: FCFTy, Operand: Tmp);
16637
16638	if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16639	FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
16640	N2: DAG.getIntPtrConstant(Val: `0`, DL: dl, /isTarget=/true));
16641	DCI.AddToWorklist(N: FP.getNode());
16642	}
16643
16644	return FP;
16645	}
16646
16647	return SDValue ();
16648	}
16649
16650	// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16651	// builtins) into loads with swaps.
16652	SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16653	DAGCombinerInfo &DCI) const {
16654	// Delay VSX load for LE combine until after LegalizeOps to prioritize other
16655	// load combines.
16656	if (DCI.isBeforeLegalizeOps())
16657	return SDValue ();
16658
16659	SelectionDAG &DAG = DCI.DAG;
16660	SDLoc dl(N);
16661	SDValue Chain;
16662	SDValue Base;
16663	MachineMemOperand *MMO;
16664
16665	switch (N->getOpcode()) {
16666	default:
16667	llvm_unreachable("Unexpected opcode for little endian VSX load");
16668	case ISD::LOAD: {
16669	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
16670	Chain = LD->getChain();
16671	Base = LD->getBasePtr();
16672	MMO = LD->getMemOperand();
16673	// If the MMO suggests this isn't a load of a full vector, leave
16674	// things alone. For a built-in, we have to make the change for
16675	// correctness, so if there is a size problem that will be a bug.
16676	if (!MMO->getSize().hasValue() \|\| MMO->getSize().getValue() < `16`)
16677	return SDValue ();
16678	break;
16679	}
16680	case ISD::INTRINSIC_W_CHAIN: {
16681	MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
16682	Chain = Intrin->getChain();
16683	// Similarly to the store case below, Intrin->getBasePtr() doesn't get
16684	// us what we want. Get operand 2 instead.
16685	Base = Intrin->getOperand(Num: `2`);
16686	MMO = Intrin->getMemOperand();
16687	break;
16688	}
16689	}
16690
16691	MVT VecTy = N->getValueType(ResNo: `0`).getSimpleVT();
16692
16693	SDValue LoadOps[] = { Chain, Base };
16694	SDValue Load = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVD2X, dl,
16695	VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other),
16696	Ops: LoadOps, MemVT: MVT::v2f64, MMO);
16697
16698	DCI.AddToWorklist(N: Load.getNode());
16699	Chain = Load.getValue(R: `1`);
16700	SDValue Swap = DAG.getNode(
16701	Opcode: PPCISD::XXSWAPD, DL: dl, VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Load);
16702	DCI.AddToWorklist(N: Swap.getNode());
16703
16704	// Add a bitcast if the resulting load type doesn't match v2f64.
16705	if (VecTy != MVT::v2f64) {
16706	SDValue N = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: VecTy, Operand: Swap);
16707	DCI.AddToWorklist(N: N.getNode());
16708	// Package {bitcast value, swap's chain} to match Load's shape.
16709	return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, VTList: DAG.getVTList(VT1: VecTy, VT2: MVT::Other),
16710	N1: N, N2: Swap.getValue(R: `1`));
16711	}
16712
16713	return Swap;
16714	}
16715
16716	// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16717	// builtins) into stores with swaps.
16718	SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16719	DAGCombinerInfo &DCI) const {
16720	// Delay VSX store for LE combine until after LegalizeOps to prioritize other
16721	// store combines.
16722	if (DCI.isBeforeLegalizeOps())
16723	return SDValue ();
16724
16725	SelectionDAG &DAG = DCI.DAG;
16726	SDLoc dl(N);
16727	SDValue Chain;
16728	SDValue Base;
16729	unsigned SrcOpnd;
16730	MachineMemOperand *MMO;
16731
16732	switch (N->getOpcode()) {
16733	default:
16734	llvm_unreachable("Unexpected opcode for little endian VSX store");
16735	case ISD::STORE: {
16736	StoreSDNode *ST = cast<StoreSDNode>(Val: N);
16737	Chain = ST->getChain();
16738	Base = ST->getBasePtr();
16739	MMO = ST->getMemOperand();
16740	SrcOpnd = `1`;
16741	// If the MMO suggests this isn't a store of a full vector, leave
16742	// things alone. For a built-in, we have to make the change for
16743	// correctness, so if there is a size problem that will be a bug.
16744	if (!MMO->getSize().hasValue() \|\| MMO->getSize().getValue() < `16`)
16745	return SDValue ();
16746	break;
16747	}
16748	case ISD::INTRINSIC_VOID: {
16749	MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
16750	Chain = Intrin->getChain();
16751	// Intrin->getBasePtr() oddly does not get what we want.
16752	Base = Intrin->getOperand(Num: `3`);
16753	MMO = Intrin->getMemOperand();
16754	SrcOpnd = `2`;
16755	break;
16756	}
16757	}
16758
16759	SDValue Src = N->getOperand(Num: SrcOpnd);
16760	MVT VecTy = Src.getValueType().getSimpleVT();
16761
16762	// All stores are done as v2f64 and possible bit cast.
16763	if (VecTy != MVT::v2f64) {
16764	Src = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: Src);
16765	DCI.AddToWorklist(N: Src.getNode());
16766	}
16767
16768	SDValue Swap = DAG.getNode(Opcode: PPCISD::XXSWAPD, DL: dl,
16769	VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Src);
16770	DCI.AddToWorklist(N: Swap.getNode());
16771	Chain = Swap.getValue(R: `1`);
16772	SDValue StoreOps[] = { Chain, Swap, Base };
16773	SDValue Store = DAG.getMemIntrinsicNode(Opcode: PPCISD::STXVD2X, dl,
16774	VTList: DAG.getVTList(VT: MVT::Other),
16775	Ops: StoreOps, MemVT: VecTy, MMO);
16776	DCI.AddToWorklist(N: Store.getNode());
16777	return Store;
16778	}
16779
16780	// Handle DAG combine for STORE (FP_TO_INT F).
16781	SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16782	DAGCombinerInfo &DCI) const {
16783	SelectionDAG &DAG = DCI.DAG;
16784	SDLoc dl(N);
16785	unsigned Opcode = N->getOperand(Num: `1`).getOpcode();
16786	(void)Opcode;
16787	bool Strict = N->getOperand(Num: `1`)->isStrictFPOpcode();
16788
16789	assert((Opcode == ISD::FP_TO_SINT \|\| Opcode == ISD::FP_TO_UINT \|\|
16790	Opcode == ISD::STRICT_FP_TO_SINT \|\| Opcode == ISD::STRICT_FP_TO_UINT)
16791	&& "Not a FP_TO_INT Instruction!");
16792
16793	SDValue Val = N->getOperand(Num: `1`).getOperand(i: Strict ? `1` : `0`);
16794	EVT Op1VT = N->getOperand(Num: `1`).getValueType();
16795	EVT ResVT = Val.getValueType();
16796
16797	if (!Subtarget.hasVSX() \|\| !Subtarget.hasFPCVT() \|\| !isTypeLegal(VT: ResVT))
16798	return SDValue ();
16799
16800	// Only perform combine for conversion to i64/i32 or power9 i16/i8.
16801	bool ValidTypeForStoreFltAsInt =
16802	(Op1VT == MVT::i32 \|\| (Op1VT == MVT::i64 && Subtarget.isPPC64()) \|\|
16803	(Subtarget.hasP9Vector() && (Op1VT == MVT::i16 \|\| Op1VT == MVT::i8)));
16804
16805	// TODO: Lower conversion from f128 on all VSX targets
16806	if (ResVT == MVT::ppcf128 \|\| (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16807	return SDValue ();
16808
16809	if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) \|\|
16810	cast<StoreSDNode>(Val: N)->isTruncatingStore() \|\| !ValidTypeForStoreFltAsInt)
16811	return SDValue ();
16812
16813	Val = convertFPToInt(Op: N->getOperand(Num: `1`), DAG, Subtarget);
16814
16815	// Set number of bytes being converted.
16816	unsigned ByteSize = Op1VT.getScalarSizeInBits() / `8`;
16817	SDValue Ops[] = {N->getOperand(Num: `0`), Val, N->getOperand(Num: `2`),
16818	DAG.getIntPtrConstant(Val: ByteSize, DL: dl, isTarget: false),
16819	DAG.getValueType(Op1VT)};
16820
16821	Val = DAG.getMemIntrinsicNode(Opcode: PPCISD::ST_VSR_SCAL_INT, dl,
16822	VTList: DAG.getVTList(VT: MVT::Other), Ops,
16823	MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
16824	MMO: cast<StoreSDNode>(Val: N)->getMemOperand());
16825
16826	return Val;
16827	}
16828
16829	static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16830	// Check that the source of the element keeps flipping
16831	// (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16832	bool PrevElemFromFirstVec = Mask [`0`] < NumElts;
16833	for (int i = `1`, e = Mask.size(); i < e; i++) {
16834	if (PrevElemFromFirstVec && Mask [i] < NumElts)
16835	return false;
16836	if (!PrevElemFromFirstVec && Mask [i] >= NumElts)
16837	return false;
16838	PrevElemFromFirstVec = !PrevElemFromFirstVec;
16839	}
16840	return true;
16841	}
16842
16843	static bool isSplatBV(SDValue Op) {
16844	if (Op.getOpcode() != ISD::BUILD_VECTOR)
16845	return false;
16846	SDValue FirstOp;
16847
16848	// Find first non-undef input.
16849	for (int i = `0`, e = Op.getNumOperands(); i < e; i++) {
16850	FirstOp = Op.getOperand(i);
16851	if (!FirstOp.isUndef())
16852	break;
16853	}
16854
16855	// All inputs are undef or the same as the first non-undef input.
16856	for (int i = `1`, e = Op.getNumOperands(); i < e; i++)
16857	if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16858	return false;
16859	return true;
16860	}
16861
16862	static SDValue isScalarToVec(SDValue Op) {
16863	if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16864	return Op;
16865	if (Op.getOpcode() != ISD::BITCAST)
16866	return SDValue ();
16867	Op = Op.getOperand(i: `0`);
16868	if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16869	return Op;
16870	return SDValue ();
16871	}
16872
16873	// Fix up the shuffle mask to account for the fact that the result of
16874	// scalar_to_vector is not in lane zero. This just takes all values in
16875	// the ranges specified by the min/max indices and adds the number of
16876	// elements required to ensure each element comes from the respective
16877	// position in the valid lane.
16878	// On little endian, that's just the corresponding element in the other
16879	// half of the vector. On big endian, it is in the same half but right
16880	// justified rather than left justified in that half.
16881	static void fixupShuffleMaskForPermutedSToV(
16882	SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16883	int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16884	unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16885	int LHSEltFixup =
16886	Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16887	int RHSEltFixup =
16888	Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16889	for (int I = `0`, E = ShuffV.size(); I < E; ++I) {
16890	int Idx = ShuffV [I];
16891	if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16892	ShuffV [I] += LHSEltFixup;
16893	else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16894	ShuffV [I] += RHSEltFixup;
16895	}
16896	}
16897
16898	// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16899	// the original is:
16900	// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16901	// In such a case, just change the shuffle mask to extract the element
16902	// from the permuted index.
16903	static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16904	const PPCSubtarget &Subtarget) {
16905	SDLoc dl(OrigSToV);
16906	EVT VT = OrigSToV.getValueType();
16907	assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16908	"Expecting a SCALAR_TO_VECTOR here");
16909	SDValue Input = OrigSToV.getOperand(i: `0`);
16910
16911	if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16912	ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Val: Input.getOperand(i: `1`));
16913	SDValue OrigVector = Input.getOperand(i: `0`);
16914
16915	// Can't handle non-const element indices or different vector types
16916	// for the input to the extract and the output of the scalar_to_vector.
16917	if (Idx && VT == OrigVector.getValueType()) {
16918	unsigned NumElts = VT.getVectorNumElements();
16919	assert(
16920	NumElts > `1` &&
16921	"Cannot produce a permuted scalar_to_vector for one element vector");
16922	SmallVector<int, `16`> NewMask(NumElts, -`1`);
16923	unsigned ResultInElt = NumElts / `2`;
16924	ResultInElt -= Subtarget.isLittleEndian() ? `0` : `1`;
16925	NewMask [ResultInElt] = Idx->getZExtValue();
16926	return DAG.getVectorShuffle(VT, dl, N1: OrigVector, N2: OrigVector, Mask: NewMask);
16927	}
16928	}
16929	return DAG.getNode(Opcode: PPCISD::SCALAR_TO_VECTOR_PERMUTED, DL: dl, VT,
16930	Operand: OrigSToV.getOperand(i: `0`));
16931	}
16932
16933	static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16934	int HalfVec, int LHSLastElementDefined,
16935	int RHSLastElementDefined) {
16936	for (int Index : ShuffV) {
16937	if (Index < `0`) // Skip explicitly undefined mask indices.
16938	continue;
16939	// Handle first input vector of the vector_shuffle.
16940	if ((LHSLastElementDefined >= `0`) && (Index < HalfVec) &&
16941	(Index > LHSLastElementDefined))
16942	return false;
16943	// Handle second input vector of the vector_shuffle.
16944	if ((RHSLastElementDefined >= `0`) &&
16945	(Index > HalfVec + RHSLastElementDefined))
16946	return false;
16947	}
16948	return true;
16949	}
16950
16951	static SDValue generateSToVPermutedForVecShuffle(
16952	int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16953	int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16954	SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16955	EVT VecShuffOperandType = VecShuffOperand.getValueType();
16956	// Set up the values for the shuffle vector fixup.
16957	NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16958	// The last element depends on if the input comes from the LHS or RHS.
16959	//
16960	// For example:
16961	// (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16962	//
16963	// For the LHS: The last element that comes from the LHS is actually 0, not 3
16964	// because elements 1 and higher of a scalar_to_vector are undefined.
16965	// For the RHS: The last element that comes from the RHS is actually 5, not 7
16966	// because elements 1 and higher of a scalar_to_vector are undefined.
16967	// It is also not 4 because the original scalar_to_vector is wider and
16968	// actually contains two i32 elements.
16969	LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16970	? ScalarSize / ShuffleEltWidth - `1` + FirstElt
16971	: FirstElt;
16972	SDValue SToVPermuted = getSToVPermuted(OrigSToV: SToVNode, DAG, Subtarget);
16973	if (SToVPermuted.getValueType() != VecShuffOperandType)
16974	SToVPermuted = DAG.getBitcast(VT: VecShuffOperandType, V: SToVPermuted);
16975	return SToVPermuted;
16976	}
16977
16978	// On little endian subtargets, combine shuffles such as:
16979	// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16980	// into:
16981	// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16982	// because the latter can be matched to a single instruction merge.
16983	// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16984	// to put the value into element zero. Adjust the shuffle mask so that the
16985	// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16986	// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16987	// nodes with elements smaller than doubleword because all the ways
16988	// of getting scalar data into a vector register put the value in the
16989	// rightmost element of the left half of the vector.
16990	SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16991	SelectionDAG &DAG) const {
16992	SDValue LHS = SVN->getOperand(Num: `0`);
16993	SDValue RHS = SVN->getOperand(Num: `1`);
16994	auto Mask = SVN->getMask();
16995	int NumElts = LHS.getValueType().getVectorNumElements();
16996	SDValue Res(SVN, `0`);
16997	SDLoc dl(SVN);
16998	bool IsLittleEndian = Subtarget.isLittleEndian();
16999
17000	// On big endian targets this is only useful for subtargets with direct moves.
17001	// On little endian targets it would be useful for all subtargets with VSX.
17002	// However adding special handling for LE subtargets without direct moves
17003	// would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
17004	// which includes direct moves.
17005	if (!Subtarget.hasDirectMove())
17006	return Res;
17007
17008	// If this is not a shuffle of a shuffle and the first element comes from
17009	// the second vector, canonicalize to the commuted form. This will make it
17010	// more likely to match one of the single instruction patterns.
17011	if (Mask [`0`] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
17012	RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
17013	std::swap(a&: LHS, b&: RHS);
17014	Res = DAG.getCommutedVectorShuffle(SV: *SVN);
17015
17016	if (!isa<ShuffleVectorSDNode>(Val: Res))
17017	return Res;
17018
17019	Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
17020	}
17021
17022	// Adjust the shuffle mask if either input vector comes from a
17023	// SCALAR_TO_VECTOR and keep the respective input vector in permuted
17024	// form (to prevent the need for a swap).
17025	SmallVector<int, `16`> ShuffV(Mask);
17026	SDValue SToVLHS = isScalarToVec(Op: LHS);
17027	SDValue SToVRHS = isScalarToVec(Op: RHS);
17028	if (SToVLHS \|\| SToVRHS) {
17029	EVT VT = SVN->getValueType(ResNo: `0`);
17030	uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
17031	int ShuffleNumElts = ShuffV.size();
17032	int HalfVec = ShuffleNumElts / `2`;
17033	// The width of the "valid lane" (i.e. the lane that contains the value that
17034	// is vectorized) needs to be expressed in terms of the number of elements
17035	// of the shuffle. It is thereby the ratio of the values before and after
17036	// any bitcast, which will be set later on if the LHS or RHS are
17037	// SCALAR_TO_VECTOR nodes.
17038	unsigned LHSNumValidElts = HalfVec;
17039	unsigned RHSNumValidElts = HalfVec;
17040
17041	// Initially assume that neither input is permuted. These will be adjusted
17042	// accordingly if either input is. Note, that -1 means that all elements
17043	// are undefined.
17044	int LHSFirstElt = `0`;
17045	int RHSFirstElt = ShuffleNumElts;
17046	int LHSLastElt = -`1`;
17047	int RHSLastElt = -`1`;
17048
17049	// Get the permuted scalar to vector nodes for the source(s) that come from
17050	// ISD::SCALAR_TO_VECTOR.
17051	// On big endian systems, this only makes sense for element sizes smaller
17052	// than 64 bits since for 64-bit elements, all instructions already put
17053	// the value into element zero. Since scalar size of LHS and RHS may differ
17054	// after isScalarToVec, this should be checked using their own sizes.
17055	int LHSScalarSize = `0`;
17056	int RHSScalarSize = `0`;
17057	if (SToVLHS) {
17058	LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
17059	if (!IsLittleEndian && LHSScalarSize >= `64`)
17060	return Res;
17061	}
17062	if (SToVRHS) {
17063	RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
17064	if (!IsLittleEndian && RHSScalarSize >= `64`)
17065	return Res;
17066	}
17067	if (LHSScalarSize != `0`)
17068	LHS = generateSToVPermutedForVecShuffle(
17069	ScalarSize: LHSScalarSize, ShuffleEltWidth, NumValidElts&: LHSNumValidElts, FirstElt: LHSFirstElt,
17070	LastElt&: LHSLastElt, VecShuffOperand: LHS, SToVNode: SToVLHS, DAG, Subtarget);
17071	if (RHSScalarSize != `0`)
17072	RHS = generateSToVPermutedForVecShuffle(
17073	ScalarSize: RHSScalarSize, ShuffleEltWidth, NumValidElts&: RHSNumValidElts, FirstElt: RHSFirstElt,
17074	LastElt&: RHSLastElt, VecShuffOperand: RHS, SToVNode: SToVRHS, DAG, Subtarget);
17075
17076	if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElementDefined: LHSLastElt, RHSLastElementDefined: RHSLastElt))
17077	return Res;
17078
17079	// Fix up the shuffle mask to reflect where the desired element actually is.
17080	// The minimum and maximum indices that correspond to element zero for both
17081	// the LHS and RHS are computed and will control which shuffle mask entries
17082	// are to be changed. For example, if the RHS is permuted, any shuffle mask
17083	// entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
17084	fixupShuffleMaskForPermutedSToV(
17085	ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
17086	LHSNumValidElts, RHSNumValidElts, Subtarget);
17087	Res = DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: `0`), dl, N1: LHS, N2: RHS, Mask: ShuffV);
17088
17089	// We may have simplified away the shuffle. We won't be able to do anything
17090	// further with it here.
17091	if (!isa<ShuffleVectorSDNode>(Val: Res))
17092	return Res;
17093	Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
17094	}
17095
17096	SDValue TheSplat = IsLittleEndian ? RHS : LHS;
17097	// The common case after we commuted the shuffle is that the RHS is a splat
17098	// and we have elements coming in from the splat at indices that are not
17099	// conducive to using a merge.
17100	// Example:
17101	// vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
17102	if (!isSplatBV(Op: TheSplat))
17103	return Res;
17104
17105	// We are looking for a mask such that all even elements are from
17106	// one vector and all odd elements from the other.
17107	if (!isAlternatingShuffMask(Mask, NumElts))
17108	return Res;
17109
17110	// Adjust the mask so we are pulling in the same index from the splat
17111	// as the index from the interesting vector in consecutive elements.
17112	if (IsLittleEndian) {
17113	// Example (even elements from first vector):
17114	// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
17115	if (Mask [`0`] < NumElts)
17116	for (int i = `1`, e = Mask.size(); i < e; i += `2`) {
17117	if (ShuffV [i] < `0`)
17118	continue;
17119	// If element from non-splat is undef, pick first element from splat.
17120	ShuffV [i] = (ShuffV [i - `1`] >= `0` ? ShuffV [i - `1`] : `0`) + NumElts;
17121	}
17122	// Example (odd elements from first vector):
17123	// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
17124	else
17125	for (int i = `0`, e = Mask.size(); i < e; i += `2`) {
17126	if (ShuffV [i] < `0`)
17127	continue;
17128	// If element from non-splat is undef, pick first element from splat.
17129	ShuffV [i] = (ShuffV [i + `1`] >= `0` ? ShuffV [i + `1`] : `0`) + NumElts;
17130	}
17131	} else {
17132	// Example (even elements from first vector):
17133	// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
17134	if (Mask [`0`] < NumElts)
17135	for (int i = `0`, e = Mask.size(); i < e; i += `2`) {
17136	if (ShuffV [i] < `0`)
17137	continue;
17138	// If element from non-splat is undef, pick first element from splat.
17139	ShuffV [i] = ShuffV [i + `1`] >= `0` ? ShuffV [i + `1`] - NumElts : `0`;
17140	}
17141	// Example (odd elements from first vector):
17142	// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
17143	else
17144	for (int i = `1`, e = Mask.size(); i < e; i += `2`) {
17145	if (ShuffV [i] < `0`)
17146	continue;
17147	// If element from non-splat is undef, pick first element from splat.
17148	ShuffV [i] = ShuffV [i - `1`] >= `0` ? ShuffV [i - `1`] - NumElts : `0`;
17149	}
17150	}
17151
17152	// If the RHS has undefs, we need to remove them since we may have created
17153	// a shuffle that adds those instead of the splat value.
17154	SDValue SplatVal =
17155	cast<BuildVectorSDNode>(Val: TheSplat.getNode())->getSplatValue();
17156	TheSplat = DAG.getSplatBuildVector(VT: TheSplat.getValueType(), DL: dl, Op: SplatVal);
17157
17158	if (IsLittleEndian)
17159	RHS = TheSplat;
17160	else
17161	LHS = TheSplat;
17162	return DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: `0`), dl, N1: LHS, N2: RHS, Mask: ShuffV);
17163	}
17164
17165	SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
17166	LSBaseSDNode *LSBase,
17167	DAGCombinerInfo &DCI) const {
17168	assert((ISD::isNormalLoad(LSBase) \|\| ISD::isNormalStore(LSBase)) &&
17169	"Not a reverse memop pattern!");
17170
17171	auto IsElementReverse = [](const ShuffleVectorSDNode SVN) -> bool* {
17172	auto Mask = SVN->getMask();
17173	int i = `0`;
17174	auto I = Mask.rbegin();
17175	auto E = Mask.rend();
17176
17177	for (; I != E; ++I) {
17178	if (*I != i)
17179	return false;
17180	i++;
17181	}
17182	return true;
17183	};
17184
17185	SelectionDAG &DAG = DCI.DAG;
17186	EVT VT = SVN->getValueType(ResNo: `0`);
17187
17188	if (!isTypeLegal(VT) \|\| !Subtarget.isLittleEndian() \|\| !Subtarget.hasVSX())
17189	return SDValue ();
17190
17191	// Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
17192	// See comment in PPCVSXSwapRemoval.cpp.
17193	// It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
17194	if (!Subtarget.hasP9Vector())
17195	return SDValue ();
17196
17197	if(!IsElementReverse (SVN))
17198	return SDValue ();
17199
17200	if (LSBase->getOpcode() == ISD::LOAD) {
17201	// If the load return value 0 has more than one user except the
17202	// shufflevector instruction, it is not profitable to replace the
17203	// shufflevector with a reverse load.
17204	for (SDUse &Use : LSBase->uses())
17205	if (Use.getResNo() == `0` &&
17206	Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
17207	return SDValue ();
17208
17209	SDLoc dl(LSBase);
17210	SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
17211	return DAG.getMemIntrinsicNode(
17212	Opcode: PPCISD::LOAD_VEC_BE, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops: LoadOps,
17213	MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
17214	}
17215
17216	if (LSBase->getOpcode() == ISD::STORE) {
17217	// If there are other uses of the shuffle, the swap cannot be avoided.
17218	// Forcing the use of an X-Form (since swapped stores only have
17219	// X-Forms) without removing the swap is unprofitable.
17220	if (!SVN->hasOneUse())
17221	return SDValue ();
17222
17223	SDLoc dl(LSBase);
17224	SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(Num: `0`),
17225	LSBase->getBasePtr()};
17226	return DAG.getMemIntrinsicNode(
17227	Opcode: PPCISD::STORE_VEC_BE, dl, VTList: DAG.getVTList(VT: MVT::Other), Ops: StoreOps,
17228	MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
17229	}
17230
17231	llvm_unreachable("Expected a load or store node here");
17232	}
17233
17234	static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17235	unsigned IntrinsicID = Intrin.getConstantOperandVal(i: `1`);
17236	if (IntrinsicID == Intrinsic::ppc_stdcx)
17237	StoreWidth = `8`;
17238	else if (IntrinsicID == Intrinsic::ppc_stwcx)
17239	StoreWidth = `4`;
17240	else if (IntrinsicID == Intrinsic::ppc_sthcx)
17241	StoreWidth = `2`;
17242	else if (IntrinsicID == Intrinsic::ppc_stbcx)
17243	StoreWidth = `1`;
17244	else
17245	return false;
17246	return true;
17247	}
17248
17249	static SDValue DAGCombineAddc(SDNode *N,
17250	llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
17251	if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(Value: `1`)) {
17252	// (ADDC (ADDE 0, 0, C), -1) -> C
17253	SDValue LHS = N->getOperand(Num: `0`);
17254	SDValue RHS = N->getOperand(Num: `1`);
17255	if (LHS ->getOpcode() == PPCISD::ADDE &&
17256	isNullConstant(V: LHS ->getOperand(Num: `0`)) &&
17257	isNullConstant(V: LHS ->getOperand(Num: `1`)) && isAllOnesConstant(V: RHS)) {
17258	return DCI.CombineTo(N, Res0: SDValue (N, `0`), Res1: LHS ->getOperand(Num: `2`));
17259	}
17260	}
17261	return SDValue ();
17262	}
17263
17264	// Optimize zero-extension of setcc when the compared value is known to be 0
17265	// or 1.
17266	//
17267	// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
17268	// -> zext(xor(Value, 1)) for seteq
17269	// -> zext(Value) for setne
17270	//
17271	// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
17272	// by keeping the value in its original i32 type throughout.
17273	//
17274	// Example:
17275	// Before: zext(setcc(test_data_class(...), 0, seteq))
17276	// // test_data_class returns 0 or 1 in i32
17277	// // setcc converts i32 -> i1
17278	// // zext converts i1 -> i64
17279	// After: zext(xor(test_data_class(...), 1))
17280	// // Stays in i32, then extends to i64
17281	//
17282	// This is beneficial because:
17283	// 1. Eliminates the setcc instruction
17284	// 2. Avoids i32 -> i1 truncation
17285	// 3. Keeps computation in native integer width
17286
17287	static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
17288	// Check if this is a zero_extend
17289	if (N->getOpcode() != ISD::ZERO_EXTEND)
17290	return SDValue ();
17291
17292	SDValue Src = N->getOperand(Num: `0`);
17293
17294	// Check if the source is a setcc
17295	if (Src.getOpcode() != ISD::SETCC)
17296	return SDValue ();
17297
17298	SDValue LHS = Src.getOperand(i: `0`);
17299	SDValue RHS = Src.getOperand(i: `1`);
17300	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Src.getOperand(i: `2`))->get();
17301
17302	if (!isNullConstant(V: RHS) && !isNullConstant(V: LHS))
17303	return SDValue ();
17304
17305	SDValue NonNullConstant = isNullConstant(V: RHS) ? LHS : RHS;
17306
17307	auto isZeroOrOne = [=](SDValue &V) {
17308	if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17309	V.getConstantOperandVal(i: `0`) == Intrinsic::ppc_test_data_class)
17310	return true;
17311	return false;
17312	};
17313
17314	if (!isZeroOrOne (NonNullConstant))
17315	return SDValue ();
17316
17317	// Check for pattern: zext(setcc (Value), 0, seteq)) or
17318	// zext(setcc (Value), 0, setne))
17319	if (CC == ISD::SETEQ \|\| CC == ISD::SETNE) {
17320	// Replace with: zext(xor(Value, 1)) for seteq
17321	// or: zext(Value) for setne
17322	// This keeps the value in i32 instead of converting to i1
17323	SDLoc DL(N);
17324	EVT VType = N->getValueType(ResNo: `0`);
17325	SDValue NewNonNullConstant = DAG.getZExtOrTrunc(Op: NonNullConstant, DL, VT: VType);
17326
17327	if (CC == ISD::SETNE)
17328	return NewNonNullConstant;
17329
17330	SDValue One = DAG.getConstant(Val: `1`, DL, VT: VType);
17331	return DAG.getNode(Opcode: ISD::XOR, DL, VT: VType, N1: NewNonNullConstant, N2: One);
17332	}
17333
17334	return SDValue ();
17335	}
17336
17337	// Combine XOR patterns with SELECT_CC_I4/I8, for Example:
17338	// 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
17339	// 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
17340	// 1, cc))
17341	// 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
17342	// 0, 1, cc))
17343	// 4. etc
17344	static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
17345	assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
17346
17347	EVT XorVT = N->getValueType(ResNo: `0`);
17348	if ((XorVT != MVT::i32 && XorVT != MVT::i64))
17349	return SDValue ();
17350
17351	SDValue LHS = N->getOperand(Num: `0`);
17352	SDValue RHS = N->getOperand(Num: `1`);
17353
17354	// Check for XOR with constant 1
17355	ConstantSDNode *XorConst = dyn_cast<ConstantSDNode>(Val&: RHS);
17356	if (!XorConst \|\| !XorConst->isOne()) {
17357	XorConst = dyn_cast<ConstantSDNode>(Val&: LHS);
17358	if (!XorConst \|\| !XorConst->isOne())
17359	return SDValue ();
17360	// Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
17361	std::swap(a&: LHS, b&: RHS);
17362	}
17363
17364	// Check if LHS has only one use
17365	if (!LHS.hasOneUse())
17366	return SDValue ();
17367
17368	// Handle extensions: ZEXT, ANYEXT
17369	SDValue SelectNode = LHS;
17370
17371	if (LHS.getOpcode() == ISD::ZERO_EXTEND \|\|
17372	LHS.getOpcode() == ISD::ANY_EXTEND) {
17373	SelectNode = LHS.getOperand(i: `0`);
17374
17375	// Check if the extension input has only one use
17376	if (!SelectNode.hasOneUse())
17377	return SDValue ();
17378	}
17379
17380	// Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
17381	if (!SelectNode.isMachineOpcode())
17382	return SDValue ();
17383
17384	unsigned MachineOpc = SelectNode.getMachineOpcode();
17385
17386	// Handle both SELECT_CC_I4 and SELECT_CC_I8
17387	if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
17388	return SDValue ();
17389
17390	// SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
17391	if (SelectNode.getNumOperands() != `4`)
17392	return SDValue ();
17393
17394	ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(Val: SelectNode.getOperand(i: `1`));
17395	ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(Val: SelectNode.getOperand(i: `2`));
17396
17397	if (!ConstOp1 \|\| !ConstOp2)
17398	return SDValue ();
17399
17400	// Only optimize if operands are {0, 1} or {1, 0}
17401	if (!((ConstOp1->isOne() && ConstOp2->isZero()) \|\|
17402	(ConstOp1->isZero() && ConstOp2->isOne())))
17403	return SDValue ();
17404
17405	// Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
17406	// eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
17407	// SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
17408	// create SELECT_CC(cond, 1, 0, pred).
17409	SDLoc DL(N);
17410	MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
17411
17412	bool ConstOp1IsOne = ConstOp1->isOne();
17413	return SDValue (
17414	DAG.getMachineNode(Opcode: MachineOpc, dl: DL, VT: XorVT,
17415	Ops: {SelectNode.getOperand(i: `0`),
17416	DAG.getConstant(Val: ConstOp1IsOne ? `0` : `1`, DL, VT: XorVT),
17417	DAG.getConstant(Val: ConstOp1IsOne ? `1` : `0`, DL, VT: XorVT),
17418	SelectNode.getOperand(i: `3`)}),
17419	`0`);
17420	}
17421
17422	SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
17423	DAGCombinerInfo &DCI) const {
17424	SelectionDAG &DAG = DCI.DAG;
17425	SDLoc dl(N);
17426	switch (N->getOpcode()) {
17427	default: break;
17428	case ISD::ADD:
17429	return combineADD(N, DCI);
17430	case ISD::AND: {
17431	// We don't want (and (zext (shift...)), C) if C fits in the width of the
17432	// original input as that will prevent us from selecting optimal rotates.
17433	// This only matters if the input to the extend is i32 widened to i64.
17434	SDValue Op1 = N->getOperand(Num: `0`);
17435	SDValue Op2 = N->getOperand(Num: `1`);
17436	if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17437	Op1.getOpcode() != ISD::ANY_EXTEND) \|\|
17438	!isa<ConstantSDNode>(Val: Op2) \|\| N->getValueType(ResNo: `0`) != MVT::i64 \|\|
17439	Op1.getOperand(i: `0`).getValueType() != MVT::i32)
17440	break;
17441	SDValue NarrowOp = Op1.getOperand(i: `0`);
17442	if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17443	NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17444	break;
17445
17446	uint64_t Imm = Op2 ->getAsZExtVal();
17447	// Make sure that the constant is narrow enough to fit in the narrow type.
17448	if (!isUInt<`32`>(x: Imm))
17449	break;
17450	SDValue ConstOp = DAG.getConstant(Val: Imm, DL: dl, VT: MVT::i32);
17451	SDValue NarrowAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: NarrowOp, N2: ConstOp);
17452	return DAG.getZExtOrTrunc(Op: NarrowAnd, DL: dl, VT: N->getValueType(ResNo: `0`));
17453	}
17454	case ISD::XOR: {
17455	// Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17456	if (SDValue V = combineXorSelectCC(N, DAG))
17457	return V;
17458	break;
17459	}
17460	case ISD::SHL:
17461	return combineSHL(N, DCI);
17462	case ISD::SRA:
17463	return combineSRA(N, DCI);
17464	case ISD::SRL:
17465	return combineSRL(N, DCI);
17466	case ISD::MUL:
17467	return combineMUL(N, DCI);
17468	case ISD::FMA:
17469	case PPCISD::FNMSUB:
17470	return combineFMALike(N, DCI);
17471	case PPCISD::SHL:
17472	if (isNullConstant(V: N->getOperand(Num: `0`))) // 0 << V -> 0.
17473	return N->getOperand(Num: `0`);
17474	break;
17475	case PPCISD::SRL:
17476	if (isNullConstant(V: N->getOperand(Num: `0`))) // 0 >>u V -> 0.
17477	return N->getOperand(Num: `0`);
17478	break;
17479	case PPCISD::SRA:
17480	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `0`))) {
17481	if (C->isZero() \|\| // 0 >>s V -> 0.
17482	C->isAllOnes()) // -1 >>s V -> -1.
17483	return N->getOperand(Num: `0`);
17484	}
17485	break;
17486	case ISD::ZERO_EXTEND:
17487	if (SDValue RetV = combineZextSetccWithZero(N, DAG&: DCI.DAG))
17488	return RetV;
17489	[[fallthrough]];
17490	case ISD::SIGN_EXTEND:
17491	case ISD::ANY_EXTEND:
17492	return DAGCombineExtBoolTrunc(N, DCI);
17493	case ISD::TRUNCATE:
17494	return combineTRUNCATE(N, DCI);
17495	case ISD::SETCC:
17496	if (SDValue CSCC = combineSetCC(N, DCI))
17497	return CSCC;
17498	[[fallthrough]];
17499	case ISD::SELECT_CC:
17500	return DAGCombineTruncBoolExt(N, DCI);
17501	case ISD::SINT_TO_FP:
17502	case ISD::UINT_TO_FP:
17503	return combineFPToIntToFP(N, DCI);
17504	case ISD::VECTOR_SHUFFLE:
17505	if (ISD::isNormalLoad(N: N->getOperand(Num: `0`).getNode())) {
17506	LSBaseSDNode* LSBase = cast<LSBaseSDNode>(Val: N->getOperand(Num: `0`));
17507	return combineVReverseMemOP(SVN: cast<ShuffleVectorSDNode>(Val: N), LSBase, DCI);
17508	}
17509	return combineVectorShuffle(SVN: cast<ShuffleVectorSDNode>(Val: N), DAG&: DCI.DAG);
17510	case ISD::STORE: {
17511
17512	EVT Op1VT = N->getOperand(Num: `1`).getValueType();
17513	unsigned Opcode = N->getOperand(Num: `1`).getOpcode();
17514
17515	if (Opcode == ISD::FP_TO_SINT \|\| Opcode == ISD::FP_TO_UINT \|\|
17516	Opcode == ISD::STRICT_FP_TO_SINT \|\| Opcode == ISD::STRICT_FP_TO_UINT) {
17517	SDValue Val = combineStoreFPToInt(N, DCI);
17518	if (Val)
17519	return Val;
17520	}
17521
17522	if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17523	ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N->getOperand(Num: `1`));
17524	SDValue Val= combineVReverseMemOP(SVN, LSBase: cast<LSBaseSDNode>(Val: N), DCI);
17525	if (Val)
17526	return Val;
17527	}
17528
17529	// Turn STORE (BSWAP) -> sthbrx/stwbrx.
17530	if (cast<StoreSDNode>(Val: N)->isUnindexed() && Opcode == ISD::BSWAP &&
17531	N->getOperand(Num: `1`).getNode()->hasOneUse() &&
17532	(Op1VT == MVT::i32 \|\| Op1VT == MVT::i16 \|\|
17533	(Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17534
17535	// STBRX can only handle simple types and it makes no sense to store fewer
17536	// than two bytes in byte-reversed order.
17537	EVT mVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
17538	if (mVT.isExtended() \|\| mVT.getSizeInBits() < `16`)
17539	break;
17540
17541	SDValue BSwapOp = N->getOperand(Num: `1`).getOperand(i: `0`);
17542	// Do an any-extend to 32-bits if this is a half-word input.
17543	if (BSwapOp.getValueType() == MVT::i16)
17544	BSwapOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: BSwapOp);
17545
17546	// If the type of BSWAP operand is wider than stored memory width
17547	// it need to be shifted to the right side before STBRX.
17548	if (Op1VT.bitsGT(VT: mVT)) {
17549	int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17550	BSwapOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: Op1VT, N1: BSwapOp,
17551	N2: DAG.getConstant(Val: Shift, DL: dl, VT: MVT::i32));
17552	// Need to truncate if this is a bswap of i64 stored as i32/i16.
17553	if (Op1VT == MVT::i64)
17554	BSwapOp = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: BSwapOp);
17555	}
17556
17557	SDValue Ops[] = {
17558	N->getOperand(Num: `0`), BSwapOp, N->getOperand(Num: `2`), DAG.getValueType(mVT)
17559	};
17560	return
17561	DAG.getMemIntrinsicNode(Opcode: PPCISD::STBRX, dl, VTList: DAG.getVTList(VT: MVT::Other),
17562	Ops, MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
17563	MMO: cast<StoreSDNode>(Val: N)->getMemOperand());
17564	}
17565
17566	// STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17567	// So it can increase the chance of CSE constant construction.
17568	if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17569	isa<ConstantSDNode>(Val: N->getOperand(Num: `1`)) && Op1VT == MVT::i32) {
17570	// Need to sign-extend to 64 bits to handle negative values.
17571	EVT MemVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
17572	uint64_t Val64 = SignExtend64(X: N->getConstantOperandVal(Num: `1`),
17573	B: MemVT.getSizeInBits());
17574	SDValue Const64 = DAG.getConstant(Val: Val64, DL: dl, VT: MVT::i64);
17575
17576	auto *ST = cast<StoreSDNode>(Val: N);
17577	SDValue NewST = DAG.getStore(Chain: ST->getChain(), dl, Val: Const64,
17578	Ptr: ST->getBasePtr(), Offset: ST->getOffset(), SVT: MemVT,
17579	MMO: ST->getMemOperand(), AM: ST->getAddressingMode(),
17580	/IsTruncating=/true);
17581	// Note we use CombineTo here to prevent DAGCombiner from visiting the
17582	// new store which will change the constant by removing non-demanded bits.
17583	return ST->isUnindexed()
17584	? DCI.CombineTo(N, Res: NewST, /AddTo=/false)
17585	: DCI.CombineTo(N, Res0: NewST, Res1: NewST.getValue(R: `1`), /AddTo=/false);
17586	}
17587
17588	// For little endian, VSX stores require generating xxswapd/lxvd2x.
17589	// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17590	if (Op1VT.isSimple()) {
17591	MVT StoreVT = Op1VT.getSimpleVT();
17592	if (Subtarget.needsSwapsForVSXMemOps() &&
17593	(StoreVT == MVT::v2f64 \|\| StoreVT == MVT::v2i64 \|\|
17594	StoreVT == MVT::v4f32 \|\| StoreVT == MVT::v4i32))
17595	return expandVSXStoreForLE(N, DCI);
17596	}
17597	break;
17598	}
17599	case ISD::LOAD: {
17600	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
17601	EVT VT = LD->getValueType(ResNo: `0`);
17602
17603	// For little endian, VSX loads require generating lxvd2x/xxswapd.
17604	// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17605	if (VT.isSimple()) {
17606	MVT LoadVT = VT.getSimpleVT();
17607	if (Subtarget.needsSwapsForVSXMemOps() &&
17608	(LoadVT == MVT::v2f64 \|\| LoadVT == MVT::v2i64 \|\|
17609	LoadVT == MVT::v4f32 \|\| LoadVT == MVT::v4i32))
17610	return expandVSXLoadForLE(N, DCI);
17611	}
17612
17613	// We sometimes end up with a 64-bit integer load, from which we extract
17614	// two single-precision floating-point numbers. This happens with
17615	// std::complex<float>, and other similar structures, because of the way we
17616	// canonicalize structure copies. However, if we lack direct moves,
17617	// then the final bitcasts from the extracted integer values to the
17618	// floating-point numbers turn into store/load pairs. Even with direct moves,
17619	// just loading the two floating-point numbers is likely better.
17620	auto ReplaceTwoFloatLoad = [&]() {
17621	if (VT != MVT::i64)
17622	return false;
17623
17624	if (LD->getExtensionType() != ISD::NON_EXTLOAD \|\|
17625	LD->isVolatile())
17626	return false;
17627
17628	// We're looking for a sequence like this:
17629	// t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17630	// t16: i64 = srl t13, Constant:i32<32>
17631	// t17: i32 = truncate t16
17632	// t18: f32 = bitcast t17
17633	// t19: i32 = truncate t13
17634	// t20: f32 = bitcast t19
17635
17636	if (!LD->hasNUsesOfValue(NUses: `2`, Value: `0`))
17637	return false;
17638
17639	auto UI = LD->user_begin();
17640	while (UI.getUse().getResNo() != `0`) ++UI;
17641	SDNode Trunc = UI ++;
17642	while (UI.getUse().getResNo() != `0`) ++UI;
17643	SDNode RightShift = UI;
17644	if (Trunc->getOpcode() != ISD::TRUNCATE)
17645	std::swap(a&: Trunc, b&: RightShift);
17646
17647	if (Trunc->getOpcode() != ISD::TRUNCATE \|\|
17648	Trunc->getValueType(ResNo: `0`) != MVT::i32 \|\|
17649	!Trunc->hasOneUse())
17650	return false;
17651	if (RightShift->getOpcode() != ISD::SRL \|\|
17652	!isa<ConstantSDNode>(Val: RightShift->getOperand(Num: `1`)) \|\|
17653	RightShift->getConstantOperandVal(Num: `1`) != `32` \|\|
17654	!RightShift->hasOneUse())
17655	return false;
17656
17657	SDNode Trunc2 = RightShift->user_begin();
17658	if (Trunc2->getOpcode() != ISD::TRUNCATE \|\|
17659	Trunc2->getValueType(ResNo: `0`) != MVT::i32 \|\|
17660	!Trunc2->hasOneUse())
17661	return false;
17662
17663	SDNode Bitcast = Trunc->user_begin();
17664	SDNode Bitcast2 = Trunc2->user_begin();
17665
17666	if (Bitcast->getOpcode() != ISD::BITCAST \|\|
17667	Bitcast->getValueType(ResNo: `0`) != MVT::f32)
17668	return false;
17669	if (Bitcast2->getOpcode() != ISD::BITCAST \|\|
17670	Bitcast2->getValueType(ResNo: `0`) != MVT::f32)
17671	return false;
17672
17673	if (Subtarget.isLittleEndian())
17674	std::swap(a&: Bitcast, b&: Bitcast2);
17675
17676	// Bitcast has the second float (in memory-layout order) and Bitcast2
17677	// has the first one.
17678
17679	SDValue BasePtr = LD->getBasePtr();
17680	if (LD->isIndexed()) {
17681	assert(LD->getAddressingMode() == ISD::PRE_INC &&
17682	"Non-pre-inc AM on PPC?");
17683	BasePtr =
17684	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
17685	N2: LD->getOffset());
17686	}
17687
17688	auto MMOFlags =
17689	LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17690	SDValue FloatLoad = DAG.getLoad(VT: MVT::f32, dl, Chain: LD->getChain(), Ptr: BasePtr,
17691	PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign(),
17692	MMOFlags, AAInfo: LD->getAAInfo());
17693	SDValue AddPtr =
17694	DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(),
17695	N1: BasePtr, N2: DAG.getIntPtrConstant(Val: `4`, DL: dl));
17696	SDValue FloatLoad2 = DAG.getLoad(
17697	VT: MVT::f32, dl, Chain: SDValue (FloatLoad.getNode(), `1`), Ptr: AddPtr,
17698	PtrInfo: LD->getPointerInfo().getWithOffset(O: `4`),
17699	Alignment: commonAlignment(A: LD->getAlign(), Offset: `4`), MMOFlags, AAInfo: LD->getAAInfo());
17700
17701	if (LD->isIndexed()) {
17702	// Note that DAGCombine should re-form any pre-increment load(s) from
17703	// what is produced here if that makes sense.
17704	DAG.ReplaceAllUsesOfValueWith(From: SDValue (LD, `1`), To: BasePtr);
17705	}
17706
17707	DCI.CombineTo(N: Bitcast2, Res: FloatLoad);
17708	DCI.CombineTo(N: Bitcast, Res: FloatLoad2);
17709
17710	DAG.ReplaceAllUsesOfValueWith(From: SDValue (LD, LD->isIndexed() ? `2` : `1`),
17711	To: SDValue (FloatLoad2.getNode(), `1`));
17712	return true;
17713	};
17714
17715	if (ReplaceTwoFloatLoad ())
17716	return SDValue (N, `0`);
17717
17718	EVT MemVT = LD->getMemoryVT();
17719	Type Ty = MemVT.getTypeForEVT(Context&: DAG.getContext());
17720	Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17721	if (LD->isUnindexed() && VT.isVector() &&
17722	((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17723	// P8 and later hardware should just use LOAD.
17724	!Subtarget.hasP8Vector() &&
17725	(VT == MVT::v16i8 \|\| VT == MVT::v8i16 \|\| VT == MVT::v4i32 \|\|
17726	VT == MVT::v4f32))) &&
17727	LD->getAlign() < ABIAlignment) {
17728	// This is a type-legal unaligned Altivec load.
17729	SDValue Chain = LD->getChain();
17730	SDValue Ptr = LD->getBasePtr();
17731	bool isLittleEndian = Subtarget.isLittleEndian();
17732
17733	// This implements the loading of unaligned vectors as described in
17734	// the venerable Apple Velocity Engine overview. Specifically:
17735	// https://developer.apple.com/hardwaredrivers/ve/alignment.html
17736	// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17737	//
17738	// The general idea is to expand a sequence of one or more unaligned
17739	// loads into an alignment-based permutation-control instruction (lvsl
17740	// or lvsr), a series of regular vector loads (which always truncate
17741	// their input address to an aligned address), and a series of
17742	// permutations. The results of these permutations are the requested
17743	// loaded values. The trick is that the last "extra" load is not taken
17744	// from the address you might suspect (sizeof(vector) bytes after the
17745	// last requested load), but rather sizeof(vector) - 1 bytes after the
17746	// last requested vector. The point of this is to avoid a page fault if
17747	// the base address happened to be aligned. This works because if the
17748	// base address is aligned, then adding less than a full vector length
17749	// will cause the last vector in the sequence to be (re)loaded.
17750	// Otherwise, the next vector will be fetched as you might suspect was
17751	// necessary.
17752
17753	// We might be able to reuse the permutation generation from
17754	// a different base address offset from this one by an aligned amount.
17755	// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17756	// optimization later.
17757	Intrinsic::ID Intr, IntrLD, IntrPerm;
17758	MVT PermCntlTy, PermTy, LDTy;
17759	Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17760	: Intrinsic::ppc_altivec_lvsl;
17761	IntrLD = Intrinsic::ppc_altivec_lvx;
17762	IntrPerm = Intrinsic::ppc_altivec_vperm;
17763	PermCntlTy = MVT::v16i8;
17764	PermTy = MVT::v4i32;
17765	LDTy = MVT::v4i32;
17766
17767	SDValue PermCntl = BuildIntrinsicOp(IID: Intr, Op: Ptr, DAG, dl, DestVT: PermCntlTy);
17768
17769	// Create the new MMO for the new base load. It is like the original MMO,
17770	// but represents an area in memory almost twice the vector size centered
17771	// on the original address. If the address is unaligned, we might start
17772	// reading up to (sizeof(vector)-1) bytes below the address of the
17773	// original unaligned load.
17774	MachineFunction &MF = DAG.getMachineFunction();
17775	MachineMemOperand *BaseMMO =
17776	MF.getMachineMemOperand(MMO: LD->getMemOperand(),
17777	Offset: -(int64_t)MemVT.getStoreSize()+`1`,
17778	Size: `2`*MemVT.getStoreSize()-`1`);
17779
17780	// Create the new base load.
17781	SDValue LDXIntID =
17782	DAG.getTargetConstant(Val: IntrLD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
17783	SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17784	SDValue BaseLoad =
17785	DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
17786	VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
17787	Ops: BaseLoadOps, MemVT: LDTy, MMO: BaseMMO);
17788
17789	// Note that the value of IncOffset (which is provided to the next
17790	// load's pointer info offset value, and thus used to calculate the
17791	// alignment), and the value of IncValue (which is actually used to
17792	// increment the pointer value) are different! This is because we
17793	// require the next load to appear to be aligned, even though it
17794	// is actually offset from the base pointer by a lesser amount.
17795	int IncOffset = VT.getSizeInBits() / `8`;
17796	int IncValue = IncOffset;
17797
17798	// Walk (both up and down) the chain looking for another load at the real
17799	// (aligned) offset (the alignment of the other load does not matter in
17800	// this case). If found, then do not use the offset reduction trick, as
17801	// that will prevent the loads from being later combined (as they would
17802	// otherwise be duplicates).
17803	if (!findConsecutiveLoad(LD, DAG))
17804	--IncValue;
17805
17806	SDValue Increment =
17807	DAG.getConstant(Val: IncValue, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
17808	Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Ptr.getValueType(), N1: Ptr, N2: Increment);
17809
17810	MachineMemOperand *ExtraMMO =
17811	MF.getMachineMemOperand(MMO: LD->getMemOperand(),
17812	Offset: `1`, Size: `2`*MemVT.getStoreSize()-`1`);
17813	SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17814	SDValue ExtraLoad =
17815	DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
17816	VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
17817	Ops: ExtraLoadOps, MemVT: LDTy, MMO: ExtraMMO);
17818
17819	SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
17820	N1: BaseLoad.getValue(R: `1`), N2: ExtraLoad.getValue(R: `1`));
17821
17822	// Because vperm has a big-endian bias, we must reverse the order
17823	// of the input vectors and complement the permute control vector
17824	// when generating little endian code. We have already handled the
17825	// latter by using lvsr instead of lvsl, so just reverse BaseLoad
17826	// and ExtraLoad here.
17827	SDValue Perm;
17828	if (isLittleEndian)
17829	Perm = BuildIntrinsicOp(IID: IntrPerm,
17830	Op0: ExtraLoad, Op1: BaseLoad, Op2: PermCntl, DAG, dl);
17831	else
17832	Perm = BuildIntrinsicOp(IID: IntrPerm,
17833	Op0: BaseLoad, Op1: ExtraLoad, Op2: PermCntl, DAG, dl);
17834
17835	if (VT != PermTy)
17836	Perm = Subtarget.hasAltivec()
17837	? DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Perm)
17838	: DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Perm,
17839	N2: DAG.getTargetConstant(Val: `1`, DL: dl, VT: MVT::i64));
17840	// second argument is 1 because this rounding
17841	// is always exact.
17842
17843	// The output of the permutation is our loaded result, the TokenFactor is
17844	// our new chain.
17845	DCI.CombineTo(N, Res0: Perm, Res1: TF);
17846	return SDValue (N, `0`);
17847	}
17848	}
17849	break;
17850	case ISD::INTRINSIC_WO_CHAIN: {
17851	bool isLittleEndian = Subtarget.isLittleEndian();
17852	unsigned IID = N->getConstantOperandVal(Num: `0`);
17853	Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17854	: Intrinsic::ppc_altivec_lvsl);
17855	if (IID == Intr && N->getOperand(Num: `1`)->getOpcode() == ISD::ADD) {
17856	SDValue Add = N->getOperand(Num: `1`);
17857
17858	int Bits = `4` / 16 byte alignment /;
17859
17860	if (DAG.MaskedValueIsZero(Op: Add ->getOperand(Num: `1`),
17861	Mask: APInt::getAllOnes(numBits: Bits / alignment /)
17862	.zext(width: Add.getScalarValueSizeInBits()))) {
17863	SDNode *BasePtr = Add ->getOperand(Num: `0`).getNode();
17864	for (SDNode *U : BasePtr->users()) {
17865	if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17866	U->getConstantOperandVal(Num: `0`) == IID) {
17867	// We've found another LVSL/LVSR, and this address is an aligned
17868	// multiple of that one. The results will be the same, so use the
17869	// one we've just found instead.
17870
17871	return SDValue (U, `0`);
17872	}
17873	}
17874	}
17875
17876	if (isa<ConstantSDNode>(Val: Add ->getOperand(Num: `1`))) {
17877	SDNode *BasePtr = Add ->getOperand(Num: `0`).getNode();
17878	for (SDNode *U : BasePtr->users()) {
17879	if (U->getOpcode() == ISD::ADD &&
17880	isa<ConstantSDNode>(Val: U->getOperand(Num: `1`)) &&
17881	(Add ->getConstantOperandVal(Num: `1`) - U->getConstantOperandVal(Num: `1`)) %
17882	(`1ULL` << Bits) ==
17883	`0`) {
17884	SDNode *OtherAdd = U;
17885	for (SDNode *V : OtherAdd->users()) {
17886	if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17887	V->getConstantOperandVal(Num: `0`) == IID) {
17888	return SDValue (V, `0`);
17889	}
17890	}
17891	}
17892	}
17893	}
17894	}
17895
17896	// Combine vmaxsw/h/b(a, a's negation) to abs(a)
17897	// Expose the vabsduw/h/b opportunity for down stream
17898	if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17899	(IID == Intrinsic::ppc_altivec_vmaxsw \|\|
17900	IID == Intrinsic::ppc_altivec_vmaxsh \|\|
17901	IID == Intrinsic::ppc_altivec_vmaxsb)) {
17902	SDValue V1 = N->getOperand(Num: `1`);
17903	SDValue V2 = N->getOperand(Num: `2`);
17904	if ((V1.getSimpleValueType() == MVT::v4i32 \|\|
17905	V1.getSimpleValueType() == MVT::v8i16 \|\|
17906	V1.getSimpleValueType() == MVT::v16i8) &&
17907	V1.getSimpleValueType() == V2.getSimpleValueType()) {
17908	// (0-a, a)
17909	if (V1.getOpcode() == ISD::SUB &&
17910	ISD::isBuildVectorAllZeros(N: V1.getOperand(i: `0`).getNode()) &&
17911	V1.getOperand(i: `1`) == V2) {
17912	return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V2.getValueType(), Operand: V2);
17913	}
17914	// (a, 0-a)
17915	if (V2.getOpcode() == ISD::SUB &&
17916	ISD::isBuildVectorAllZeros(N: V2.getOperand(i: `0`).getNode()) &&
17917	V2.getOperand(i: `1`) == V1) {
17918	return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17919	}
17920	// (x-y, y-x)
17921	if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17922	V1.getOperand(i: `0`) == V2.getOperand(i: `1`) &&
17923	V1.getOperand(i: `1`) == V2.getOperand(i: `0`)) {
17924	return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17925	}
17926	}
17927	}
17928	}
17929
17930	break;
17931	case ISD::INTRINSIC_W_CHAIN:
17932	switch (N->getConstantOperandVal(Num: `1`)) {
17933	default:
17934	break;
17935	case Intrinsic::ppc_altivec_vsum4sbs:
17936	case Intrinsic::ppc_altivec_vsum4shs:
17937	case Intrinsic::ppc_altivec_vsum4ubs: {
17938	// These sum-across intrinsics only have a chain due to the side effect
17939	// that they may set the SAT bit. If we know the SAT bit will not be set
17940	// for some inputs, we can replace any uses of their chain with the
17941	// input chain.
17942	if (BuildVectorSDNode *BVN =
17943	dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: `3`))) {
17944	APInt APSplatBits, APSplatUndef;
17945	unsigned SplatBitSize;
17946	bool HasAnyUndefs;
17947	bool BVNIsConstantSplat = BVN->isConstantSplat(
17948	SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: `0`,
17949	isBigEndian: !Subtarget.isLittleEndian());
17950	// If the constant splat vector is 0, the SAT bit will not be set.
17951	if (BVNIsConstantSplat && APSplatBits == `0`)
17952	DAG.ReplaceAllUsesOfValueWith(From: SDValue (N, `1`), To: N->getOperand(Num: `0`));
17953	}
17954	return SDValue ();
17955	}
17956	case Intrinsic::ppc_vsx_lxvw4x:
17957	case Intrinsic::ppc_vsx_lxvd2x:
17958	// For little endian, VSX loads require generating lxvd2x/xxswapd.
17959	// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17960	if (Subtarget.needsSwapsForVSXMemOps())
17961	return expandVSXLoadForLE(N, DCI);
17962	break;
17963	}
17964	break;
17965	case ISD::INTRINSIC_VOID:
17966	// For little endian, VSX stores require generating xxswapd/stxvd2x.
17967	// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17968	if (Subtarget.needsSwapsForVSXMemOps()) {
17969	switch (N->getConstantOperandVal(Num: `1`)) {
17970	default:
17971	break;
17972	case Intrinsic::ppc_vsx_stxvw4x:
17973	case Intrinsic::ppc_vsx_stxvd2x:
17974	return expandVSXStoreForLE(N, DCI);
17975	}
17976	}
17977	break;
17978	case ISD::BSWAP: {
17979	// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17980	// For subtargets without LDBRX, we can still do better than the default
17981	// expansion even for 64-bit BSWAP (LOAD).
17982	bool Is64BitBswapOn64BitTgt =
17983	Subtarget.isPPC64() && N->getValueType(ResNo: `0`) == MVT::i64;
17984	bool IsSingleUseNormalLd = ISD::isNormalLoad(N: N->getOperand(Num: `0`).getNode()) &&
17985	N->getOperand(Num: `0`).hasOneUse();
17986	if (IsSingleUseNormalLd &&
17987	(N->getValueType(ResNo: `0`) == MVT::i32 \|\| N->getValueType(ResNo: `0`) == MVT::i16 \|\|
17988	(Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17989	SDValue Load = N->getOperand(Num: `0`);
17990	LoadSDNode *LD = cast<LoadSDNode>(Val&: Load);
17991	// Create the byte-swapping load.
17992	SDValue Ops[] = {
17993	LD->getChain(), // Chain
17994	LD->getBasePtr(), // Ptr
17995	DAG.getValueType(N->getValueType(ResNo: `0`)) // VT
17996	};
17997	SDValue BSLoad =
17998	DAG.getMemIntrinsicNode(Opcode: PPCISD::LBRX, dl,
17999	VTList: DAG.getVTList(VT1: N->getValueType(ResNo: `0`) == MVT::i64 ?
18000	MVT::i64 : MVT::i32, VT2: MVT::Other),
18001	Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
18002
18003	// If this is an i16 load, insert the truncate.
18004	SDValue ResVal = BSLoad;
18005	if (N->getValueType(ResNo: `0`) == MVT::i16)
18006	ResVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i16, Operand: BSLoad);
18007
18008	// First, combine the bswap away. This makes the value produced by the
18009	// load dead.
18010	DCI.CombineTo(N, Res: ResVal);
18011
18012	// Next, combine the load away, we give it a bogus result value but a real
18013	// chain result. The result value is dead because the bswap is dead.
18014	DCI.CombineTo(N: Load.getNode(), Res0: ResVal, Res1: BSLoad.getValue(R: `1`));
18015
18016	// Return N so it doesn't get rechecked!
18017	return SDValue (N, `0`);
18018	}
18019	// Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18020	// before legalization so that the BUILD_PAIR is handled correctly.
18021	if (!DCI.isBeforeLegalize() \|\| !Is64BitBswapOn64BitTgt \|\|
18022	!IsSingleUseNormalLd)
18023	return SDValue ();
18024	LoadSDNode *LD = cast<LoadSDNode>(Val: N->getOperand(Num: `0`));
18025
18026	// Can't split volatile or atomic loads.
18027	if (!LD->isSimple())
18028	return SDValue ();
18029	SDValue BasePtr = LD->getBasePtr();
18030	SDValue Lo = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr,
18031	PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign());
18032	Lo = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Lo);
18033	BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
18034	N2: DAG.getIntPtrConstant(Val: `4`, DL: dl));
18035	MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
18036	MMO: LD->getMemOperand(), Offset: `4`, Size: `4`);
18037	SDValue Hi = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr, MMO: NewMMO);
18038	Hi = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Hi);
18039	SDValue Res;
18040	if (Subtarget.isLittleEndian())
18041	Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Hi, N2: Lo);
18042	else
18043	Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
18044	SDValue TF =
18045	DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
18046	N1: Hi.getOperand(i: `0`).getValue(R: `1`), N2: Lo.getOperand(i: `0`).getValue(R: `1`));
18047	DAG.ReplaceAllUsesOfValueWith(From: SDValue (LD, `1`), To: TF);
18048	return Res;
18049	}
18050	case PPCISD::VCMP:
18051	// If a VCMP_rec node already exists with exactly the same operands as this
18052	// node, use its result instead of this node (VCMP_rec computes both a CR6
18053	// and a normal output).
18054	//
18055	if (!N->getOperand(Num: `0`).hasOneUse() &&
18056	!N->getOperand(Num: `1`).hasOneUse() &&
18057	!N->getOperand(Num: `2`).hasOneUse()) {
18058
18059	// Scan all of the users of the LHS, looking for VCMP_rec's that match.
18060	SDNode VCMPrecNode = nullptr*;
18061
18062	SDNode *LHSN = N->getOperand(Num: `0`).getNode();
18063	for (SDNode *User : LHSN->users())
18064	if (User->getOpcode() == PPCISD::VCMP_rec &&
18065	User->getOperand(Num: `1`) == N->getOperand(Num: `1`) &&
18066	User->getOperand(Num: `2`) == N->getOperand(Num: `2`) &&
18067	User->getOperand(Num: `0`) == N->getOperand(Num: `0`)) {
18068	VCMPrecNode = User;
18069	break;
18070	}
18071
18072	// If there is no VCMP_rec node, or if the flag value has a single use,
18073	// don't transform this.
18074	if (!VCMPrecNode \|\| VCMPrecNode->hasNUsesOfValue(NUses: `0`, Value: `1`))
18075	break;
18076
18077	// Look at the (necessarily single) use of the flag value. If it has a
18078	// chain, this transformation is more complex. Note that multiple things
18079	// could use the value result, which we should ignore.
18080	SDNode FlagUser = nullptr*;
18081	for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18082	FlagUser == nullptr; ++UI) {
18083	assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18084	SDNode *User = UI ->getUser();
18085	for (unsigned i = `0`, e = User->getNumOperands(); i != e; ++i) {
18086	if (User->getOperand(Num: i) == SDValue (VCMPrecNode, `1`)) {
18087	FlagUser = User;
18088	break;
18089	}
18090	}
18091	}
18092
18093	// If the user is a MFOCRF instruction, we know this is safe.
18094	// Otherwise we give up for right now.
18095	if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18096	return SDValue (VCMPrecNode, `0`);
18097	}
18098	break;
18099	case ISD::BR_CC: {
18100	// If this is a branch on an altivec predicate comparison, lower this so
18101	// that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18102	// lowering is done pre-legalize, because the legalizer lowers the predicate
18103	// compare down to code that is difficult to reassemble.
18104	// This code also handles branches that depend on the result of a store
18105	// conditional.
18106	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `1`))->get();
18107	SDValue LHS = N->getOperand(Num: `2`), RHS = N->getOperand(Num: `3`);
18108
18109	int CompareOpc;
18110	bool isDot;
18111
18112	if (!isa<ConstantSDNode>(Val: RHS) \|\| (CC != ISD::SETEQ && CC != ISD::SETNE))
18113	break;
18114
18115	// Since we are doing this pre-legalize, the RHS can be a constant of
18116	// arbitrary bitwidth which may cause issues when trying to get the value
18117	// from the underlying APInt.
18118	auto RHSAPInt = RHS ->getAsAPIntVal();
18119	if (!RHSAPInt.isIntN(N: `64`))
18120	break;
18121
18122	unsigned Val = RHSAPInt.getZExtValue();
18123	auto isImpossibleCompare = [&]() {
18124	// If this is a comparison against something other than 0/1, then we know
18125	// that the condition is never/always true.
18126	if (Val != `0` && Val != `1`) {
18127	if (CC == ISD::SETEQ) // Cond never true, remove branch.
18128	return N->getOperand(Num: `0`);
18129	// Always !=, turn it into an unconditional branch.
18130	return DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other,
18131	N1: N->getOperand(Num: `0`), N2: N->getOperand(Num: `4`));
18132	}
18133	return SDValue ();
18134	};
18135	// Combine branches fed by store conditional instructions (st[bhwd]cx).
18136	unsigned StoreWidth = `0`;
18137	if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18138	isStoreConditional(Intrin: LHS, StoreWidth)) {
18139	if (SDValue Impossible = isImpossibleCompare ())
18140	return Impossible;
18141	PPC::Predicate CompOpc;
18142	// eq 0 => ne
18143	// ne 0 => eq
18144	// eq 1 => eq
18145	// ne 1 => ne
18146	if (Val == `0`)
18147	CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18148	else
18149	CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18150
18151	SDValue Ops[] = {LHS.getOperand(i: `0`), LHS.getOperand(i: `2`), LHS.getOperand(i: `3`),
18152	DAG.getConstant(Val: StoreWidth, DL: dl, VT: MVT::i32)};
18153	auto *MemNode = cast<MemSDNode>(Val&: LHS);
18154	SDValue ConstSt = DAG.getMemIntrinsicNode(
18155	Opcode: PPCISD::STORE_COND, dl,
18156	VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other, VT3: MVT::Glue), Ops,
18157	MemVT: MemNode->getMemoryVT(), MMO: MemNode->getMemOperand());
18158
18159	SDValue InChain;
18160	// Unchain the branch from the original store conditional.
18161	if (N->getOperand(Num: `0`) == LHS.getValue(R: `1`))
18162	InChain = LHS.getOperand(i: `0`);
18163	else if (N->getOperand(Num: `0`).getOpcode() == ISD::TokenFactor) {
18164	SmallVector<SDValue, `4`> InChains;
18165	SDValue InTF = N->getOperand(Num: `0`);
18166	for (int i = `0`, e = InTF.getNumOperands(); i < e; i++)
18167	if (InTF.getOperand(i) != LHS.getValue(R: `1`))
18168	InChains.push_back(Elt: InTF.getOperand(i));
18169	InChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: InChains);
18170	}
18171
18172	return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: InChain,
18173	N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
18174	N3: DAG.getRegister(Reg: PPC::CR0, VT: MVT::i32), N4: N->getOperand(Num: `4`),
18175	N5: ConstSt.getValue(R: `2`));
18176	}
18177
18178	if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18179	getVectorCompareInfo(Intrin: LHS, CompareOpc, isDot, Subtarget)) {
18180	assert(isDot && "Can't compare against a vector result!");
18181
18182	if (SDValue Impossible = isImpossibleCompare ())
18183	return Impossible;
18184
18185	bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == `0`);
18186	// Create the PPCISD altivec 'dot' comparison node.
18187	SDValue Ops[] = {
18188	LHS.getOperand(i: `2`), // LHS of compare
18189	LHS.getOperand(i: `3`), // RHS of compare
18190	DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
18191	};
18192	EVT VTs[] = { LHS.getOperand(i: `2`).getValueType(), MVT::Glue };
18193	SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);
18194
18195	// Unpack the result based on how the target uses it.
18196	PPC::Predicate CompOpc;
18197	switch (LHS.getConstantOperandVal(i: `1`)) {
18198	default: // Can't happen, don't crash on invalid number though.
18199	case `0`: // Branch on the value of the EQ bit of CR6.
18200	CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18201	break;
18202	case `1`: // Branch on the inverted value of the EQ bit of CR6.
18203	CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18204	break;
18205	case `2`: // Branch on the value of the LT bit of CR6.
18206	CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18207	break;
18208	case `3`: // Branch on the inverted value of the LT bit of CR6.
18209	CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18210	break;
18211	}
18212
18213	return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: N->getOperand(Num: `0`),
18214	N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
18215	N3: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32),
18216	N4: N->getOperand(Num: `4`), N5: CompNode.getValue(R: `1`));
18217	}
18218	break;
18219	}
18220	case ISD::BUILD_VECTOR:
18221	return DAGCombineBuildVector(N, DCI);
18222	case PPCISD::ADDC:
18223	return DAGCombineAddc(N, DCI);
18224	}
18225
18226	return SDValue ();
18227	}
18228
18229	SDValue
18230	PPCTargetLowering::BuildSDIVPow2(SDNode N, const* APInt &Divisor,
18231	SelectionDAG &DAG,
18232	SmallVectorImpl<SDNode > &Created) const* {
18233	// fold (sdiv X, pow2)
18234	EVT VT = N->getValueType(ResNo: `0`);
18235	if (VT == MVT::i64 && !Subtarget.isPPC64())
18236	return SDValue ();
18237	if ((VT != MVT::i32 && VT != MVT::i64) \|\|
18238	!(Divisor.isPowerOf2() \|\| Divisor.isNegatedPowerOf2()))
18239	return SDValue ();
18240
18241	SDLoc DL(N);
18242	SDValue N0 = N->getOperand(Num: `0`);
18243
18244	bool IsNegPow2 = Divisor.isNegatedPowerOf2();
18245	unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18246	SDValue ShiftAmt = DAG.getConstant(Val: Lg2, DL, VT);
18247
18248	SDValue Op = DAG.getNode(Opcode: PPCISD::SRA_ADDZE, DL, VT, N1: N0, N2: ShiftAmt);
18249	Created.push_back(Elt: Op.getNode());
18250
18251	if (IsNegPow2) {
18252	Op = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: `0`, DL, VT), N2: Op);
18253	Created.push_back(Elt: Op.getNode());
18254	}
18255
18256	return Op;
18257	}
18258
18259	//===----------------------------------------------------------------------===//
18260	// Inline Assembly Support
18261	//===----------------------------------------------------------------------===//
18262
18263	void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18264	KnownBits &Known,
18265	const APInt &DemandedElts,
18266	const SelectionDAG &DAG,
18267	unsigned Depth) const {
18268	Known.resetAll();
18269	switch (Op.getOpcode()) {
18270	default: break;
18271	case PPCISD::LBRX: {
18272	// lhbrx is known to have the top bits cleared out.
18273	if (cast<VTSDNode>(Val: Op.getOperand(i: `2`))->getVT() == MVT::i16)
18274	Known.Zero = `0xFFFF0000`;
18275	break;
18276	}
18277	case PPCISD::ADDE: {
18278	if (Op.getResNo() == `0`) {
18279	// (0\|1), _ = ADDE 0, 0, CARRY
18280	SDValue LHS = Op.getOperand(i: `0`);
18281	SDValue RHS = Op.getOperand(i: `1`);
18282	if (isNullConstant(V: LHS) && isNullConstant(V: RHS))
18283	Known.Zero = ~`1ULL`;
18284	}
18285	break;
18286	}
18287	case ISD::INTRINSIC_WO_CHAIN: {
18288	switch (Op.getConstantOperandVal(i: `0`)) {
18289	default: break;
18290	case Intrinsic::ppc_altivec_vcmpbfp_p:
18291	case Intrinsic::ppc_altivec_vcmpeqfp_p:
18292	case Intrinsic::ppc_altivec_vcmpequb_p:
18293	case Intrinsic::ppc_altivec_vcmpequh_p:
18294	case Intrinsic::ppc_altivec_vcmpequw_p:
18295	case Intrinsic::ppc_altivec_vcmpequd_p:
18296	case Intrinsic::ppc_altivec_vcmpequq_p:
18297	case Intrinsic::ppc_altivec_vcmpgefp_p:
18298	case Intrinsic::ppc_altivec_vcmpgtfp_p:
18299	case Intrinsic::ppc_altivec_vcmpgtsb_p:
18300	case Intrinsic::ppc_altivec_vcmpgtsh_p:
18301	case Intrinsic::ppc_altivec_vcmpgtsw_p:
18302	case Intrinsic::ppc_altivec_vcmpgtsd_p:
18303	case Intrinsic::ppc_altivec_vcmpgtsq_p:
18304	case Intrinsic::ppc_altivec_vcmpgtub_p:
18305	case Intrinsic::ppc_altivec_vcmpgtuh_p:
18306	case Intrinsic::ppc_altivec_vcmpgtuw_p:
18307	case Intrinsic::ppc_altivec_vcmpgtud_p:
18308	case Intrinsic::ppc_altivec_vcmpgtuq_p:
18309	Known.Zero = ~`1U`; // All bits but the low one are known to be zero.
18310	break;
18311	}
18312	break;
18313	}
18314	case ISD::INTRINSIC_W_CHAIN: {
18315	switch (Op.getConstantOperandVal(i: `1`)) {
18316	default:
18317	break;
18318	case Intrinsic::ppc_load2r:
18319	// Top bits are cleared for load2r (which is the same as lhbrx).
18320	Known.Zero = `0xFFFF0000`;
18321	break;
18322	}
18323	break;
18324	}
18325	}
18326	}
18327
18328	Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop ML) const* {
18329	switch (Subtarget.getCPUDirective()) {
18330	default: break;
18331	case PPC::DIR_970:
18332	case PPC::DIR_PWR4:
18333	case PPC::DIR_PWR5:
18334	case PPC::DIR_PWR5X:
18335	case PPC::DIR_PWR6:
18336	case PPC::DIR_PWR6X:
18337	case PPC::DIR_PWR7:
18338	case PPC::DIR_PWR8:
18339	case PPC::DIR_PWR9:
18340	case PPC::DIR_PWR10:
18341	case PPC::DIR_PWR11:
18342	case PPC::DIR_PWR_FUTURE: {
18343	if (!ML)
18344	break;
18345
18346	if (!DisableInnermostLoopAlign32) {
18347	// If the nested loop is an innermost loop, prefer to a 32-byte alignment,
18348	// so that we can decrease cache misses and branch-prediction misses.
18349	// Actual alignment of the loop will depend on the hotness check and other
18350	// logic in alignBlocks.
18351	if (ML->getLoopDepth() > `1` && ML->getSubLoops().empty())
18352	return Align (`32`);
18353	}
18354
18355	const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18356
18357	// For small loops (between 5 and 8 instructions), align to a 32-byte
18358	// boundary so that the entire loop fits in one instruction-cache line.
18359	uint64_t LoopSize = `0`;
18360	for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18361	for (const MachineInstr &J : **I) {
18362	LoopSize += TII->getInstSizeInBytes(MI: J);
18363	if (LoopSize > `32`)
18364	break;
18365	}
18366
18367	if (LoopSize > `16` && LoopSize <= `32`)
18368	return Align (`32`);
18369
18370	break;
18371	}
18372	}
18373
18374	return TargetLowering::getPrefLoopAlignment(ML);
18375	}
18376
18377	/// getConstraintType - Given a constraint, return the type of
18378	/// constraint it is for this target.
18379	PPCTargetLowering::ConstraintType
18380	PPCTargetLowering::getConstraintType(StringRef Constraint) const {
18381	if (Constraint.size() == `1`) {
18382	switch (Constraint [`0`]) {
18383	default: break;
18384	case `'b'`:
18385	case `'r'`:
18386	case `'f'`:
18387	case `'d'`:
18388	case `'v'`:
18389	case `'y'`:
18390	return C_RegisterClass;
18391	case `'Z'`:
18392	// FIXME: While Z does indicate a memory constraint, it specifically
18393	// indicates an r+r address (used in conjunction with the 'y' modifier
18394	// in the replacement string). Currently, we're forcing the base
18395	// register to be r0 in the asm printer (which is interpreted as zero)
18396	// and forming the complete address in the second register. This is
18397	// suboptimal.
18398	return C_Memory;
18399	}
18400	} else if (Constraint == "wc") { // individual CR bits.
18401	return C_RegisterClass;
18402	} else if (Constraint == "wa" \|\| Constraint == "wd" \|\|
18403	Constraint == "wf" \|\| Constraint == "ws" \|\|
18404	Constraint == "wi" \|\| Constraint == "ww") {
18405	return C_RegisterClass; // VSX registers.
18406	}
18407	return TargetLowering::getConstraintType(Constraint);
18408	}
18409
18410	/// Examine constraint type and operand type and determine a weight value.
18411	/// This object must already have been set up with the operand type
18412	/// and the current alternative constraint selected.
18413	TargetLowering::ConstraintWeight
18414	PPCTargetLowering::getSingleConstraintMatchWeight(
18415	AsmOperandInfo &info, const char constraint) const* {
18416	ConstraintWeight weight = CW_Invalid;
18417	Value *CallOperandVal = info.CallOperandVal;
18418	// If we don't have a value, we can't do a match,
18419	// but allow it at the lowest weight.
18420	if (!CallOperandVal)
18421	return CW_Default;
18422	Type *type = CallOperandVal->getType();
18423
18424	// Look at the constraint type.
18425	if (StringRef (constraint) == "wc" && type->isIntegerTy(Bitwidth: `1`))
18426	return CW_Register; // an individual CR bit.
18427	else if ((StringRef (constraint) == "wa" \|\|
18428	StringRef (constraint) == "wd" \|\|
18429	StringRef (constraint) == "wf") &&
18430	type->isVectorTy())
18431	return CW_Register;
18432	else if (StringRef (constraint) == "wi" && type->isIntegerTy(Bitwidth: `64`))
18433	return CW_Register; // just hold 64-bit integers data.
18434	else if (StringRef (constraint) == "ws" && type->isDoubleTy())
18435	return CW_Register;
18436	else if (StringRef (constraint) == "ww" && type->isFloatTy())
18437	return CW_Register;
18438
18439	switch (*constraint) {
18440	default:
18441	weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
18442	break;
18443	case `'b'`:
18444	if (type->isIntegerTy())
18445	weight = CW_Register;
18446	break;
18447	case `'f'`:
18448	if (type->isFloatTy())
18449	weight = CW_Register;
18450	break;
18451	case `'d'`:
18452	if (type->isDoubleTy())
18453	weight = CW_Register;
18454	break;
18455	case `'v'`:
18456	if (type->isVectorTy())
18457	weight = CW_Register;
18458	break;
18459	case `'y'`:
18460	weight = CW_Register;
18461	break;
18462	case `'Z'`:
18463	weight = CW_Memory;
18464	break;
18465	}
18466	return weight;
18467	}
18468
18469	std::pair<unsigned, const TargetRegisterClass *>
18470	PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
18471	StringRef Constraint,
18472	MVT VT) const {
18473	if (Constraint.size() == `1`) {
18474	// GCC RS6000 Constraint Letters
18475	switch (Constraint [`0`]) {
18476	case `'b'`: // R1-R31
18477	if (VT == MVT::i64 && Subtarget.isPPC64())
18478	return std::make_pair(x: `0U`, y: &PPC::G8RC_NOX0RegClass);
18479	return std::make_pair(x: `0U`, y: &PPC::GPRC_NOR0RegClass);
18480	case `'r'`: // R0-R31
18481	if (VT == MVT::i64 && Subtarget.isPPC64())
18482	return std::make_pair(x: `0U`, y: &PPC::G8RCRegClass);
18483	return std::make_pair(x: `0U`, y: &PPC::GPRCRegClass);
18484	// 'd' and 'f' constraints are both defined to be "the floating point
18485	// registers", where one is for 32-bit and the other for 64-bit. We don't
18486	// really care overly much here so just give them all the same reg classes.
18487	case `'d'`:
18488	case `'f'`:
18489	if (Subtarget.hasSPE()) {
18490	if (VT == MVT::f32 \|\| VT == MVT::i32)
18491	return std::make_pair(x: `0U`, y: &PPC::GPRCRegClass);
18492	if (VT == MVT::f64 \|\| VT == MVT::i64)
18493	return std::make_pair(x: `0U`, y: &PPC::SPERCRegClass);
18494	} else {
18495	if (VT == MVT::f32 \|\| VT == MVT::i32)
18496	return std::make_pair(x: `0U`, y: &PPC::F4RCRegClass);
18497	if (VT == MVT::f64 \|\| VT == MVT::i64)
18498	return std::make_pair(x: `0U`, y: &PPC::F8RCRegClass);
18499	}
18500	break;
18501	case `'v'`:
18502	if (Subtarget.hasAltivec() && VT.isVector())
18503	return std::make_pair(x: `0U`, y: &PPC::VRRCRegClass);
18504	else if (Subtarget.hasVSX())
18505	// Scalars in Altivec registers only make sense with VSX.
18506	return std::make_pair(x: `0U`, y: &PPC::VFRCRegClass);
18507	break;
18508	case `'y'`: // crrc
18509	return std::make_pair(x: `0U`, y: &PPC::CRRCRegClass);
18510	}
18511	} else if (Constraint == "wc" && Subtarget.useCRBits()) {
18512	// An individual CR bit.
18513	return std::make_pair(x: `0U`, y: &PPC::CRBITRCRegClass);
18514	} else if ((Constraint == "wa" \|\| Constraint == "wd" \|\|
18515	Constraint == "wf" \|\| Constraint == "wi") &&
18516	Subtarget.hasVSX()) {
18517	// A VSX register for either a scalar (FP) or vector. There is no
18518	// support for single precision scalars on subtargets prior to Power8.
18519	if (VT.isVector())
18520	return std::make_pair(x: `0U`, y: &PPC::VSRCRegClass);
18521	if (VT == MVT::f32 && Subtarget.hasP8Vector())
18522	return std::make_pair(x: `0U`, y: &PPC::VSSRCRegClass);
18523	return std::make_pair(x: `0U`, y: &PPC::VSFRCRegClass);
18524	} else if ((Constraint == "ws" \|\| Constraint == "ww") && Subtarget.hasVSX()) {
18525	if (VT == MVT::f32 && Subtarget.hasP8Vector())
18526	return std::make_pair(x: `0U`, y: &PPC::VSSRCRegClass);
18527	else
18528	return std::make_pair(x: `0U`, y: &PPC::VSFRCRegClass);
18529	} else if (Constraint == "lr") {
18530	if (VT == MVT::i64)
18531	return std::make_pair(x: `0U`, y: &PPC::LR8RCRegClass);
18532	else
18533	return std::make_pair(x: `0U`, y: &PPC::LRRCRegClass);
18534	}
18535
18536	// Handle special cases of physical registers that are not properly handled
18537	// by the base class.
18538	if (Constraint [`0`] == `'{'` && Constraint [Constraint.size() - `1`] == `'}'`) {
18539	// If we name a VSX register, we can't defer to the base class because it
18540	// will not recognize the correct register (their names will be VSL{0-31}
18541	// and V{0-31} so they won't match). So we match them here.
18542	if (Constraint.size() > `3` && Constraint [`1`] == `'v'` && Constraint [`2`] == `'s'`) {
18543	int VSNum = atoi(nptr: Constraint.data() + `3`);
18544	assert(VSNum >= `0` && VSNum <= `63` &&
18545	"Attempted to access a vsr out of range");
18546	if (VSNum < `32`)
18547	return std::make_pair(x: PPC::VSL0 + VSNum, y: &PPC::VSRCRegClass);
18548	return std::make_pair(x: PPC::V0 + VSNum - `32`, y: &PPC::VSRCRegClass);
18549	}
18550
18551	// For float registers, we can't defer to the base class as it will match
18552	// the SPILLTOVSRRC class.
18553	if (Constraint.size() > `3` && Constraint [`1`] == `'f'`) {
18554	int RegNum = atoi(nptr: Constraint.data() + `2`);
18555	if (RegNum > `31` \|\| RegNum < `0`)
18556	report_fatal_error(reason: "Invalid floating point register number");
18557	if (VT == MVT::f32 \|\| VT == MVT::i32)
18558	return Subtarget.hasSPE()
18559	? std::make_pair(x: PPC::R0 + RegNum, y: &PPC::GPRCRegClass)
18560	: std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F4RCRegClass);
18561	if (VT == MVT::f64 \|\| VT == MVT::i64)
18562	return Subtarget.hasSPE()
18563	? std::make_pair(x: PPC::S0 + RegNum, y: &PPC::SPERCRegClass)
18564	: std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F8RCRegClass);
18565	}
18566	}
18567
18568	std::pair<unsigned, const TargetRegisterClass *> R =
18569	TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18570
18571	// r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18572	// (which we call X[0-9]+). If a 64-bit value has been requested, and a
18573	// 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18574	// register.
18575	// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18576	// the AsmName field from RegisterInfo.td, then this would not be necessary.*
18577	if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18578	PPC::GPRCRegClass.contains(Reg: R.first))
18579	return std::make_pair(x: TRI->getMatchingSuperReg(Reg: R.first,
18580	SubIdx: PPC::sub_32, RC: &PPC::G8RCRegClass),
18581	y: &PPC::G8RCRegClass);
18582
18583	// GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18584	if (!R.second && StringRef ("{cc}").equals_insensitive(RHS: Constraint)) {
18585	R.first = PPC::CR0;
18586	R.second = &PPC::CRRCRegClass;
18587	}
18588	// FIXME: This warning should ideally be emitted in the front end.
18589	const auto &TM = getTargetMachine();
18590	if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18591	if (((R.first >= PPC::V20 && R.first <= PPC::V31) \|\|
18592	(R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18593	(R.second == &PPC::VSRCRegClass \|\| R.second == &PPC::VSFRCRegClass))
18594	errs() << "warning: vector registers 20 to 32 are reserved in the "
18595	"default AIX AltiVec ABI and cannot be used\n";
18596	}
18597
18598	return R;
18599	}
18600
18601	/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18602	/// vector. If it is invalid, don't add anything to Ops.
18603	void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
18604	StringRef Constraint,
18605	std::vector<SDValue> &Ops,
18606	SelectionDAG &DAG) const {
18607	SDValue Result;
18608
18609	// Only support length 1 constraints.
18610	if (Constraint.size() > `1`)
18611	return;
18612
18613	char Letter = Constraint [`0`];
18614	switch (Letter) {
18615	default: break;
18616	case `'I'`:
18617	case `'J'`:
18618	case `'K'`:
18619	case `'L'`:
18620	case `'M'`:
18621	case `'N'`:
18622	case `'O'`:
18623	case `'P'`: {
18624	ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Val&: Op);
18625	if (!CST) return; // Must be an immediate to match.
18626	SDLoc dl(Op);
18627	int64_t Value = CST->getSExtValue();
18628	EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18629	// numbers are printed as such.
18630	switch (Letter) {
18631	default: llvm_unreachable("Unknown constraint letter!");
18632	case `'I'`: // "I" is a signed 16-bit constant.
18633	if (isInt<`16`>(x: Value))
18634	Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
18635	break;
18636	case `'J'`: // "J" is a constant with only the high-order 16 bits nonzero.
18637	if (isShiftedUInt<`16`, `16`>(x: Value))
18638	Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
18639	break;
18640	case `'L'`: // "L" is a signed 16-bit constant shifted left 16 bits.
18641	if (isShiftedInt<`16`, `16`>(x: Value))
18642	Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
18643	break;
18644	case `'K'`: // "K" is a constant with only the low-order 16 bits nonzero.
18645	if (isUInt<`16`>(x: Value))
18646	Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
18647	break;
18648	case `'M'`: // "M" is a constant that is greater than 31.
18649	if (Value > `31`)
18650	Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
18651	break;
18652	case `'N'`: // "N" is a positive constant that is an exact power of two.
18653	if (Value > `0` && isPowerOf2_64(Value))
18654	Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
18655	break;
18656	case `'O'`: // "O" is the constant zero.
18657	if (Value == `0`)
18658	Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
18659	break;
18660	case `'P'`: // "P" is a constant whose negation is a signed 16-bit constant.
18661	if (isInt<`16`>(x: -Value))
18662	Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
18663	break;
18664	}
18665	break;
18666	}
18667	}
18668
18669	if (Result.getNode()) {
18670	Ops.push_back(x: Result);
18671	return;
18672	}
18673
18674	// Handle standard constraint letters.
18675	TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
18676	}
18677
18678	void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
18679	SmallVectorImpl<SDValue> &Ops,
18680	SelectionDAG &DAG) const {
18681	if (I.getNumOperands() <= `1`)
18682	return;
18683	if (!isa<ConstantSDNode>(Val: Ops [`1`].getNode()))
18684	return;
18685	auto IntrinsicID = Ops [`1`].getNode()->getAsZExtVal();
18686	if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18687	IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18688	return;
18689
18690	if (MDNode *MDN = I.getMetadata(KindID: LLVMContext::MD_annotation))
18691	Ops.push_back(Elt: DAG.getMDNode(MD: MDN));
18692	}
18693
18694	// isLegalAddressingMode - Return true if the addressing mode represented
18695	// by AM is legal for this target, for a load/store of the specified type.
18696	bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18697	const AddrMode &AM, Type *Ty,
18698	unsigned AS,
18699	Instruction I) const* {
18700	// Vector type r+i form is supported since power9 as DQ form. We don't check
18701	// the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
18702	// imm form is preferred and the offset can be adjusted to use imm form later
18703	// in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18704	// max offset to check legal addressing mode, we should be a little aggressive
18705	// to contain other offsets for that LSRUse.
18706	if (Ty->isVectorTy() && AM.BaseOffs != `0` && !Subtarget.hasP9Vector())
18707	return false;
18708
18709	// PPC allows a sign-extended 16-bit immediate field.
18710	if (AM.BaseOffs <= -(`1LL` << `16`) \|\| AM.BaseOffs >= (`1LL` << `16`)-`1`)
18711	return false;
18712
18713	// No global is ever allowed as a base.
18714	if (AM.BaseGV)
18715	return false;
18716
18717	// PPC only support r+r,
18718	switch (AM.Scale) {
18719	case `0`: // "r+i" or just "i", depending on HasBaseReg.
18720	break;
18721	case `1`:
18722	if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18723	return false;
18724	// Otherwise we have r+r or r+i.
18725	break;
18726	case `2`:
18727	if (AM.HasBaseReg \|\| AM.BaseOffs) // 2r+r or 2r+i is not allowed.
18728	return false;
18729	// Allow 2r as r+r.*
18730	break;
18731	default:
18732	// No other scales are supported.
18733	return false;
18734	}
18735
18736	return true;
18737	}
18738
18739	SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18740	SelectionDAG &DAG) const {
18741	MachineFunction &MF = DAG.getMachineFunction();
18742	MachineFrameInfo &MFI = MF.getFrameInfo();
18743	MFI.setReturnAddressIsTaken(true);
18744
18745	SDLoc dl(Op);
18746	unsigned Depth = Op.getConstantOperandVal(i: `0`);
18747
18748	// Make sure the function does not optimize away the store of the RA to
18749	// the stack.
18750	PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18751	FuncInfo->setLRStoreRequired();
18752	auto PtrVT = getPointerTy(DL: MF.getDataLayout());
18753
18754	if (Depth > `0`) {
18755	// The link register (return address) is saved in the caller's frame
18756	// not the callee's stack frame. So we must get the caller's frame
18757	// address and load the return address at the LR offset from there.
18758	SDValue FrameAddr =
18759	DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
18760	Ptr: LowerFRAMEADDR(Op, DAG), PtrInfo: MachinePointerInfo ());
18761	SDValue Offset =
18762	DAG.getConstant(Val: Subtarget.getFrameLowering()->getReturnSaveOffset(), DL: dl,
18763	VT: Subtarget.getScalarIntVT());
18764	return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(),
18765	Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FrameAddr, N2: Offset),
18766	PtrInfo: MachinePointerInfo ());
18767	}
18768
18769	// Just load the return address off the stack.
18770	SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18771	return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(), Ptr: RetAddrFI,
18772	PtrInfo: MachinePointerInfo ());
18773	}
18774
18775	SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18776	SelectionDAG &DAG) const {
18777	SDLoc dl(Op);
18778	unsigned Depth = Op.getConstantOperandVal(i: `0`);
18779
18780	MachineFunction &MF = DAG.getMachineFunction();
18781	MachineFrameInfo &MFI = MF.getFrameInfo();
18782	MFI.setFrameAddressIsTaken(true);
18783
18784	EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
18785	bool isPPC64 = PtrVT == MVT::i64;
18786
18787	// Naked functions never have a frame pointer, and so we use r1. For all
18788	// other functions, this decision must be delayed until during PEI.
18789	unsigned FrameReg;
18790	if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
18791	FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18792	else
18793	FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18794
18795	SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg: FrameReg,
18796	VT: PtrVT);
18797	while (Depth--)
18798	FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
18799	Ptr: FrameAddr, PtrInfo: MachinePointerInfo ());
18800	return FrameAddr;
18801	}
18802
18803	#define GET_REGISTER_MATCHER
18804	#include "PPCGenAsmMatcher.inc"
18805
18806	Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
18807	const MachineFunction &MF) const {
18808	bool IsPPC64 = Subtarget.isPPC64();
18809
18810	bool Is64Bit = IsPPC64 && VT == LLT::scalar(SizeInBits: `64`);
18811	if (!Is64Bit && VT != LLT::scalar(SizeInBits: `32`))
18812	report_fatal_error(reason: "Invalid register global variable type");
18813
18814	Register Reg = MatchRegisterName(Name: RegName);
18815	if (!Reg)
18816	return Reg;
18817
18818	// FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18819	// Need followup investigation as to why.
18820	if ((IsPPC64 && Reg == PPC::R2) \|\| Reg == PPC::R0)
18821	report_fatal_error(reason: Twine("Trying to reserve an invalid register \"" +
18822	StringRef (RegName) + "\"."));
18823
18824	// Convert GPR to GP8R register for 64bit.
18825	if (Is64Bit && StringRef (RegName).starts_with_insensitive(Prefix: "r"))
18826	Reg = Reg.id() - PPC::R0 + PPC::X0;
18827
18828	return Reg;
18829	}
18830
18831	bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18832	// 32-bit SVR4 ABI access everything as got-indirect.
18833	if (Subtarget.is32BitELFABI())
18834	return true;
18835
18836	// AIX accesses everything indirectly through the TOC, which is similar to
18837	// the GOT.
18838	if (Subtarget.isAIXABI())
18839	return true;
18840
18841	CodeModel::Model CModel = getTargetMachine().getCodeModel();
18842	// If it is small or large code model, module locals are accessed
18843	// indirectly by loading their address from .toc/.got.
18844	if (CModel == CodeModel::Small \|\| CModel == CodeModel::Large)
18845	return true;
18846
18847	// JumpTable and BlockAddress are accessed as got-indirect.
18848	if (isa<JumpTableSDNode>(Val: GA) \|\| isa<BlockAddressSDNode>(Val: GA))
18849	return true;
18850
18851	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: GA))
18852	return Subtarget.isGVIndirectSymbol(GV: G->getGlobal());
18853
18854	return false;
18855	}
18856
18857	bool
18858	PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode GA) const* {
18859	// The PowerPC target isn't yet aware of offsets.
18860	return false;
18861	}
18862
18863	void PPCTargetLowering::getTgtMemIntrinsic(
18864	SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
18865	MachineFunction &MF, unsigned Intrinsic) const {
18866	IntrinsicInfo Info;
18867	switch (Intrinsic) {
18868	case Intrinsic::ppc_atomicrmw_xchg_i128:
18869	case Intrinsic::ppc_atomicrmw_add_i128:
18870	case Intrinsic::ppc_atomicrmw_sub_i128:
18871	case Intrinsic::ppc_atomicrmw_nand_i128:
18872	case Intrinsic::ppc_atomicrmw_and_i128:
18873	case Intrinsic::ppc_atomicrmw_or_i128:
18874	case Intrinsic::ppc_atomicrmw_xor_i128:
18875	case Intrinsic::ppc_cmpxchg_i128:
18876	Info.opc = ISD::INTRINSIC_W_CHAIN;
18877	Info.memVT = MVT::i128;
18878	Info.ptrVal = I.getArgOperand(i: `0`);
18879	Info.offset = `0`;
18880	Info.align = Align (`16`);
18881	Info.flags = MachineMemOperand::MOLoad \| MachineMemOperand::MOStore \|
18882	MachineMemOperand::MOVolatile;
18883	Infos.push_back(Elt: Info);
18884	return;
18885	case Intrinsic::ppc_atomic_load_i128:
18886	Info.opc = ISD::INTRINSIC_W_CHAIN;
18887	Info.memVT = MVT::i128;
18888	Info.ptrVal = I.getArgOperand(i: `0`);
18889	Info.offset = `0`;
18890	Info.align = Align (`16`);
18891	Info.flags = MachineMemOperand::MOLoad \| MachineMemOperand::MOVolatile;
18892	Infos.push_back(Elt: Info);
18893	return;
18894	case Intrinsic::ppc_atomic_store_i128:
18895	Info.opc = ISD::INTRINSIC_VOID;
18896	Info.memVT = MVT::i128;
18897	Info.ptrVal = I.getArgOperand(i: `2`);
18898	Info.offset = `0`;
18899	Info.align = Align (`16`);
18900	Info.flags = MachineMemOperand::MOStore \| MachineMemOperand::MOVolatile;
18901	Infos.push_back(Elt: Info);
18902	return;
18903	case Intrinsic::ppc_altivec_lvx:
18904	case Intrinsic::ppc_altivec_lvxl:
18905	case Intrinsic::ppc_altivec_lvebx:
18906	case Intrinsic::ppc_altivec_lvehx:
18907	case Intrinsic::ppc_altivec_lvewx:
18908	case Intrinsic::ppc_vsx_lxvd2x:
18909	case Intrinsic::ppc_vsx_lxvw4x:
18910	case Intrinsic::ppc_vsx_lxvd2x_be:
18911	case Intrinsic::ppc_vsx_lxvw4x_be:
18912	case Intrinsic::ppc_vsx_lxvl:
18913	case Intrinsic::ppc_vsx_lxvll: {
18914	EVT VT;
18915	switch (Intrinsic) {
18916	case Intrinsic::ppc_altivec_lvebx:
18917	VT = MVT::i8;
18918	break;
18919	case Intrinsic::ppc_altivec_lvehx:
18920	VT = MVT::i16;
18921	break;
18922	case Intrinsic::ppc_altivec_lvewx:
18923	VT = MVT::i32;
18924	break;
18925	case Intrinsic::ppc_vsx_lxvd2x:
18926	case Intrinsic::ppc_vsx_lxvd2x_be:
18927	VT = MVT::v2f64;
18928	break;
18929	default:
18930	VT = MVT::v4i32;
18931	break;
18932	}
18933
18934	Info.opc = ISD::INTRINSIC_W_CHAIN;
18935	Info.memVT = VT;
18936	Info.ptrVal = I.getArgOperand(i: `0`);
18937	Info.offset = -VT.getStoreSize()+`1`;
18938	Info.size = `2`*VT.getStoreSize()-`1`;
18939	Info.align = Align (`1`);
18940	Info.flags = MachineMemOperand::MOLoad;
18941	Infos.push_back(Elt: Info);
18942	return;
18943	}
18944	case Intrinsic::ppc_altivec_stvx:
18945	case Intrinsic::ppc_altivec_stvxl:
18946	case Intrinsic::ppc_altivec_stvebx:
18947	case Intrinsic::ppc_altivec_stvehx:
18948	case Intrinsic::ppc_altivec_stvewx:
18949	case Intrinsic::ppc_vsx_stxvd2x:
18950	case Intrinsic::ppc_vsx_stxvw4x:
18951	case Intrinsic::ppc_vsx_stxvd2x_be:
18952	case Intrinsic::ppc_vsx_stxvw4x_be:
18953	case Intrinsic::ppc_vsx_stxvl:
18954	case Intrinsic::ppc_vsx_stxvll: {
18955	EVT VT;
18956	switch (Intrinsic) {
18957	case Intrinsic::ppc_altivec_stvebx:
18958	VT = MVT::i8;
18959	break;
18960	case Intrinsic::ppc_altivec_stvehx:
18961	VT = MVT::i16;
18962	break;
18963	case Intrinsic::ppc_altivec_stvewx:
18964	VT = MVT::i32;
18965	break;
18966	case Intrinsic::ppc_vsx_stxvd2x:
18967	case Intrinsic::ppc_vsx_stxvd2x_be:
18968	VT = MVT::v2f64;
18969	break;
18970	default:
18971	VT = MVT::v4i32;
18972	break;
18973	}
18974
18975	Info.opc = ISD::INTRINSIC_VOID;
18976	Info.memVT = VT;
18977	Info.ptrVal = I.getArgOperand(i: `1`);
18978	Info.offset = -VT.getStoreSize()+`1`;
18979	Info.size = `2`*VT.getStoreSize()-`1`;
18980	Info.align = Align (`1`);
18981	Info.flags = MachineMemOperand::MOStore;
18982	Infos.push_back(Elt: Info);
18983	return;
18984	}
18985	case Intrinsic::ppc_stdcx:
18986	case Intrinsic::ppc_stwcx:
18987	case Intrinsic::ppc_sthcx:
18988	case Intrinsic::ppc_stbcx: {
18989	EVT VT;
18990	auto Alignment = Align (`8`);
18991	switch (Intrinsic) {
18992	case Intrinsic::ppc_stdcx:
18993	VT = MVT::i64;
18994	break;
18995	case Intrinsic::ppc_stwcx:
18996	VT = MVT::i32;
18997	Alignment = Align (`4`);
18998	break;
18999	case Intrinsic::ppc_sthcx:
19000	VT = MVT::i16;
19001	Alignment = Align (`2`);
19002	break;
19003	case Intrinsic::ppc_stbcx:
19004	VT = MVT::i8;
19005	Alignment = Align (`1`);
19006	break;
19007	}
19008	Info.opc = ISD::INTRINSIC_W_CHAIN;
19009	Info.memVT = VT;
19010	Info.ptrVal = I.getArgOperand(i: `0`);
19011	Info.offset = `0`;
19012	Info.align = Alignment;
19013	Info.flags = MachineMemOperand::MOStore \| MachineMemOperand::MOVolatile;
19014	Infos.push_back(Elt: Info);
19015	return;
19016	}
19017	default:
19018	break;
19019	}
19020	}
19021
19022	/// It returns EVT::Other if the type should be determined using generic
19023	/// target-independent logic.
19024	EVT PPCTargetLowering::getOptimalMemOpType(
19025	LLVMContext &Context, const MemOp &Op,
19026	const AttributeList &FuncAttributes) const {
19027	if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
19028	// We should use Altivec/VSX loads and stores when available. For unaligned
19029	// addresses, unaligned VSX loads are only fast starting with the P8.
19030	if (Subtarget.hasAltivec() && Op.size() >= `16`) {
19031	if (Op.isMemset() && Subtarget.hasVSX()) {
19032	uint64_t TailSize = Op.size() % `16`;
19033	// For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
19034	// element if vector element type matches tail store. For tail size
19035	// 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
19036	if (TailSize > `2` && TailSize <= `4`) {
19037	return MVT::v8i16;
19038	}
19039	return MVT::v4i32;
19040	}
19041	if (Op.isAligned(AlignCheck: Align (`16`)) \|\| Subtarget.hasP8Vector())
19042	return MVT::v4i32;
19043	}
19044	}
19045
19046	if (Subtarget.isPPC64()) {
19047	return MVT::i64;
19048	}
19049
19050	return MVT::i32;
19051	}
19052
19053	/// Returns true if it is beneficial to convert a load of a constant
19054	/// to just the constant itself.
19055	bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
19056	Type Ty) const* {
19057	assert(Ty->isIntegerTy());
19058
19059	unsigned BitSize = Ty->getPrimitiveSizeInBits();
19060	return !(BitSize == `0` \|\| BitSize > `64`);
19061	}
19062
19063	bool PPCTargetLowering::isTruncateFree(Type Ty1, Type Ty2) const {
19064	if (!Ty1->isIntegerTy() \|\| !Ty2->isIntegerTy())
19065	return false;
19066	unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
19067	unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
19068	return NumBits1 == `64` && NumBits2 == `32`;
19069	}
19070
19071	bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
19072	if (!VT1.isInteger() \|\| !VT2.isInteger())
19073	return false;
19074	unsigned NumBits1 = VT1.getSizeInBits();
19075	unsigned NumBits2 = VT2.getSizeInBits();
19076	return NumBits1 == `64` && NumBits2 == `32`;
19077	}
19078
19079	bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19080	// Generally speaking, zexts are not free, but they are free when they can be
19081	// folded with other operations.
19082	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
19083	EVT MemVT = LD->getMemoryVT();
19084	if ((MemVT == MVT::i1 \|\| MemVT == MVT::i8 \|\| MemVT == MVT::i16 \|\|
19085	(Subtarget.isPPC64() && MemVT == MVT::i32)) &&
19086	(LD->getExtensionType() == ISD::NON_EXTLOAD \|\|
19087	LD->getExtensionType() == ISD::ZEXTLOAD))
19088	return true;
19089	}
19090
19091	// FIXME: Add other cases...
19092	// - 32-bit shifts with a zext to i64
19093	// - zext after ctlz, bswap, etc.
19094	// - zext after and by a constant mask
19095
19096	return TargetLowering::isZExtFree(Val, VT2);
19097	}
19098
19099	bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19100	assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19101	"invalid fpext types");
19102	// Extending to float128 is not free.
19103	if (DestVT == MVT::f128)
19104	return false;
19105	return true;
19106	}
19107
19108	bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19109	return isInt<`16`>(x: Imm) \|\| isUInt<`16`>(x: Imm);
19110	}
19111
19112	bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19113	return isInt<`16`>(x: Imm) \|\| isUInt<`16`>(x: Imm);
19114	}
19115
19116	bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
19117	MachineMemOperand::Flags,
19118	unsigned Fast) const* {
19119	if (DisablePPCUnaligned)
19120	return false;
19121
19122	// PowerPC supports unaligned memory access for simple non-vector types.
19123	// Although accessing unaligned addresses is not as efficient as accessing
19124	// aligned addresses, it is generally more efficient than manual expansion,
19125	// and generally only traps for software emulation when crossing page
19126	// boundaries.
19127
19128	if (!VT.isSimple())
19129	return false;
19130
19131	if (VT.isFloatingPoint() && !VT.isVector() &&
19132	!Subtarget.allowsUnalignedFPAccess())
19133	return false;
19134
19135	if (VT.getSimpleVT().isVector()) {
19136	if (Subtarget.hasVSX()) {
19137	if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
19138	VT != MVT::v4f32 && VT != MVT::v4i32)
19139	return false;
19140	} else {
19141	return false;
19142	}
19143	}
19144
19145	if (VT == MVT::ppcf128)
19146	return false;
19147
19148	if (Fast)
19149	*Fast = `1`;
19150
19151	return true;
19152	}
19153
19154	bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
19155	SDValue C) const {
19156	// Check integral scalar types.
19157	if (!VT.isScalarInteger())
19158	return false;
19159	if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
19160	if (!ConstNode->getAPIntValue().isSignedIntN(N: `64`))
19161	return false;
19162	// This transformation will generate >= 2 operations. But the following
19163	// cases will generate <= 2 instructions during ISEL. So exclude them.
19164	// 1. If the constant multiplier fits 16 bits, it can be handled by one
19165	// HW instruction, ie. MULLI
19166	// 2. If the multiplier after shifted fits 16 bits, an extra shift
19167	// instruction is needed than case 1, ie. MULLI and RLDICR
19168	int64_t Imm = ConstNode->getSExtValue();
19169	unsigned Shift = llvm::countr_zero<uint64_t>(Val: Imm);
19170	Imm >>= Shift;
19171	if (isInt<`16`>(x: Imm))
19172	return false;
19173	uint64_t UImm = static_cast<uint64_t>(Imm);
19174	if (isPowerOf2_64(Value: UImm + `1`) \|\| isPowerOf2_64(Value: UImm - `1`) \|\|
19175	isPowerOf2_64(Value: `1` - UImm) \|\| isPowerOf2_64(Value: -`1` - UImm))
19176	return true;
19177	}
19178	return false;
19179	}
19180
19181	bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19182	EVT VT) const {
19183	return isFMAFasterThanFMulAndFAdd(
19184	F: MF.getFunction(), Ty: VT.getTypeForEVT(Context&: MF.getFunction().getContext()));
19185	}
19186
19187	bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
19188	Type Ty) const* {
19189	if (Subtarget.hasSPE() \|\| Subtarget.useSoftFloat())
19190	return false;
19191	switch (Ty->getScalarType()->getTypeID()) {
19192	case Type::FloatTyID:
19193	case Type::DoubleTyID:
19194	return true;
19195	case Type::FP128TyID:
19196	return Subtarget.hasP9Vector();
19197	default:
19198	return false;
19199	}
19200	}
19201
19202	// FIXME: add more patterns which are not profitable to hoist.
19203	bool PPCTargetLowering::isProfitableToHoist(Instruction I) const* {
19204	if (!I->hasOneUse())
19205	return true;
19206
19207	Instruction *User = I->user_back();
19208	assert(User && "A single use instruction with no uses.");
19209
19210	switch (I->getOpcode()) {
19211	case Instruction::FMul: {
19212	// Don't break FMA, PowerPC prefers FMA.
19213	if (User->getOpcode() != Instruction::FSub &&
19214	User->getOpcode() != Instruction::FAdd)
19215	return true;
19216
19217	const TargetOptions &Options = getTargetMachine().Options;
19218	const Function *F = I->getFunction();
19219	const DataLayout &DL = F->getDataLayout();
19220	Type *Ty = User->getOperand(i: `0`)->getType();
19221	bool AllowContract = I->getFastMathFlags().allowContract() &&
19222	User->getFastMathFlags().allowContract();
19223
19224	return !(isFMAFasterThanFMulAndFAdd(F: *F, Ty) &&
19225	isOperationLegalOrCustom(Op: ISD::FMA, VT: getValueType(DL, Ty)) &&
19226	(AllowContract \|\| Options.AllowFPOpFusion == FPOpFusion::Fast));
19227	}
19228	case Instruction::Load: {
19229	// Don't break "store (load float)" pattern, this pattern will be combined*
19230	// to "store (load int32)" in later InstCombine pass. See function
19231	// combineLoadToOperationType. On PowerPC, loading a float point takes more
19232	// cycles than loading a 32 bit integer.
19233	LoadInst *LI = cast<LoadInst>(Val: I);
19234	// For the loads that combineLoadToOperationType does nothing, like
19235	// ordered load, it should be profitable to hoist them.
19236	// For swifterror load, it can only be used for pointer to pointer type, so
19237	// later type check should get rid of this case.
19238	if (!LI->isUnordered())
19239	return true;
19240
19241	if (User->getOpcode() != Instruction::Store)
19242	return true;
19243
19244	if (I->getType()->getTypeID() != Type::FloatTyID)
19245	return true;
19246
19247	return false;
19248	}
19249	default:
19250	return true;
19251	}
19252	return true;
19253	}
19254
19255	const MCPhysReg *
19256	PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
19257	// LR is a callee-save register, but we must treat it as clobbered by any call
19258	// site. Hence we include LR in the scratch registers, which are in turn added
19259	// as implicit-defs for stackmaps and patchpoints. The same reasoning applies
19260	// to CTR, which is used by any indirect call.
19261	static const MCPhysReg ScratchRegs[] = {
19262	PPC::X12, PPC::LR8, PPC::CTR8, `0`
19263	};
19264
19265	return ScratchRegs;
19266	}
19267
19268	Register PPCTargetLowering::getExceptionPointerRegister(
19269	const Constant PersonalityFn) const* {
19270	return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19271	}
19272
19273	Register PPCTargetLowering::getExceptionSelectorRegister(
19274	const Constant PersonalityFn) const* {
19275	return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19276	}
19277
19278	bool
19279	PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
19280	EVT VT , unsigned DefinedValues) const {
19281	if (VT == MVT::v2i64)
19282	return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19283
19284	if (Subtarget.hasVSX())
19285	return true;
19286
19287	return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
19288	}
19289
19290	Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode N) const* {
19291	if (DisableILPPref \|\| Subtarget.enableMachineScheduler())
19292	return TargetLowering::getSchedulingPreference(N);
19293
19294	return Sched::ILP;
19295	}
19296
19297	// Create a fast isel object.
19298	FastISel *PPCTargetLowering::createFastISel(
19299	FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
19300	const LibcallLoweringInfo LibcallLowering) const* {
19301	return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
19302	}
19303
19304	// 'Inverted' means the FMA opcode after negating one multiplicand.
19305	// For example, (fma -a b c) = (fnmsub a b c)
19306	static unsigned invertFMAOpcode(unsigned Opc) {
19307	switch (Opc) {
19308	default:
19309	llvm_unreachable("Invalid FMA opcode for PowerPC!");
19310	case ISD::FMA:
19311	return PPCISD::FNMSUB;
19312	case PPCISD::FNMSUB:
19313	return ISD::FMA;
19314	}
19315	}
19316
19317	SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
19318	bool LegalOps, bool OptForSize,
19319	NegatibleCost &Cost,
19320	unsigned Depth) const {
19321	if (Depth > SelectionDAG::MaxRecursionDepth)
19322	return SDValue ();
19323
19324	unsigned Opc = Op.getOpcode();
19325	EVT VT = Op.getValueType();
19326	SDNodeFlags Flags = Op.getNode()->getFlags();
19327
19328	switch (Opc) {
19329	case PPCISD::FNMSUB:
19330	if (!Op.hasOneUse() \|\| !isTypeLegal(VT))
19331	break;
19332
19333	SDValue N0 = Op.getOperand(i: `0`);
19334	SDValue N1 = Op.getOperand(i: `1`);
19335	SDValue N2 = Op.getOperand(i: `2`);
19336	SDLoc Loc(Op);
19337
19338	NegatibleCost N2Cost = NegatibleCost::Expensive;
19339	SDValue NegN2 =
19340	getNegatedExpression(Op: N2, DAG, LegalOps, OptForSize, Cost&: N2Cost, Depth: Depth + `1`);
19341
19342	if (!NegN2)
19343	return SDValue ();
19344
19345	// (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19346	// (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19347	// These transformations may change sign of zeroes. For example,
19348	// -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19349	if (Flags.hasNoSignedZeros()) {
19350	// Try and choose the cheaper one to negate.
19351	NegatibleCost N0Cost = NegatibleCost::Expensive;
19352	SDValue NegN0 = getNegatedExpression(Op: N0, DAG, LegalOps, OptForSize,
19353	Cost&: N0Cost, Depth: Depth + `1`);
19354
19355	NegatibleCost N1Cost = NegatibleCost::Expensive;
19356	SDValue NegN1 = getNegatedExpression(Op: N1, DAG, LegalOps, OptForSize,
19357	Cost&: N1Cost, Depth: Depth + `1`);
19358
19359	if (NegN0 && N0Cost <= N1Cost) {
19360	Cost = std::min(a: N0Cost, b: N2Cost);
19361	return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: NegN0, N2: N1, N3: NegN2, Flags);
19362	} else if (NegN1) {
19363	Cost = std::min(a: N1Cost, b: N2Cost);
19364	return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: N0, N2: NegN1, N3: NegN2, Flags);
19365	}
19366	}
19367
19368	// (fneg (fnmsub a b c)) => (fma a b (fneg c))
19369	if (isOperationLegal(Op: ISD::FMA, VT)) {
19370	Cost = N2Cost;
19371	return DAG.getNode(Opcode: ISD::FMA, DL: Loc, VT, N1: N0, N2: N1, N3: NegN2, Flags);
19372	}
19373
19374	break;
19375	}
19376
19377	return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19378	Cost, Depth);
19379	}
19380
19381	// Override to enable LOAD_STACK_GUARD lowering on Linux.
19382	bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
19383	if (M.getStackProtectorGuard() == "tls" \|\| Subtarget.isTargetLinux())
19384	return true;
19385	return TargetLowering::useLoadStackGuardNode(M);
19386	}
19387
19388	bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
19389	bool ForCodeSize) const {
19390	if (!VT.isSimple() \|\| !Subtarget.hasVSX())
19391	return false;
19392
19393	switch(VT.getSimpleVT().SimpleTy) {
19394	default:
19395	// For FP types that are currently not supported by PPC backend, return
19396	// false. Examples: f16, f80.
19397	return false;
19398	case MVT::f32:
19399	case MVT::f64: {
19400	if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19401	// we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
19402	return true;
19403	}
19404	bool IsExact;
19405	APSInt IntResult(`16`, false);
19406	// The rounding mode doesn't really matter because we only care about floats
19407	// that can be converted to integers exactly.
19408	Imm.convertToInteger(Result&: IntResult, RM: APFloat::rmTowardZero, IsExact: &IsExact);
19409	// For exact values in the range [-16, 15] we can materialize the float.
19410	if (IsExact && IntResult <= `15` && IntResult >= -`16`)
19411	return true;
19412	return Imm.isZero();
19413	}
19414	case MVT::ppcf128:
19415	return Imm.isPosZero();
19416	}
19417	}
19418
19419	// For vector shift operation op, fold
19420	// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
19421	static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
19422	SelectionDAG &DAG) {
19423	SDValue N0 = N->getOperand(Num: `0`);
19424	SDValue N1 = N->getOperand(Num: `1`);
19425	EVT VT = N0.getValueType();
19426	unsigned OpSizeInBits = VT.getScalarSizeInBits();
19427	unsigned Opcode = N->getOpcode();
19428	unsigned TargetOpcode;
19429
19430	switch (Opcode) {
19431	default:
19432	llvm_unreachable("Unexpected shift operation");
19433	case ISD::SHL:
19434	TargetOpcode = PPCISD::SHL;
19435	break;
19436	case ISD::SRL:
19437	TargetOpcode = PPCISD::SRL;
19438	break;
19439	case ISD::SRA:
19440	TargetOpcode = PPCISD::SRA;
19441	break;
19442	}
19443
19444	if (VT.isVector() && TLI.isOperationLegal(Op: Opcode, VT) &&
19445	N1 ->getOpcode() == ISD::AND)
19446	if (ConstantSDNode *Mask = isConstOrConstSplat(N: N1 ->getOperand(Num: `1`)))
19447	if (Mask->getZExtValue() == OpSizeInBits - `1`)
19448	return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc (N), VT, N1: N0, N2: N1 ->getOperand(Num: `0`));
19449
19450	return SDValue ();
19451	}
19452
19453	SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19454	DAGCombinerInfo &DCI) const {
19455	EVT VT = N->getValueType(ResNo: `0`);
19456	assert(VT.isVector() && "Vector type expected.");
19457
19458	unsigned Opc = N->getOpcode();
19459	assert((Opc == ISD::SHL \|\| Opc == ISD::SRL \|\| Opc == ISD::SRA) &&
19460	"Unexpected opcode.");
19461
19462	if (!isOperationLegal(Op: Opc, VT))
19463	return SDValue ();
19464
19465	EVT EltTy = VT.getScalarType();
19466	unsigned EltBits = EltTy.getSizeInBits();
19467	if (EltTy != MVT::i64 && EltTy != MVT::i32)
19468	return SDValue ();
19469
19470	SDValue N1 = N->getOperand(Num: `1`);
19471	uint64_t SplatBits = `0`;
19472	bool AddSplatCase = false;
19473	unsigned OpcN1 = N1.getOpcode();
19474	if (OpcN1 == PPCISD::VADD_SPLAT &&
19475	N1.getConstantOperandVal(i: `1`) == VT.getVectorNumElements()) {
19476	AddSplatCase = true;
19477	SplatBits = N1.getConstantOperandVal(i: `0`);
19478	}
19479
19480	if (!AddSplatCase) {
19481	if (OpcN1 != ISD::BUILD_VECTOR)
19482	return SDValue ();
19483
19484	unsigned SplatBitSize;
19485	bool HasAnyUndefs;
19486	APInt APSplatBits, APSplatUndef;
19487	BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Val&: N1);
19488	bool BVNIsConstantSplat =
19489	BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
19490	HasAnyUndefs, MinSplatBits: `0`, isBigEndian: !Subtarget.isLittleEndian());
19491	if (!BVNIsConstantSplat \|\| SplatBitSize != EltBits)
19492	return SDValue ();
19493	SplatBits = APSplatBits.getZExtValue();
19494	}
19495
19496	SDLoc DL(N);
19497	SDValue N0 = N->getOperand(Num: `0`);
19498	// PPC vector shifts by word/double look at only the low 5/6 bits of the
19499	// shift vector, which means the max value is 31/63. A shift vector of all
19500	// 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19501	// -16 to 15 range.
19502	if (SplatBits == (EltBits - `1`)) {
19503	unsigned NewOpc;
19504	switch (Opc) {
19505	case ISD::SHL:
19506	NewOpc = PPCISD::SHL;
19507	break;
19508	case ISD::SRL:
19509	NewOpc = PPCISD::SRL;
19510	break;
19511	case ISD::SRA:
19512	NewOpc = PPCISD::SRA;
19513	break;
19514	}
19515	SDValue SplatOnes = getCanonicalConstSplat(Val: `255`, SplatSize: `1`, VT, DAG&: DCI.DAG, dl: DL);
19516	return DCI.DAG.getNode(Opcode: NewOpc, DL, VT, N1: N0, N2: SplatOnes);
19517	}
19518
19519	if (Opc != ISD::SHL \|\| !isOperationLegal(Op: ISD::ADD, VT))
19520	return SDValue ();
19521
19522	// For 64-bit there is no splat immediate so we want to catch shift by 1 here
19523	// before the BUILD_VECTOR is replaced by a load.
19524	if (EltTy != MVT::i64 \|\| SplatBits != `1`)
19525	return SDValue ();
19526
19527	return DCI.DAG.getNode(Opcode: ISD::ADD, DL: SDLoc (N), VT, N1: N0, N2: N0);
19528	}
19529
19530	SDValue PPCTargetLowering::combineSHL(SDNode N, DAGCombinerInfo &DCI) const* {
19531	if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
19532	return Value;
19533
19534	if (N->getValueType(ResNo: `0`).isVector())
19535	return combineVectorShift(N, DCI);
19536
19537	SDValue N0 = N->getOperand(Num: `0`);
19538	ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`));
19539	if (!Subtarget.isISA3_0() \|\| !Subtarget.isPPC64() \|\|
19540	N0.getOpcode() != ISD::SIGN_EXTEND \|\|
19541	N0.getOperand(i: `0`).getValueType() != MVT::i32 \|\| CN1 == nullptr \|\|
19542	N->getValueType(ResNo: `0`) != MVT::i64)
19543	return SDValue ();
19544
19545	// We can't save an operation here if the value is already extended, and
19546	// the existing shift is easier to combine.
19547	SDValue ExtsSrc = N0.getOperand(i: `0`);
19548	if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19549	ExtsSrc.getOperand(i: `0`).getOpcode() == ISD::AssertSext)
19550	return SDValue ();
19551
19552	SDLoc DL(N0);
19553	SDValue ShiftBy = SDValue (CN1, `0`);
19554	// We want the shift amount to be i32 on the extswli, but the shift could
19555	// have an i64.
19556	if (ShiftBy.getValueType() == MVT::i64)
19557	ShiftBy = DCI.DAG.getConstant(Val: CN1->getZExtValue(), DL, VT: MVT::i32);
19558
19559	return DCI.DAG.getNode(Opcode: PPCISD::EXTSWSLI, DL, VT: MVT::i64, N1: N0 ->getOperand(Num: `0`),
19560	N2: ShiftBy);
19561	}
19562
19563	SDValue PPCTargetLowering::combineSRA(SDNode N, DAGCombinerInfo &DCI) const* {
19564	if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
19565	return Value;
19566
19567	if (N->getValueType(ResNo: `0`).isVector())
19568	return combineVectorShift(N, DCI);
19569
19570	return SDValue ();
19571	}
19572
19573	SDValue PPCTargetLowering::combineSRL(SDNode N, DAGCombinerInfo &DCI) const* {
19574	if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
19575	return Value;
19576
19577	if (N->getValueType(ResNo: `0`).isVector())
19578	return combineVectorShift(N, DCI);
19579
19580	return SDValue ();
19581	}
19582
19583	// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19584	// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19585	// When C is zero, the equation (addi Z, -C) can be simplified to Z
19586	// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19587	static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
19588	const PPCSubtarget &Subtarget) {
19589	if (!Subtarget.isPPC64())
19590	return SDValue ();
19591
19592	SDValue LHS = N->getOperand(Num: `0`);
19593	SDValue RHS = N->getOperand(Num: `1`);
19594
19595	auto isZextOfCompareWithConstant = [](SDValue Op) {
19596	if (Op.getOpcode() != ISD::ZERO_EXTEND \|\| !Op.hasOneUse() \|\|
19597	Op.getValueType() != MVT::i64)
19598	return false;
19599
19600	SDValue Cmp = Op.getOperand(i: `0`);
19601	if (Cmp.getOpcode() != ISD::SETCC \|\| !Cmp.hasOneUse() \|\|
19602	Cmp.getOperand(i: `0`).getValueType() != MVT::i64)
19603	return false;
19604
19605	if (auto *Constant = dyn_cast<ConstantSDNode>(Val: Cmp.getOperand(i: `1`))) {
19606	int64_t NegConstant = `0` - Constant->getSExtValue();
19607	// Due to the limitations of the addi instruction,
19608	// -C is required to be [-32768, 32767].
19609	return isInt<`16`>(x: NegConstant);
19610	}
19611
19612	return false;
19613	};
19614
19615	bool LHSHasPattern = isZextOfCompareWithConstant (LHS);
19616	bool RHSHasPattern = isZextOfCompareWithConstant (RHS);
19617
19618	// If there is a pattern, canonicalize a zext operand to the RHS.
19619	if (LHSHasPattern && !RHSHasPattern)
19620	std::swap(a&: LHS, b&: RHS);
19621	else if (!LHSHasPattern && !RHSHasPattern)
19622	return SDValue ();
19623
19624	SDLoc DL(N);
19625	EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19626	SDVTList VTs = DAG.getVTList(VT1: MVT::i64, VT2: CarryType);
19627	SDValue Cmp = RHS.getOperand(i: `0`);
19628	SDValue Z = Cmp.getOperand(i: `0`);
19629	auto *Constant = cast<ConstantSDNode>(Val: Cmp.getOperand(i: `1`));
19630	int64_t NegConstant = `0` - Constant->getSExtValue();
19631
19632	switch(cast<CondCodeSDNode>(Val: Cmp.getOperand(i: `2`))->get()) {
19633	default: break;
19634	case ISD::SETNE: {
19635	// when C == 0
19636	// --> addze X, (addic Z, -1).carry
19637	// /
19638	// add X, (zext(setne Z, C))--
19639	// \ when -32768 <= -C <= 32767 && C != 0
19640	// --> addze X, (addic (addi Z, -C), -1).carry
19641	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
19642	N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
19643	SDValue AddOrZ = NegConstant != `0` ? Add : Z;
19644	SDValue Addc =
19645	DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
19646	N1: AddOrZ, N2: DAG.getAllOnesConstant(DL, VT: MVT::i64),
19647	N3: DAG.getConstant(Val: `0`, DL, VT: CarryType));
19648	return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
19649	N2: DAG.getConstant(Val: `0`, DL, VT: MVT::i64),
19650	N3: SDValue (Addc.getNode(), `1`));
19651	}
19652	case ISD::SETEQ: {
19653	// when C == 0
19654	// --> addze X, (subfic Z, 0).carry
19655	// /
19656	// add X, (zext(sete Z, C))--
19657	// \ when -32768 <= -C <= 32767 && C != 0
19658	// --> addze X, (subfic (addi Z, -C), 0).carry
19659	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
19660	N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
19661	SDValue AddOrZ = NegConstant != `0` ? Add : Z;
19662	SDValue Subc =
19663	DAG.getNode(Opcode: ISD::USUBO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
19664	N1: DAG.getConstant(Val: `0`, DL, VT: MVT::i64), N2: AddOrZ,
19665	N3: DAG.getConstant(Val: `0`, DL, VT: CarryType));
19666	SDValue Invert = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Subc.getValue(R: `1`),
19667	N2: DAG.getConstant(Val: `1UL`, DL, VT: CarryType));
19668	return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
19669	N2: DAG.getConstant(Val: `0`, DL, VT: MVT::i64), N3: Invert);
19670	}
19671	}
19672
19673	return SDValue ();
19674	}
19675
19676	// Transform
19677	// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19678	// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19679	// In this case both C1 and C2 must be known constants.
19680	// C1+C2 must fit into a 34 bit signed integer.
19681	static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
19682	const PPCSubtarget &Subtarget) {
19683	if (!Subtarget.isUsingPCRelativeCalls())
19684	return SDValue ();
19685
19686	// Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19687	// If we find that node try to cast the Global Address and the Constant.
19688	SDValue LHS = N->getOperand(Num: `0`);
19689	SDValue RHS = N->getOperand(Num: `1`);
19690
19691	if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19692	std::swap(a&: LHS, b&: RHS);
19693
19694	if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19695	return SDValue ();
19696
19697	// Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19698	GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(Val: LHS.getOperand(i: `0`));
19699	ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(Val&: RHS);
19700
19701	// Check that both casts succeeded.
19702	if (!GSDN \|\| !ConstNode)
19703	return SDValue ();
19704
19705	int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19706	SDLoc DL(GSDN);
19707
19708	// The signed int offset needs to fit in 34 bits.
19709	if (!isInt<`34`>(x: NewOffset))
19710	return SDValue ();
19711
19712	// The new global address is a copy of the old global address except
19713	// that it has the updated Offset.
19714	SDValue GA =
19715	DAG.getTargetGlobalAddress(GV: GSDN->getGlobal(), DL, VT: GSDN->getValueType(ResNo: `0`),
19716	offset: NewOffset, TargetFlags: GSDN->getTargetFlags());
19717	SDValue MatPCRel =
19718	DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: GSDN->getValueType(ResNo: `0`), Operand: GA);
19719	return MatPCRel;
19720	}
19721
19722	// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19723	// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19724	// Mathematical identity: X + 1 = X - (-1)
19725	// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19726	// Requirement: VSX feature for efficient xxleqv generation
19727	static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
19728	const PPCSubtarget &Subtarget) {
19729
19730	EVT VT = N->getValueType(ResNo: `0`);
19731	if (!Subtarget.hasVSX())
19732	return SDValue ();
19733
19734	// Handle v2i64, v4i32, v8i16 and v16i8 types
19735	if (!(VT == MVT::v8i16 \|\| VT == MVT::v16i8 \|\| VT == MVT::v4i32 \|\|
19736	VT == MVT::v2i64))
19737	return SDValue ();
19738
19739	SDValue LHS = N->getOperand(Num: `0`);
19740	SDValue RHS = N->getOperand(Num: `1`);
19741
19742	// Check if RHS is BUILD_VECTOR
19743	if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19744	return SDValue ();
19745
19746	// Check if all the elements are 1
19747	unsigned NumOfEles = RHS.getNumOperands();
19748	for (unsigned i = `0`; i < NumOfEles; ++i) {
19749	auto *CN = dyn_cast<ConstantSDNode>(Val: RHS.getOperand(i));
19750	if (!CN \|\| CN->getSExtValue() != `1`)
19751	return SDValue ();
19752	}
19753	SDLoc DL(N);
19754
19755	SDValue MinusOne = DAG.getConstant(Val: APInt::getAllOnes(numBits: `32`), DL, VT: MVT::i32);
19756	SmallVector<SDValue, `4`> Ops(`4`, MinusOne);
19757	SDValue AllOnesVec = DAG.getBuildVector(VT: MVT::v4i32, DL, Ops);
19758
19759	// Bitcast to the target vector type
19760	SDValue Bitcast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: AllOnesVec);
19761
19762	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: LHS, N2: Bitcast);
19763	}
19764
19765	SDValue PPCTargetLowering::combineADD(SDNode N, DAGCombinerInfo &DCI) const* {
19766	if (auto Value = combineADDToADDZE(N, DAG&: DCI.DAG, Subtarget))
19767	return Value;
19768
19769	if (auto Value = combineADDToMAT_PCREL_ADDR(N, DAG&: DCI.DAG, Subtarget))
19770	return Value;
19771
19772	if (auto Value = combineADDToSUB(N, DAG&: DCI.DAG, Subtarget))
19773	return Value;
19774	return SDValue ();
19775	}
19776
19777	// Detect TRUNCATE operations on bitcasts of float128 values.
19778	// What we are looking for here is the situtation where we extract a subset
19779	// of bits from a 128 bit float.
19780	// This can be of two forms:
19781	// 1) BITCAST of f128 feeding TRUNCATE
19782	// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19783	// The reason this is required is because we do not have a legal i128 type
19784	// and so we want to prevent having to store the f128 and then reload part
19785	// of it.
19786	SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19787	DAGCombinerInfo &DCI) const {
19788	// If we are using CRBits then try that first.
19789	if (Subtarget.useCRBits()) {
19790	// Check if CRBits did anything and return that if it did.
19791	if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19792	return CRTruncValue;
19793	}
19794
19795	SDLoc dl(N);
19796	SDValue Op0 = N->getOperand(Num: `0`);
19797
19798	// Looking for a truncate of i128 to i64.
19799	if (Op0.getValueType() != MVT::i128 \|\| N->getValueType(ResNo: `0`) != MVT::i64)
19800	return SDValue ();
19801
19802	int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? `1` : `0`;
19803
19804	// SRL feeding TRUNCATE.
19805	if (Op0.getOpcode() == ISD::SRL) {
19806	ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: `1`));
19807	// The right shift has to be by 64 bits.
19808	if (!ConstNode \|\| ConstNode->getZExtValue() != `64`)
19809	return SDValue ();
19810
19811	// Switch the element number to extract.
19812	EltToExtract = EltToExtract ? `0` : `1`;
19813	// Update Op0 past the SRL.
19814	Op0 = Op0.getOperand(i: `0`);
19815	}
19816
19817	// BITCAST feeding a TRUNCATE possibly via SRL.
19818	if (Op0.getOpcode() == ISD::BITCAST &&
19819	Op0.getValueType() == MVT::i128 &&
19820	Op0.getOperand(i: `0`).getValueType() == MVT::f128) {
19821	SDValue Bitcast = DCI.DAG.getBitcast(VT: MVT::v2i64, V: Op0.getOperand(i: `0`));
19822	return DCI.DAG.getNode(
19823	Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Bitcast,
19824	N2: DCI.DAG.getTargetConstant(Val: EltToExtract, DL: dl, VT: MVT::i32));
19825	}
19826	return SDValue ();
19827	}
19828
19829	SDValue PPCTargetLowering::combineMUL(SDNode N, DAGCombinerInfo &DCI) const* {
19830	SelectionDAG &DAG = DCI.DAG;
19831
19832	ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N: N->getOperand(Num: `1`));
19833	if (!ConstOpOrElement)
19834	return SDValue ();
19835
19836	// An imul is usually smaller than the alternative sequence for legal type.
19837	if (DAG.getMachineFunction().getFunction().hasMinSize() &&
19838	isOperationLegal(Op: ISD::MUL, VT: N->getValueType(ResNo: `0`)))
19839	return SDValue ();
19840
19841	auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19842	switch (this->Subtarget.getCPUDirective()) {
19843	default:
19844	// TODO: enhance the condition for subtarget before pwr8
19845	return false;
19846	case PPC::DIR_PWR8:
19847	// type mul add shl
19848	// scalar 4 1 1
19849	// vector 7 2 2
19850	return true;
19851	case PPC::DIR_PWR9:
19852	case PPC::DIR_PWR10:
19853	case PPC::DIR_PWR11:
19854	case PPC::DIR_PWR_FUTURE:
19855	// type mul add shl
19856	// scalar 5 2 2
19857	// vector 7 2 2
19858
19859	// The cycle RATIO of related operations are showed as a table above.
19860	// Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
19861	// scalar and vector type. For 2 instrs patterns, add/sub + shl
19862	// are 4, it is always profitable; but for 3 instrs patterns
19863	// (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
19864	// So we should only do it for vector type.
19865	return IsAddOne && IsNeg ? VT.isVector() : true;
19866	}
19867	};
19868
19869	EVT VT = N->getValueType(ResNo: `0`);
19870	SDLoc DL(N);
19871
19872	const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19873	bool IsNeg = MulAmt.isNegative();
19874	APInt MulAmtAbs = MulAmt.abs();
19875
19876	if ((MulAmtAbs - `1`).isPowerOf2()) {
19877	// (mul x, 2^N + 1) => (add (shl x, N), x)
19878	// (mul x, -(2^N + 1)) => -(add (shl x, N), x)
19879
19880	if (!IsProfitable (IsNeg, true, VT))
19881	return SDValue ();
19882
19883	SDValue Op0 = N->getOperand(Num: `0`);
19884	SDValue Op1 =
19885	DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: `0`),
19886	N2: DAG.getConstant(Val: (MulAmtAbs - `1`).logBase2(), DL, VT));
19887	SDValue Res = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1);
19888
19889	if (!IsNeg)
19890	return Res;
19891
19892	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: `0`, DL, VT), N2: Res);
19893	} else if ((MulAmtAbs + `1`).isPowerOf2()) {
19894	// (mul x, 2^N - 1) => (sub (shl x, N), x)
19895	// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19896
19897	if (!IsProfitable (IsNeg, false, VT))
19898	return SDValue ();
19899
19900	SDValue Op0 = N->getOperand(Num: `0`);
19901	SDValue Op1 =
19902	DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: `0`),
19903	N2: DAG.getConstant(Val: (MulAmtAbs + `1`).logBase2(), DL, VT));
19904
19905	if (!IsNeg)
19906	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op1, N2: Op0);
19907	else
19908	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1);
19909
19910	} else {
19911	return SDValue ();
19912	}
19913	}
19914
19915	// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
19916	// in combiner since we need to check SD flags and other subtarget features.
19917	SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19918	DAGCombinerInfo &DCI) const {
19919	SDValue N0 = N->getOperand(Num: `0`);
19920	SDValue N1 = N->getOperand(Num: `1`);
19921	SDValue N2 = N->getOperand(Num: `2`);
19922	SDNodeFlags Flags = N->getFlags();
19923	EVT VT = N->getValueType(ResNo: `0`);
19924	SelectionDAG &DAG = DCI.DAG;
19925	unsigned Opc = N->getOpcode();
19926	bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
19927	bool LegalOps = !DCI.isBeforeLegalizeOps();
19928	SDLoc Loc(N);
19929
19930	if (!isOperationLegal(Op: ISD::FMA, VT))
19931	return SDValue ();
19932
19933	// Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19934	// since (fnmsub a b c)=-0 while c-ab=+0.
19935	if (!Flags.hasNoSignedZeros())
19936	return SDValue ();
19937
19938	// (fma (fneg a) b c) => (fnmsub a b c)
19939	// (fnmsub (fneg a) b c) => (fma a b c)
19940	if (SDValue NegN0 = getCheaperNegatedExpression(Op: N0, DAG, LegalOps, OptForSize: CodeSize))
19941	return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: NegN0, N2: N1, N3: N2, Flags);
19942
19943	// (fma a (fneg b) c) => (fnmsub a b c)
19944	// (fnmsub a (fneg b) c) => (fma a b c)
19945	if (SDValue NegN1 = getCheaperNegatedExpression(Op: N1, DAG, LegalOps, OptForSize: CodeSize))
19946	return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: N0, N2: NegN1, N3: N2, Flags);
19947
19948	return SDValue ();
19949	}
19950
19951	bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst CI) const* {
19952	// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19953	if (!Subtarget.is64BitELFABI())
19954	return false;
19955
19956	// If not a tail call then no need to proceed.
19957	if (!CI->isTailCall())
19958	return false;
19959
19960	// If sibling calls have been disabled and tail-calls aren't guaranteed
19961	// there is no reason to duplicate.
19962	auto &TM = getTargetMachine();
19963	if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19964	return false;
19965
19966	// Can't tail call a function called indirectly, or if it has variadic args.
19967	const Function *Callee = CI->getCalledFunction();
19968	if (!Callee \|\| Callee->isVarArg())
19969	return false;
19970
19971	// Make sure the callee and caller calling conventions are eligible for tco.
19972	const Function *Caller = CI->getParent()->getParent();
19973	if (!areCallingConvEligibleForTCO_64SVR4(CallerCC: Caller->getCallingConv(),
19974	CalleeCC: CI->getCallingConv()))
19975	return false;
19976
19977	// If the function is local then we have a good chance at tail-calling it
19978	return getTargetMachine().shouldAssumeDSOLocal(GV: Callee);
19979	}
19980
19981	bool PPCTargetLowering::
19982	isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19983	const Value *Mask = AndI.getOperand(i: `1`);
19984	// If the mask is suitable for andi. or andis. we should sink the and.
19985	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Mask)) {
19986	// Can't handle constants wider than 64-bits.
19987	if (CI->getBitWidth() > `64`)
19988	return false;
19989	int64_t ConstVal = CI->getZExtValue();
19990	return isUInt<`16`>(x: ConstVal) \|\|
19991	(isUInt<`16`>(x: ConstVal >> `16`) && !(ConstVal & `0xFFFF`));
19992	}
19993
19994	// For non-constant masks, we can always use the record-form and.
19995	return true;
19996	}
19997
19998	/// getAddrModeForFlags - Based on the set of address flags, select the most
19999	/// optimal instruction format to match by.
20000	PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20001	// This is not a node we should be handling here.
20002	if (Flags == PPC::MOF_None)
20003	return PPC::AM_None;
20004	// Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20005	for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DForm))
20006	if ((Flags & FlagSet) == FlagSet)
20007	return PPC::AM_DForm;
20008	for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DSForm))
20009	if ((Flags & FlagSet) == FlagSet)
20010	return PPC::AM_DSForm;
20011	for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DQForm))
20012	if ((Flags & FlagSet) == FlagSet)
20013	return PPC::AM_DQForm;
20014	for (auto FlagSet : AddrModesMap.at(k: PPC::AM_PrefixDForm))
20015	if ((Flags & FlagSet) == FlagSet)
20016	return PPC::AM_PrefixDForm;
20017	// If no other forms are selected, return an X-Form as it is the most
20018	// general addressing mode.
20019	return PPC::AM_XForm;
20020	}
20021
20022	/// Set alignment flags based on whether or not the Frame Index is aligned.
20023	/// Utilized when computing flags for address computation when selecting
20024	/// load and store instructions.
20025	static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
20026	SelectionDAG &DAG) {
20027	bool IsAdd = ((N.getOpcode() == ISD::ADD) \|\| (N.getOpcode() == ISD::OR));
20028	FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: IsAdd ? N.getOperand(i: `0`) : N);
20029	if (!FI)
20030	return;
20031	const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
20032	unsigned FrameIndexAlign = MFI.getObjectAlign(ObjectIdx: FI->getIndex()).value();
20033	// If this is (add $FI, $S16Imm), the alignment flags are already set
20034	// based on the immediate. We just need to clear the alignment flags
20035	// if the FI alignment is weaker.
20036	if ((FrameIndexAlign % `4`) != `0`)
20037	FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
20038	if ((FrameIndexAlign % `16`) != `0`)
20039	FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
20040	// If the address is a plain FrameIndex, set alignment flags based on
20041	// FI alignment.
20042	if (!IsAdd) {
20043	if ((FrameIndexAlign % `4`) == `0`)
20044	FlagSet \|= PPC::MOF_RPlusSImm16Mult4;
20045	if ((FrameIndexAlign % `16`) == `0`)
20046	FlagSet \|= PPC::MOF_RPlusSImm16Mult16;
20047	}
20048	}
20049
20050	/// Given a node, compute flags that are used for address computation when
20051	/// selecting load and store instructions. The flags computed are stored in
20052	/// FlagSet. This function takes into account whether the node is a constant,
20053	/// an ADD, OR, or a constant, and computes the address flags accordingly.
20054	static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
20055	SelectionDAG &DAG) {
20056	// Set the alignment flags for the node depending on if the node is
20057	// 4-byte or 16-byte aligned.
20058	auto SetAlignFlagsForImm = [&](uint64_t Imm) {
20059	if ((Imm & `0x3`) == `0`)
20060	FlagSet \|= PPC::MOF_RPlusSImm16Mult4;
20061	if ((Imm & `0xf`) == `0`)
20062	FlagSet \|= PPC::MOF_RPlusSImm16Mult16;
20063	};
20064
20065	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
20066	// All 32-bit constants can be computed as LIS + Disp.
20067	const APInt &ConstImm = CN->getAPIntValue();
20068	if (ConstImm.isSignedIntN(N: `32`)) { // Flag to handle 32-bit constants.
20069	FlagSet \|= PPC::MOF_AddrIsSImm32;
20070	SetAlignFlagsForImm (ConstImm.getZExtValue());
20071	setAlignFlagsForFI(N, FlagSet, DAG);
20072	}
20073	if (ConstImm.isSignedIntN(N: `34`)) // Flag to handle 34-bit constants.
20074	FlagSet \|= PPC::MOF_RPlusSImm34;
20075	else // Let constant materialization handle large constants.
20076	FlagSet \|= PPC::MOF_NotAddNorCst;
20077	} else if (N.getOpcode() == ISD::ADD \|\| provablyDisjointOr(DAG, N)) {
20078	// This address can be represented as an addition of:
20079	// - Register + Imm16 (possibly a multiple of 4/16)
20080	// - Register + Imm34
20081	// - Register + PPCISD::Lo
20082	// - Register + Register
20083	// In any case, we won't have to match this as Base + Zero.
20084	SDValue RHS = N.getOperand(i: `1`);
20085	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: RHS)) {
20086	const APInt &ConstImm = CN->getAPIntValue();
20087	if (ConstImm.isSignedIntN(N: `16`)) {
20088	FlagSet \|= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
20089	SetAlignFlagsForImm (ConstImm.getZExtValue());
20090	setAlignFlagsForFI(N, FlagSet, DAG);
20091	}
20092	if (ConstImm.isSignedIntN(N: `34`))
20093	FlagSet \|= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
20094	else
20095	FlagSet \|= PPC::MOF_RPlusR; // Register.
20096	} else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(i: `1`))
20097	FlagSet \|= PPC::MOF_RPlusLo; // PPCISD::Lo.
20098	else
20099	FlagSet \|= PPC::MOF_RPlusR;
20100	} else { // The address computation is not a constant or an addition.
20101	setAlignFlagsForFI(N, FlagSet, DAG);
20102	FlagSet \|= PPC::MOF_NotAddNorCst;
20103	}
20104	}
20105
20106	static bool isPCRelNode(SDValue N) {
20107	return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR \|\|
20108	isValidPCRelNode<ConstantPoolSDNode>(N) \|\|
20109	isValidPCRelNode<GlobalAddressSDNode>(N) \|\|
20110	isValidPCRelNode<JumpTableSDNode>(N) \|\|
20111	isValidPCRelNode<BlockAddressSDNode>(N));
20112	}
20113
20114	/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
20115	/// the address flags of the load/store instruction that is to be matched.
20116	unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
20117	SelectionDAG &DAG) const {
20118	unsigned FlagSet = PPC::MOF_None;
20119
20120	// Compute subtarget flags.
20121	if (!Subtarget.hasP9Vector())
20122	FlagSet \|= PPC::MOF_SubtargetBeforeP9;
20123	else
20124	FlagSet \|= PPC::MOF_SubtargetP9;
20125
20126	if (Subtarget.hasPrefixInstrs())
20127	FlagSet \|= PPC::MOF_SubtargetP10;
20128
20129	if (Subtarget.hasSPE())
20130	FlagSet \|= PPC::MOF_SubtargetSPE;
20131
20132	// Check if we have a PCRel node and return early.
20133	if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
20134	return FlagSet;
20135
20136	// If the node is the paired load/store intrinsics, compute flags for
20137	// address computation and return early.
20138	unsigned ParentOp = Parent->getOpcode();
20139	if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) \|\|
20140	(ParentOp == ISD::INTRINSIC_VOID))) {
20141	unsigned ID = Parent->getConstantOperandVal(Num: `1`);
20142	if ((ID == Intrinsic::ppc_vsx_lxvp) \|\| (ID == Intrinsic::ppc_vsx_stxvp)) {
20143	SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
20144	? Parent->getOperand(Num: `2`)
20145	: Parent->getOperand(Num: `3`);
20146	computeFlagsForAddressComputation(N: IntrinOp, FlagSet, DAG);
20147	FlagSet \|= PPC::MOF_Vector;
20148	return FlagSet;
20149	}
20150	}
20151
20152	// Mark this as something we don't want to handle here if it is atomic
20153	// or pre-increment instruction.
20154	if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Val: Parent))
20155	if (LSB->isIndexed())
20156	return PPC::MOF_None;
20157
20158	// Compute in-memory type flags. This is based on if there are scalars,
20159	// floats or vectors.
20160	const MemSDNode *MN = dyn_cast<MemSDNode>(Val: Parent);
20161	assert(MN && "Parent should be a MemSDNode!");
20162	EVT MemVT = MN->getMemoryVT();
20163	unsigned Size = MemVT.getSizeInBits();
20164	if (MemVT.isScalarInteger()) {
20165	assert(Size <= `128` &&
20166	"Not expecting scalar integers larger than 16 bytes!");
20167	if (Size < `32`)
20168	FlagSet \|= PPC::MOF_SubWordInt;
20169	else if (Size == `32`)
20170	FlagSet \|= PPC::MOF_WordInt;
20171	else
20172	FlagSet \|= PPC::MOF_DoubleWordInt;
20173	} else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
20174	if (Size == `128`)
20175	FlagSet \|= PPC::MOF_Vector;
20176	else if (Size == `256`) {
20177	assert(Subtarget.pairedVectorMemops() &&
20178	"256-bit vectors are only available when paired vector memops is "
20179	"enabled!");
20180	FlagSet \|= PPC::MOF_Vector;
20181	} else
20182	llvm_unreachable("Not expecting illegal vectors!");
20183	} else { // Floating point type: can be scalar, f128 or vector types.
20184	if (Size == `32` \|\| Size == `64`)
20185	FlagSet \|= PPC::MOF_ScalarFloat;
20186	else if (MemVT == MVT::f128 \|\| MemVT.isVector())
20187	FlagSet \|= PPC::MOF_Vector;
20188	else
20189	llvm_unreachable("Not expecting illegal scalar floats!");
20190	}
20191
20192	// Compute flags for address computation.
20193	computeFlagsForAddressComputation(N, FlagSet, DAG);
20194
20195	// Compute type extension flags.
20196	if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Val: Parent)) {
20197	switch (LN->getExtensionType()) {
20198	case ISD::SEXTLOAD:
20199	FlagSet \|= PPC::MOF_SExt;
20200	break;
20201	case ISD::EXTLOAD:
20202	case ISD::ZEXTLOAD:
20203	FlagSet \|= PPC::MOF_ZExt;
20204	break;
20205	case ISD::NON_EXTLOAD:
20206	FlagSet \|= PPC::MOF_NoExt;
20207	break;
20208	}
20209	} else
20210	FlagSet \|= PPC::MOF_NoExt;
20211
20212	// For integers, no extension is the same as zero extension.
20213	// We set the extension mode to zero extension so we don't have
20214	// to add separate entries in AddrModesMap for loads and stores.
20215	if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
20216	FlagSet \|= PPC::MOF_ZExt;
20217	FlagSet &= ~PPC::MOF_NoExt;
20218	}
20219
20220	// If we don't have prefixed instructions, 34-bit constants should be
20221	// treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
20222	bool IsNonP1034BitConst =
20223	((PPC::MOF_RPlusSImm34 \| PPC::MOF_AddrIsSImm32 \| PPC::MOF_SubtargetP10) &
20224	FlagSet) == PPC::MOF_RPlusSImm34;
20225	if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
20226	IsNonP1034BitConst)
20227	FlagSet \|= PPC::MOF_NotAddNorCst;
20228
20229	return FlagSet;
20230	}
20231
20232	/// SelectForceXFormMode - Given the specified address, force it to be
20233	/// represented as an indexed [r+r] operation (an XForm instruction).
20234	PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
20235	SDValue &Base,
20236	SelectionDAG &DAG) const {
20237
20238	PPC::AddrMode Mode = PPC::AM_XForm;
20239	int16_t ForceXFormImm = `0`;
20240	if (provablyDisjointOr(DAG, N) &&
20241	!isIntS16Immediate(Op: N.getOperand(i: `1`), Imm&: ForceXFormImm)) {
20242	Disp = N.getOperand(i: `0`);
20243	Base = N.getOperand(i: `1`);
20244	return Mode;
20245	}
20246
20247	// If the address is the result of an add, we will utilize the fact that the
20248	// address calculation includes an implicit add. However, we can reduce
20249	// register pressure if we do not materialize a constant just for use as the
20250	// index register. We only get rid of the add if it is not an add of a
20251	// value and a 16-bit signed constant and both have a single use.
20252	if (N.getOpcode() == ISD::ADD &&
20253	(!isIntS16Immediate(Op: N.getOperand(i: `1`), Imm&: ForceXFormImm) \|\|
20254	!N.getOperand(i: `1`).hasOneUse() \|\| !N.getOperand(i: `0`).hasOneUse())) {
20255	Disp = N.getOperand(i: `0`);
20256	Base = N.getOperand(i: `1`);
20257	return Mode;
20258	}
20259
20260	// Otherwise, use R0 as the base register.
20261	Disp = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20262	VT: N.getValueType());
20263	Base = N;
20264
20265	return Mode;
20266	}
20267
20268	bool PPCTargetLowering::splitValueIntoRegisterParts(
20269	SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20270	unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20271	EVT ValVT = Val.getValueType();
20272	// If we are splitting a scalar integer into f64 parts (i.e. so they
20273	// can be placed into VFRC registers), we need to zero extend and
20274	// bitcast the values. This will ensure the value is placed into a
20275	// VSR using direct moves or stack operations as needed.
20276	if (PartVT == MVT::f64 &&
20277	(ValVT == MVT::i32 \|\| ValVT == MVT::i16 \|\| ValVT == MVT::i8)) {
20278	Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
20279	Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f64, Operand: Val);
20280	Parts[`0`] = Val;
20281	return true;
20282	}
20283	return false;
20284	}
20285
20286	SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
20287	SelectionDAG &DAG) const {
20288	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20289	TargetLowering::CallLoweringInfo CLI(DAG);
20290	EVT RetVT = Op.getValueType();
20291	Type RetTy = RetVT.getTypeForEVT(Context&: DAG.getContext());
20292	SDValue Callee =
20293	DAG.getExternalSymbol(Sym: LibCallName, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
20294	bool SignExtend = TLI.shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: false);
20295	TargetLowering::ArgListTy Args;
20296	for (const SDValue &N : Op ->op_values()) {
20297	EVT ArgVT = N.getValueType();
20298	Type ArgTy = ArgVT.getTypeForEVT(Context&: DAG.getContext());
20299	TargetLowering::ArgListEntry Entry(N, ArgTy);
20300	Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(Ty: ArgTy, IsSigned: SignExtend);
20301	Entry.IsZExt = !Entry.IsSExt;
20302	Args.push_back(x: Entry);
20303	}
20304
20305	SDValue InChain = DAG.getEntryNode();
20306	SDValue TCChain = InChain;
20307	const Function &F = DAG.getMachineFunction().getFunction();
20308	bool isTailCall =
20309	TLI.isInTailCallPosition(DAG, Node: Op.getNode(), Chain&: TCChain) &&
20310	(RetTy == F.getReturnType() \|\| F.getReturnType()->isVoidTy());
20311	if (isTailCall)
20312	InChain = TCChain;
20313	CLI.setDebugLoc(SDLoc (Op))
20314	.setChain(InChain)
20315	.setLibCallee(CC: CallingConv::C, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
20316	.setTailCall(isTailCall)
20317	.setSExtResult(SignExtend)
20318	.setZExtResult(!SignExtend)
20319	.setIsPostTypeLegalization(true);
20320	return TLI.LowerCallTo(CLI).first;
20321	}
20322
20323	SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20324	const char LibCallFloatName, const* char *LibCallDoubleName, SDValue Op,
20325	SelectionDAG &DAG) const {
20326	if (Op.getValueType() == MVT::f32)
20327	return lowerToLibCall(LibCallName: LibCallFloatName, Op, DAG);
20328
20329	if (Op.getValueType() == MVT::f64)
20330	return lowerToLibCall(LibCallName: LibCallDoubleName, Op, DAG);
20331
20332	return SDValue ();
20333	}
20334
20335	bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20336	SDNodeFlags Flags = Op.getNode()->getFlags();
20337	return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20338	Flags.hasNoNaNs() && Flags.hasNoInfs();
20339	}
20340
20341	bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20342	return Op.getNode()->getFlags().hasApproximateFuncs();
20343	}
20344
20345	bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20346	return getTargetMachine().Options.PPCGenScalarMASSEntries;
20347	}
20348
20349	SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20350	const char *LibCallFloatName,
20351	const char *LibCallDoubleNameFinite,
20352	const char *LibCallFloatNameFinite,
20353	SDValue Op,
20354	SelectionDAG &DAG) const {
20355	if (!isScalarMASSConversionEnabled() \|\| !isLowringToMASSSafe(Op))
20356	return SDValue ();
20357
20358	if (!isLowringToMASSFiniteSafe(Op))
20359	return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20360	DAG);
20361
20362	return lowerLibCallBasedOnType(LibCallFloatName: LibCallFloatNameFinite,
20363	LibCallDoubleName: LibCallDoubleNameFinite, Op, DAG);
20364	}
20365
20366	SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20367	return lowerLibCallBase(LibCallDoubleName: "__xl_pow", LibCallFloatName: "__xl_powf", LibCallDoubleNameFinite: "__xl_pow_finite",
20368	LibCallFloatNameFinite: "__xl_powf_finite", Op, DAG);
20369	}
20370
20371	SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20372	return lowerLibCallBase(LibCallDoubleName: "__xl_sin", LibCallFloatName: "__xl_sinf", LibCallDoubleNameFinite: "__xl_sin_finite",
20373	LibCallFloatNameFinite: "__xl_sinf_finite", Op, DAG);
20374	}
20375
20376	SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20377	return lowerLibCallBase(LibCallDoubleName: "__xl_cos", LibCallFloatName: "__xl_cosf", LibCallDoubleNameFinite: "__xl_cos_finite",
20378	LibCallFloatNameFinite: "__xl_cosf_finite", Op, DAG);
20379	}
20380
20381	SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20382	return lowerLibCallBase(LibCallDoubleName: "__xl_log", LibCallFloatName: "__xl_logf", LibCallDoubleNameFinite: "__xl_log_finite",
20383	LibCallFloatNameFinite: "__xl_logf_finite", Op, DAG);
20384	}
20385
20386	SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20387	return lowerLibCallBase(LibCallDoubleName: "__xl_log10", LibCallFloatName: "__xl_log10f", LibCallDoubleNameFinite: "__xl_log10_finite",
20388	LibCallFloatNameFinite: "__xl_log10f_finite", Op, DAG);
20389	}
20390
20391	SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20392	return lowerLibCallBase(LibCallDoubleName: "__xl_exp", LibCallFloatName: "__xl_expf", LibCallDoubleNameFinite: "__xl_exp_finite",
20393	LibCallFloatNameFinite: "__xl_expf_finite", Op, DAG);
20394	}
20395
20396	// If we happen to match to an aligned D-Form, check if the Frame Index is
20397	// adequately aligned. If it is not, reset the mode to match to X-Form.
20398	static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20399	PPC::AddrMode &Mode) {
20400	if (!isa<FrameIndexSDNode>(Val: N))
20401	return;
20402	if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) \|\|
20403	(Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
20404	Mode = PPC::AM_XForm;
20405	}
20406
20407	/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
20408	/// compute the address flags of the node, get the optimal address mode based
20409	/// on the flags, and set the Base and Disp based on the address mode.
20410	PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
20411	SDValue N, SDValue &Disp,
20412	SDValue &Base,
20413	SelectionDAG &DAG,
20414	MaybeAlign Align) const {
20415	SDLoc DL(Parent);
20416
20417	// Compute the address flags.
20418	unsigned Flags = computeMOFlags(Parent, N, DAG);
20419
20420	// Get the optimal address mode based on the Flags.
20421	PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20422
20423	// If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20424	// Select an X-Form load if it is not.
20425	setXFormForUnalignedFI(N, Flags, Mode);
20426
20427	// Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20428	if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20429	assert(Subtarget.isUsingPCRelativeCalls() &&
20430	"Must be using PC-Relative calls when a valid PC-Relative node is "
20431	"present!");
20432	Mode = PPC::AM_PCRel;
20433	}
20434
20435	// Set Base and Disp accordingly depending on the address mode.
20436	switch (Mode) {
20437	case PPC::AM_DForm:
20438	case PPC::AM_DSForm:
20439	case PPC::AM_DQForm: {
20440	// This is a register plus a 16-bit immediate. The base will be the
20441	// register and the displacement will be the immediate unless it
20442	// isn't sufficiently aligned.
20443	if (Flags & PPC::MOF_RPlusSImm16) {
20444	SDValue Op0 = N.getOperand(i: `0`);
20445	SDValue Op1 = N.getOperand(i: `1`);
20446	int16_t Imm = Op1 ->getAsZExtVal();
20447	if (!Align \|\| isAligned(Lhs: *Align, SizeInBytes: Imm)) {
20448	Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: N.getValueType());
20449	Base = Op0;
20450	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op0)) {
20451	Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
20452	fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
20453	}
20454	break;
20455	}
20456	}
20457	// This is a register plus the @lo relocation. The base is the register
20458	// and the displacement is the global address.
20459	else if (Flags & PPC::MOF_RPlusLo) {
20460	Disp = N.getOperand(i: `1`).getOperand(i: `0`); // The global address.
20461	assert(Disp.getOpcode() == ISD::TargetGlobalAddress \|\|
20462	Disp.getOpcode() == ISD::TargetGlobalTLSAddress \|\|
20463	Disp.getOpcode() == ISD::TargetConstantPool \|\|
20464	Disp.getOpcode() == ISD::TargetJumpTable);
20465	Base = N.getOperand(i: `0`);
20466	break;
20467	}
20468	// This is a constant address at most 32 bits. The base will be
20469	// zero or load-immediate-shifted and the displacement will be
20470	// the low 16 bits of the address.
20471	else if (Flags & PPC::MOF_AddrIsSImm32) {
20472	auto *CN = cast<ConstantSDNode>(Val&: N);
20473	EVT CNType = CN->getValueType(ResNo: `0`);
20474	uint64_t CNImm = CN->getZExtValue();
20475	// If this address fits entirely in a 16-bit sext immediate field, codegen
20476	// this as "d, 0".
20477	int16_t Imm;
20478	if (isIntS16Immediate(N: CN, Imm) && (!Align \|\| isAligned(Lhs: *Align, SizeInBytes: Imm))) {
20479	Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: CNType);
20480	Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20481	VT: CNType);
20482	break;
20483	}
20484	// Handle 32-bit sext immediate with LIS + Addr mode.
20485	if ((CNType == MVT::i32 \|\| isInt<`32`>(x: CNImm)) &&
20486	(!Align \|\| isAligned(Lhs: *Align, SizeInBytes: CNImm))) {
20487	int32_t Addr = (int32_t)CNImm;
20488	// Otherwise, break this down into LIS + Disp.
20489	Disp = DAG.getSignedTargetConstant(Val: (int16_t)Addr, DL, VT: MVT::i32);
20490	Base = DAG.getSignedTargetConstant(Val: (Addr - (int16_t)Addr) >> `16`, DL,
20491	VT: MVT::i32);
20492	uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20493	Base = SDValue (DAG.getMachineNode(Opcode: LIS, dl: DL, VT: CNType, Op1: Base), `0`);
20494	break;
20495	}
20496	}
20497	// Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
20498	Disp = DAG.getTargetConstant(Val: `0`, DL, VT: getPointerTy(DL: DAG.getDataLayout()));
20499	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
20500	Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
20501	fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
20502	} else
20503	Base = N;
20504	break;
20505	}
20506	case PPC::AM_PrefixDForm: {
20507	int64_t Imm34 = `0`;
20508	unsigned Opcode = N.getOpcode();
20509	if (((Opcode == ISD::ADD) \|\| (Opcode == ISD::OR)) &&
20510	(isIntS34Immediate(Op: N.getOperand(i: `1`), Imm&: Imm34))) {
20511	// N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
20512	Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
20513	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: `0`)))
20514	Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
20515	else
20516	Base = N.getOperand(i: `0`);
20517	} else if (isIntS34Immediate(Op: N, Imm&: Imm34)) {
20518	// The address is a 34-bit signed immediate.
20519	Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
20520	Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
20521	}
20522	break;
20523	}
20524	case PPC::AM_PCRel: {
20525	// When selecting PC-Relative instructions, "Base" is not utilized as
20526	// we select the address as [PC+imm].
20527	Disp = N;
20528	break;
20529	}
20530	case PPC::AM_None:
20531	break;
20532	default: { // By default, X-Form is always available to be selected.
20533	// When a frame index is not aligned, we also match by XForm.
20534	FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N);
20535	Base = FI ? N : N.getOperand(i: `1`);
20536	Disp = FI ? DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20537	VT: N.getValueType())
20538	: N.getOperand(i: `0`);
20539	break;
20540	}
20541	}
20542	return Mode;
20543	}
20544
20545	CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
20546	bool Return,
20547	bool IsVarArg) const {
20548	switch (CC) {
20549	case CallingConv::Cold:
20550	return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20551	default:
20552	return CC_PPC64_ELF;
20553	}
20554	}
20555
20556	bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
20557	return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20558	}
20559
20560	TargetLowering::AtomicExpansionKind
20561	PPCTargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst AI) const* {
20562	unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20563	if (shouldInlineQuadwordAtomics() && Size == `128`)
20564	return AtomicExpansionKind::MaskedIntrinsic;
20565
20566	switch (AI->getOperation()) {
20567	case AtomicRMWInst::UIncWrap:
20568	case AtomicRMWInst::UDecWrap:
20569	case AtomicRMWInst::USubCond:
20570	case AtomicRMWInst::USubSat:
20571	return AtomicExpansionKind::CmpXChg;
20572	default:
20573	return TargetLowering::shouldExpandAtomicRMWInIR(RMW: AI);
20574	}
20575
20576	llvm_unreachable("unreachable atomicrmw operation");
20577	}
20578
20579	TargetLowering::AtomicExpansionKind
20580	PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(
20581	const AtomicCmpXchgInst AI) const* {
20582	unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
20583	if (shouldInlineQuadwordAtomics() && Size == `128`)
20584	return AtomicExpansionKind::MaskedIntrinsic;
20585	return AtomicExpansionKind::LLSC;
20586	}
20587
20588	static Intrinsic::ID
20589	getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
20590	switch (BinOp) {
20591	default:
20592	llvm_unreachable("Unexpected AtomicRMW BinOp");
20593	case AtomicRMWInst::Xchg:
20594	return Intrinsic::ppc_atomicrmw_xchg_i128;
20595	case AtomicRMWInst::Add:
20596	return Intrinsic::ppc_atomicrmw_add_i128;
20597	case AtomicRMWInst::Sub:
20598	return Intrinsic::ppc_atomicrmw_sub_i128;
20599	case AtomicRMWInst::And:
20600	return Intrinsic::ppc_atomicrmw_and_i128;
20601	case AtomicRMWInst::Or:
20602	return Intrinsic::ppc_atomicrmw_or_i128;
20603	case AtomicRMWInst::Xor:
20604	return Intrinsic::ppc_atomicrmw_xor_i128;
20605	case AtomicRMWInst::Nand:
20606	return Intrinsic::ppc_atomicrmw_nand_i128;
20607	}
20608	}
20609
20610	Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
20611	IRBuilderBase &Builder, AtomicRMWInst AI, Value AlignedAddr, Value *Incr,
20612	Value Mask, Value ShiftAmt, AtomicOrdering Ord) const {
20613	assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20614	Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20615	Type *ValTy = Incr->getType();
20616	assert(ValTy->getPrimitiveSizeInBits() == `128`);
20617	Type *Int64Ty = Type::getInt64Ty(C&: M->getContext());
20618	Value *IncrLo = Builder.CreateTrunc(V: Incr, DestTy: Int64Ty, Name: "incr_lo");
20619	Value *IncrHi =
20620	Builder.CreateTrunc(V: Builder.CreateLShr(LHS: Incr, RHS: `64`), DestTy: Int64Ty, Name: "incr_hi");
20621	Value *LoHi = Builder.CreateIntrinsic(
20622	ID: getIntrinsicForAtomicRMWBinOp128(BinOp: AI->getOperation()), Types: {},
20623	Args: {AlignedAddr, IncrLo, IncrHi});
20624	Value *Lo = Builder.CreateExtractValue(Agg: LoHi, Idxs: `0`, Name: "lo");
20625	Value *Hi = Builder.CreateExtractValue(Agg: LoHi, Idxs: `1`, Name: "hi");
20626	Lo = Builder.CreateZExt(V: Lo, DestTy: ValTy, Name: "lo64");
20627	Hi = Builder.CreateZExt(V: Hi, DestTy: ValTy, Name: "hi64");
20628	return Builder.CreateOr(
20629	LHS: Lo, RHS: Builder.CreateShl(LHS: Hi, RHS: ConstantInt::get(Ty: ValTy, V: `64`)), Name: "val64");
20630	}
20631
20632	Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20633	IRBuilderBase &Builder, AtomicCmpXchgInst CI, Value AlignedAddr,
20634	Value CmpVal, Value NewVal, Value Mask, AtomicOrdering Ord) const* {
20635	assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20636	Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20637	Type *ValTy = CmpVal->getType();
20638	assert(ValTy->getPrimitiveSizeInBits() == `128`);
20639	Function *IntCmpXchg =
20640	Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::ppc_cmpxchg_i128);
20641	Type *Int64Ty = Type::getInt64Ty(C&: M->getContext());
20642	Value *CmpLo = Builder.CreateTrunc(V: CmpVal, DestTy: Int64Ty, Name: "cmp_lo");
20643	Value *CmpHi =
20644	Builder.CreateTrunc(V: Builder.CreateLShr(LHS: CmpVal, RHS: `64`), DestTy: Int64Ty, Name: "cmp_hi");
20645	Value *NewLo = Builder.CreateTrunc(V: NewVal, DestTy: Int64Ty, Name: "new_lo");
20646	Value *NewHi =
20647	Builder.CreateTrunc(V: Builder.CreateLShr(LHS: NewVal, RHS: `64`), DestTy: Int64Ty, Name: "new_hi");
20648	emitLeadingFence(Builder, Inst: CI, Ord);
20649	Value *LoHi =
20650	Builder.CreateCall(Callee: IntCmpXchg, Args: {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20651	emitTrailingFence(Builder, Inst: CI, Ord);
20652	Value *Lo = Builder.CreateExtractValue(Agg: LoHi, Idxs: `0`, Name: "lo");
20653	Value *Hi = Builder.CreateExtractValue(Agg: LoHi, Idxs: `1`, Name: "hi");
20654	Lo = Builder.CreateZExt(V: Lo, DestTy: ValTy, Name: "lo64");
20655	Hi = Builder.CreateZExt(V: Hi, DestTy: ValTy, Name: "hi64");
20656	return Builder.CreateOr(
20657	LHS: Lo, RHS: Builder.CreateShl(LHS: Hi, RHS: ConstantInt::get(Ty: ValTy, V: `64`)), Name: "val64");
20658	}
20659
20660	bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
20661	return Subtarget.useCRBits();
20662	}
20663

Browse the source code of llvm_projects/llvm/lib/Target/PowerPC/PPCISelLowering.cpp