1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCMCTargetDesc.h"
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/CodeGen/CallingConvLower.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/LivePhysRegs.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/SelectionDAG.h"
51#include "llvm/CodeGen/SelectionDAGNodes.h"
52#include "llvm/CodeGen/TargetInstrInfo.h"
53#include "llvm/CodeGen/TargetLowering.h"
54#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
55#include "llvm/CodeGen/TargetRegisterInfo.h"
56#include "llvm/CodeGen/ValueTypes.h"
57#include "llvm/CodeGenTypes/MachineValueType.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
76#include "llvm/MC/MCSectionXCOFF.h"
77#include "llvm/MC/MCSymbolXCOFF.h"
78#include "llvm/Support/AtomicOrdering.h"
79#include "llvm/Support/BranchProbability.h"
80#include "llvm/Support/Casting.h"
81#include "llvm/Support/CodeGen.h"
82#include "llvm/Support/CommandLine.h"
83#include "llvm/Support/Compiler.h"
84#include "llvm/Support/Debug.h"
85#include "llvm/Support/ErrorHandling.h"
86#include "llvm/Support/Format.h"
87#include "llvm/Support/KnownBits.h"
88#include "llvm/Support/MathExtras.h"
89#include "llvm/Support/raw_ostream.h"
90#include "llvm/Target/TargetMachine.h"
91#include "llvm/Target/TargetOptions.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
105static cl::opt<bool> DisableP10StoreForward(
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(Val: false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(Val: true), cl::Hidden);
132
133cl::opt<bool> DisableAutoPairedVecSt(
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(Val: true), cl::Hidden);
137
138static cl::opt<unsigned> PPCMinimumJumpTableEntries(
139 "ppc-min-jump-table-entries", cl::init(Val: 64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
142static cl::opt<unsigned> PPCMinimumBitTestCmps(
143 "ppc-min-bit-test-cmps", cl::init(Val: 3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
147static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
148 "ppc-gather-alias-max-depth", cl::init(Val: 18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
151static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(Val: 1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
170constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
171
172// FIXME: Remove this once the bug has been fixed!
173extern cl::opt<bool> ANDIGlueBug;
174
175PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
176 const PPCSubtarget &STI)
177 : TargetLowering(TM, STI), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting load and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186 const MVT RegVT = Subtarget.getScalarIntVT();
187
188 // Set up the register classes.
189 addRegisterClass(VT: MVT::i32, RC: &PPC::GPRCRegClass);
190 if (!useSoftFloat()) {
191 if (hasSPE()) {
192 addRegisterClass(VT: MVT::f32, RC: &PPC::GPRCRegClass);
193 // EFPU2 APU only supports f32
194 if (!Subtarget.hasEFPU2())
195 addRegisterClass(VT: MVT::f64, RC: &PPC::SPERCRegClass);
196 } else {
197 addRegisterClass(VT: MVT::f32, RC: &PPC::F4RCRegClass);
198 addRegisterClass(VT: MVT::f64, RC: &PPC::F8RCRegClass);
199 }
200 }
201
202 setOperationAction(Op: ISD::UADDO, VT: RegVT, Action: Custom);
203 setOperationAction(Op: ISD::USUBO, VT: RegVT, Action: Custom);
204
205 // PowerPC uses addo_carry,subo_carry to propagate carry.
206 setOperationAction(Op: ISD::UADDO_CARRY, VT: RegVT, Action: Custom);
207 setOperationAction(Op: ISD::USUBO_CARRY, VT: RegVT, Action: Custom);
208
209 // On P10, the default lowering generates better code using the
210 // setbc instruction.
211 if (!Subtarget.hasP10Vector()) {
212 setOperationAction(Op: ISD::SSUBO, VT: MVT::i32, Action: Custom);
213 setOperationAction(Op: ISD::SADDO, VT: MVT::i32, Action: Custom);
214 if (isPPC64) {
215 setOperationAction(Op: ISD::SSUBO, VT: MVT::i64, Action: Custom);
216 setOperationAction(Op: ISD::SADDO, VT: MVT::i64, Action: Custom);
217 }
218 }
219
220 // Match BITREVERSE to customized fast code sequence in the td file.
221 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Legal);
222 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i64, Action: Legal);
223
224 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
225 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP, VT: MVT::i32, Action: Custom);
226
227 // Custom lower inline assembly to check for special registers.
228 setOperationAction(Op: ISD::INLINEASM, VT: MVT::Other, Action: Custom);
229 setOperationAction(Op: ISD::INLINEASM_BR, VT: MVT::Other, Action: Custom);
230
231 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232 for (MVT VT : MVT::integer_valuetypes()) {
233 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
234 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i8, Action: Expand);
235 }
236
237 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f16, Action: Expand);
238 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f128, Action: Expand);
239
240 if (Subtarget.isISA3_0()) {
241 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: MVT::f16, Action: Legal);
242 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Legal);
243 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Legal);
244 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Legal);
245 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Legal);
246 } else {
247 // No extending loads from f16 or HW conversions back and forth.
248 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: MVT::f16, Action: Expand);
249 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f128, Action: Expand);
250 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
251 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand);
252 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64, Action: Expand);
253 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
254 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32, Action: Expand);
255 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32, Action: Expand);
256 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
257 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
258 }
259
260 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
261
262 // PowerPC has pre-inc load and store's.
263 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i1, Action: Legal);
264 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i8, Action: Legal);
265 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i16, Action: Legal);
266 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i32, Action: Legal);
267 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i64, Action: Legal);
268 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i1, Action: Legal);
269 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i8, Action: Legal);
270 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i16, Action: Legal);
271 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i32, Action: Legal);
272 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i64, Action: Legal);
273 if (!Subtarget.hasSPE()) {
274 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::f32, Action: Legal);
275 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::f64, Action: Legal);
276 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::f32, Action: Legal);
277 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::f64, Action: Legal);
278 }
279
280 if (Subtarget.useCRBits()) {
281 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
282
283 if (isPPC64 || Subtarget.hasFPCVT()) {
284 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i1, Action: Promote);
285 AddPromotedToType(Opc: ISD::STRICT_SINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
286 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i1, Action: Promote);
287 AddPromotedToType(Opc: ISD::STRICT_UINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
288
289 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i1, Action: Promote);
290 AddPromotedToType(Opc: ISD::SINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
291 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i1, Action: Promote);
292 AddPromotedToType(Opc: ISD::UINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
293
294 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i1, Action: Promote);
295 AddPromotedToType(Opc: ISD::STRICT_FP_TO_SINT, OrigVT: MVT::i1, DestVT: RegVT);
296 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i1, Action: Promote);
297 AddPromotedToType(Opc: ISD::STRICT_FP_TO_UINT, OrigVT: MVT::i1, DestVT: RegVT);
298
299 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i1, Action: Promote);
300 AddPromotedToType(Opc: ISD::FP_TO_SINT, OrigVT: MVT::i1, DestVT: RegVT);
301 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i1, Action: Promote);
302 AddPromotedToType(Opc: ISD::FP_TO_UINT, OrigVT: MVT::i1, DestVT: RegVT);
303 } else {
304 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i1, Action: Custom);
305 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i1, Action: Custom);
306 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i1, Action: Custom);
307 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i1, Action: Custom);
308 }
309
310 // PowerPC does not support direct load/store of condition registers.
311 setOperationAction(Op: ISD::LOAD, VT: MVT::i1, Action: Custom);
312 setOperationAction(Op: ISD::STORE, VT: MVT::i1, Action: Custom);
313
314 // FIXME: Remove this once the ANDI glue bug is fixed:
315 if (ANDIGlueBug)
316 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::i1, Action: Custom);
317
318 for (MVT VT : MVT::integer_valuetypes()) {
319 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
320 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
321 setTruncStoreAction(ValVT: VT, MemVT: MVT::i1, Action: Expand);
322 }
323
324 addRegisterClass(VT: MVT::i1, RC: &PPC::CRBITRCRegClass);
325 }
326
327 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328 // PPC (the libcall is not available).
329 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::ppcf128, Action: Custom);
330 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::ppcf128, Action: Custom);
331 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::ppcf128, Action: Custom);
332 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::ppcf128, Action: Custom);
333
334 // We do not currently implement these libm ops for PowerPC.
335 setOperationAction(Op: ISD::FFLOOR, VT: MVT::ppcf128, Action: Expand);
336 setOperationAction(Op: ISD::FCEIL, VT: MVT::ppcf128, Action: Expand);
337 setOperationAction(Op: ISD::FTRUNC, VT: MVT::ppcf128, Action: Expand);
338 setOperationAction(Op: ISD::FRINT, VT: MVT::ppcf128, Action: Expand);
339 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::ppcf128, Action: Expand);
340 setOperationAction(Op: ISD::FREM, VT: MVT::ppcf128, Action: LibCall);
341
342 // PowerPC has no SREM/UREM instructions unless we are on P9
343 // On P9 we may use a hardware instruction to compute the remainder.
344 // When the result of both the remainder and the division is required it is
345 // more efficient to compute the remainder from the result of the division
346 // rather than use the remainder instruction. The instructions are legalized
347 // directly because the DivRemPairsPass performs the transformation at the IR
348 // level.
349 if (Subtarget.isISA3_0()) {
350 setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Legal);
351 setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Legal);
352 setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Legal);
353 setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Legal);
354 } else {
355 setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Expand);
356 setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Expand);
357 setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Expand);
358 setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Expand);
359 }
360
361 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
362 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Expand);
363 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Expand);
364 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand);
365 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand);
366 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Expand);
367 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Expand);
368 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand);
369 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand);
370
371 // Handle constrained floating-point operations of scalar.
372 // TODO: Handle SPE specific operation.
373 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f32, Action: Legal);
374 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f32, Action: Legal);
375 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f32, Action: Legal);
376 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f32, Action: Legal);
377 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f32, Action: Legal);
378
379 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f64, Action: Legal);
380 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f64, Action: Legal);
381 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f64, Action: Legal);
382 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f64, Action: Legal);
383
384 if (!Subtarget.hasSPE()) {
385 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f32, Action: Legal);
386 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f64, Action: Legal);
387 }
388
389 if (Subtarget.hasVSX()) {
390 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f32, Action: Legal);
391 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f64, Action: Legal);
392 }
393
394 if (Subtarget.hasFSQRT()) {
395 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f32, Action: Legal);
396 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f64, Action: Legal);
397 }
398
399 if (Subtarget.hasFPRND()) {
400 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f32, Action: Legal);
401 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f32, Action: Legal);
402 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f32, Action: Legal);
403 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f32, Action: Legal);
404
405 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f64, Action: Legal);
406 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f64, Action: Legal);
407 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f64, Action: Legal);
408 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f64, Action: Legal);
409 }
410
411 // We don't support sin/cos/sqrt/fmod/pow
412 setOperationAction(Op: ISD::FSIN , VT: MVT::f64, Action: Expand);
413 setOperationAction(Op: ISD::FCOS , VT: MVT::f64, Action: Expand);
414 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand);
415 setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: LibCall);
416 setOperationAction(Op: ISD::FPOW , VT: MVT::f64, Action: Expand);
417 setOperationAction(Op: ISD::FSIN , VT: MVT::f32, Action: Expand);
418 setOperationAction(Op: ISD::FCOS , VT: MVT::f32, Action: Expand);
419 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand);
420 setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: LibCall);
421 setOperationAction(Op: ISD::FPOW , VT: MVT::f32, Action: Expand);
422
423 // MASS transformation for LLVM intrinsics with replicating fast-math flag
424 // to be consistent to PPCGenScalarMASSEntries pass
425 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
426 setOperationAction(Op: ISD::FSIN , VT: MVT::f64, Action: Custom);
427 setOperationAction(Op: ISD::FCOS , VT: MVT::f64, Action: Custom);
428 setOperationAction(Op: ISD::FPOW , VT: MVT::f64, Action: Custom);
429 setOperationAction(Op: ISD::FLOG, VT: MVT::f64, Action: Custom);
430 setOperationAction(Op: ISD::FLOG10, VT: MVT::f64, Action: Custom);
431 setOperationAction(Op: ISD::FEXP, VT: MVT::f64, Action: Custom);
432 setOperationAction(Op: ISD::FSIN , VT: MVT::f32, Action: Custom);
433 setOperationAction(Op: ISD::FCOS , VT: MVT::f32, Action: Custom);
434 setOperationAction(Op: ISD::FPOW , VT: MVT::f32, Action: Custom);
435 setOperationAction(Op: ISD::FLOG, VT: MVT::f32, Action: Custom);
436 setOperationAction(Op: ISD::FLOG10, VT: MVT::f32, Action: Custom);
437 setOperationAction(Op: ISD::FEXP, VT: MVT::f32, Action: Custom);
438 }
439
440 if (Subtarget.hasSPE()) {
441 setOperationAction(Op: ISD::FMA , VT: MVT::f64, Action: Expand);
442 setOperationAction(Op: ISD::FMA , VT: MVT::f32, Action: Expand);
443 } else {
444 setOperationAction(Op: ISD::FMA , VT: MVT::f64, Action: Legal);
445 setOperationAction(Op: ISD::FMA , VT: MVT::f32, Action: Legal);
446 setOperationAction(Op: ISD::GET_ROUNDING, VT: MVT::i32, Action: Custom);
447 setOperationAction(Op: ISD::SET_ROUNDING, VT: MVT::Other, Action: Custom);
448 }
449
450 if (Subtarget.hasSPE())
451 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
452
453 // If we're enabling GP optimizations, use hardware square root
454 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
455 setOperationAction(Op: ISD::FSQRT, VT: MVT::f64, Action: Expand);
456
457 if (!Subtarget.hasFSQRT() &&
458 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
459 setOperationAction(Op: ISD::FSQRT, VT: MVT::f32, Action: Expand);
460
461 if (Subtarget.hasFCPSGN()) {
462 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f64, Action: Legal);
463 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f32, Action: Legal);
464 } else {
465 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f64, Action: Expand);
466 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f32, Action: Expand);
467 }
468
469 if (Subtarget.hasFPRND()) {
470 setOperationAction(Op: ISD::FFLOOR, VT: MVT::f64, Action: Legal);
471 setOperationAction(Op: ISD::FCEIL, VT: MVT::f64, Action: Legal);
472 setOperationAction(Op: ISD::FTRUNC, VT: MVT::f64, Action: Legal);
473 setOperationAction(Op: ISD::FROUND, VT: MVT::f64, Action: Legal);
474
475 setOperationAction(Op: ISD::FFLOOR, VT: MVT::f32, Action: Legal);
476 setOperationAction(Op: ISD::FCEIL, VT: MVT::f32, Action: Legal);
477 setOperationAction(Op: ISD::FTRUNC, VT: MVT::f32, Action: Legal);
478 setOperationAction(Op: ISD::FROUND, VT: MVT::f32, Action: Legal);
479 }
480
481 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
482 // instruction xxbrd to speed up scalar BSWAP64.
483 if (Subtarget.isISA3_1()) {
484 setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Legal);
485 setOperationAction(Op: ISD::BSWAP, VT: MVT::i64, Action: Legal);
486 } else {
487 setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Expand);
488 setOperationAction(Op: ISD::BSWAP, VT: MVT::i64,
489 Action: (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490 }
491
492 // CTPOP or CTTZ were introduced in P8/P9 respectively
493 if (Subtarget.isISA3_0()) {
494 setOperationAction(Op: ISD::CTTZ , VT: MVT::i32 , Action: Legal);
495 setOperationAction(Op: ISD::CTTZ , VT: MVT::i64 , Action: Legal);
496 } else {
497 setOperationAction(Op: ISD::CTTZ , VT: MVT::i32 , Action: Expand);
498 setOperationAction(Op: ISD::CTTZ , VT: MVT::i64 , Action: Expand);
499 }
500
501 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
502 setOperationAction(Op: ISD::CTPOP, VT: MVT::i32 , Action: Legal);
503 setOperationAction(Op: ISD::CTPOP, VT: MVT::i64 , Action: Legal);
504 } else {
505 setOperationAction(Op: ISD::CTPOP, VT: MVT::i32 , Action: Expand);
506 setOperationAction(Op: ISD::CTPOP, VT: MVT::i64 , Action: Expand);
507 }
508
509 // PowerPC does not have ROTR
510 setOperationAction(Op: ISD::ROTR, VT: MVT::i32 , Action: Expand);
511 setOperationAction(Op: ISD::ROTR, VT: MVT::i64 , Action: Expand);
512
513 if (!Subtarget.useCRBits()) {
514 // PowerPC does not have Select
515 setOperationAction(Op: ISD::SELECT, VT: MVT::i32, Action: Expand);
516 setOperationAction(Op: ISD::SELECT, VT: MVT::i64, Action: Expand);
517 setOperationAction(Op: ISD::SELECT, VT: MVT::f32, Action: Expand);
518 setOperationAction(Op: ISD::SELECT, VT: MVT::f64, Action: Expand);
519 }
520
521 // PowerPC wants to turn select_cc of FP into fsel when possible.
522 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Custom);
523 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Custom);
524
525 // PowerPC wants to optimize integer setcc a bit
526 if (!Subtarget.useCRBits())
527 setOperationAction(Op: ISD::SETCC, VT: MVT::i32, Action: Custom);
528
529 if (Subtarget.hasFPU()) {
530 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal);
531 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal);
532 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f128, Action: Legal);
533
534 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
535 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
536 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f128, Action: Legal);
537 }
538
539 // PowerPC does not have BRCOND which requires SetCC
540 if (!Subtarget.useCRBits())
541 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Expand);
542
543 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
544
545 if (Subtarget.hasSPE()) {
546 // SPE has built-in conversions
547 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i32, Action: Legal);
548 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Legal);
549 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i32, Action: Legal);
550 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Legal);
551 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Legal);
552 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Legal);
553
554 // SPE supports signaling compare of f32/f64.
555 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
556 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
557 } else {
558 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
559 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i32, Action: Custom);
560 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
561
562 // PowerPC does not have [U|S]INT_TO_FP
563 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Expand);
564 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i32, Action: Expand);
565 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Expand);
566 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Expand);
567 }
568
569 if (Subtarget.hasDirectMove() && isPPC64) {
570 setOperationAction(Op: ISD::BITCAST, VT: MVT::f32, Action: Legal);
571 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Legal);
572 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Legal);
573 setOperationAction(Op: ISD::BITCAST, VT: MVT::f64, Action: Legal);
574
575 setOperationAction(Op: ISD::STRICT_LRINT, VT: MVT::f64, Action: Custom);
576 setOperationAction(Op: ISD::STRICT_LRINT, VT: MVT::f32, Action: Custom);
577 setOperationAction(Op: ISD::STRICT_LLRINT, VT: MVT::f64, Action: Custom);
578 setOperationAction(Op: ISD::STRICT_LLRINT, VT: MVT::f32, Action: Custom);
579 setOperationAction(Op: ISD::STRICT_LROUND, VT: MVT::f64, Action: Custom);
580 setOperationAction(Op: ISD::STRICT_LROUND, VT: MVT::f32, Action: Custom);
581 setOperationAction(Op: ISD::STRICT_LLROUND, VT: MVT::f64, Action: Custom);
582 setOperationAction(Op: ISD::STRICT_LLROUND, VT: MVT::f32, Action: Custom);
583 } else {
584 setOperationAction(Op: ISD::BITCAST, VT: MVT::f32, Action: Expand);
585 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Expand);
586 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Expand);
587 setOperationAction(Op: ISD::BITCAST, VT: MVT::f64, Action: Expand);
588 }
589
590 // We cannot sextinreg(i1). Expand to shifts.
591 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
592
593 // Custom handling for PowerPC ucmp instruction
594 setOperationAction(Op: ISD::UCMP, VT: MVT::i32, Action: Custom);
595 setOperationAction(Op: ISD::UCMP, VT: MVT::i64, Action: isPPC64 ? Custom : Expand);
596
597 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
599 // support continuation, user-level threading, and etc.. As a result, no
600 // other SjLj exception interfaces are implemented and please don't build
601 // your own exception handling based on them.
602 // LLVM/Clang supports zero-cost DWARF exception handling.
603 setOperationAction(Op: ISD::EH_SJLJ_SETJMP, VT: MVT::i32, Action: Custom);
604 setOperationAction(Op: ISD::EH_SJLJ_LONGJMP, VT: MVT::Other, Action: Custom);
605
606 // We want to legalize GlobalAddress and ConstantPool nodes into the
607 // appropriate instructions to materialize the address.
608 setOperationAction(Op: ISD::GlobalAddress, VT: MVT::i32, Action: Custom);
609 setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVT::i32, Action: Custom);
610 setOperationAction(Op: ISD::BlockAddress, VT: MVT::i32, Action: Custom);
611 setOperationAction(Op: ISD::ConstantPool, VT: MVT::i32, Action: Custom);
612 setOperationAction(Op: ISD::JumpTable, VT: MVT::i32, Action: Custom);
613 setOperationAction(Op: ISD::GlobalAddress, VT: MVT::i64, Action: Custom);
614 setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVT::i64, Action: Custom);
615 setOperationAction(Op: ISD::BlockAddress, VT: MVT::i64, Action: Custom);
616 setOperationAction(Op: ISD::ConstantPool, VT: MVT::i64, Action: Custom);
617 setOperationAction(Op: ISD::JumpTable, VT: MVT::i64, Action: Custom);
618
619 // TRAP is legal.
620 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
621
622 // TRAMPOLINE is custom lowered.
623 setOperationAction(Op: ISD::INIT_TRAMPOLINE, VT: MVT::Other, Action: Custom);
624 setOperationAction(Op: ISD::ADJUST_TRAMPOLINE, VT: MVT::Other, Action: Custom);
625
626 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
627 setOperationAction(Op: ISD::VASTART , VT: MVT::Other, Action: Custom);
628
629 if (Subtarget.is64BitELFABI()) {
630 // VAARG always uses double-word chunks, so promote anything smaller.
631 setOperationAction(Op: ISD::VAARG, VT: MVT::i1, Action: Promote);
632 AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i1, DestVT: MVT::i64);
633 setOperationAction(Op: ISD::VAARG, VT: MVT::i8, Action: Promote);
634 AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i8, DestVT: MVT::i64);
635 setOperationAction(Op: ISD::VAARG, VT: MVT::i16, Action: Promote);
636 AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i16, DestVT: MVT::i64);
637 setOperationAction(Op: ISD::VAARG, VT: MVT::i32, Action: Promote);
638 AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i32, DestVT: MVT::i64);
639 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
640 } else if (Subtarget.is32BitELFABI()) {
641 // VAARG is custom lowered with the 32-bit SVR4 ABI.
642 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Custom);
643 setOperationAction(Op: ISD::VAARG, VT: MVT::i64, Action: Custom);
644 } else
645 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
646
647 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
648 if (Subtarget.is32BitELFABI())
649 setOperationAction(Op: ISD::VACOPY , VT: MVT::Other, Action: Custom);
650 else
651 setOperationAction(Op: ISD::VACOPY , VT: MVT::Other, Action: Expand);
652
653 // Use the default implementation.
654 setOperationAction(Op: ISD::VAEND , VT: MVT::Other, Action: Expand);
655 setOperationAction(Op: ISD::STACKSAVE , VT: MVT::Other, Action: Expand);
656 setOperationAction(Op: ISD::STACKRESTORE , VT: MVT::Other, Action: Custom);
657 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i32 , Action: Custom);
658 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i64 , Action: Custom);
659 setOperationAction(Op: ISD::GET_DYNAMIC_AREA_OFFSET, VT: MVT::i32, Action: Custom);
660 setOperationAction(Op: ISD::GET_DYNAMIC_AREA_OFFSET, VT: MVT::i64, Action: Custom);
661 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom);
662 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i64, Action: Custom);
663
664 if (Subtarget.isISA3_0() && isPPC64) {
665 setOperationAction(Op: ISD::VP_STORE, VT: MVT::v16i1, Action: Custom);
666 setOperationAction(Op: ISD::VP_STORE, VT: MVT::v8i1, Action: Custom);
667 setOperationAction(Op: ISD::VP_STORE, VT: MVT::v4i1, Action: Custom);
668 setOperationAction(Op: ISD::VP_STORE, VT: MVT::v2i1, Action: Custom);
669 setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v16i1, Action: Custom);
670 setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v8i1, Action: Custom);
671 setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v4i1, Action: Custom);
672 setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v2i1, Action: Custom);
673 }
674
675 // We want to custom lower some of our intrinsics.
676 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
677 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::f64, Action: Custom);
678 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::ppcf128, Action: Custom);
679 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::v4f32, Action: Custom);
680 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::v2f64, Action: Custom);
681
682 // To handle counter-based loop conditions.
683 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i1, Action: Custom);
684 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
685
686 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i8, Action: Custom);
687 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i16, Action: Custom);
688 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom);
689 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
690
691 // Comparisons that require checking two conditions.
692 if (Subtarget.hasSPE()) {
693 setCondCodeAction(CCs: ISD::SETO, VT: MVT::f32, Action: Expand);
694 setCondCodeAction(CCs: ISD::SETO, VT: MVT::f64, Action: Expand);
695 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::f32, Action: Expand);
696 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::f64, Action: Expand);
697 }
698 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f32, Action: Expand);
699 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f64, Action: Expand);
700 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f32, Action: Expand);
701 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f64, Action: Expand);
702 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f32, Action: Expand);
703 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f64, Action: Expand);
704 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand);
705 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f64, Action: Expand);
706 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f32, Action: Expand);
707 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f64, Action: Expand);
708 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f32, Action: Expand);
709 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f64, Action: Expand);
710
711 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f32, Action: Legal);
712 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f64, Action: Legal);
713
714 if (Subtarget.has64BitSupport()) {
715 // They also have instructions for converting between i64 and fp.
716 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i64, Action: Custom);
717 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i64, Action: Expand);
718 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i64, Action: Custom);
719 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i64, Action: Expand);
720 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i64, Action: Custom);
721 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
722 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
723 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
724 // This is just the low 32 bits of a (signed) fp->i64 conversion.
725 // We cannot do this with Promote because i64 is not a legal type.
726 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Custom);
727 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
728
729 if (Subtarget.hasLFIWAX() || isPPC64) {
730 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Custom);
731 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Custom);
732 }
733 } else {
734 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
735 if (Subtarget.hasSPE()) {
736 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Legal);
737 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Legal);
738 } else {
739 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Expand);
740 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Expand);
741 }
742 }
743
744 // With the instructions enabled under FPCVT, we can do everything.
745 if (Subtarget.hasFPCVT()) {
746 if (Subtarget.has64BitSupport()) {
747 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i64, Action: Custom);
748 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i64, Action: Custom);
749 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i64, Action: Custom);
750 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i64, Action: Custom);
751 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i64, Action: Custom);
752 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Custom);
753 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
754 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom);
755 }
756
757 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i32, Action: Custom);
758 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Custom);
759 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Custom);
760 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i32, Action: Custom);
761 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
762 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
763 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Custom);
764 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Custom);
765 }
766
767 if (Subtarget.use64BitRegs()) {
768 // 64-bit PowerPC implementations can support i64 types directly
769 addRegisterClass(VT: MVT::i64, RC: &PPC::G8RCRegClass);
770 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
771 setOperationAction(Op: ISD::BUILD_PAIR, VT: MVT::i64, Action: Expand);
772 // 64-bit PowerPC wants to expand i128 shifts itself.
773 setOperationAction(Op: ISD::SHL_PARTS, VT: MVT::i64, Action: Custom);
774 setOperationAction(Op: ISD::SRA_PARTS, VT: MVT::i64, Action: Custom);
775 setOperationAction(Op: ISD::SRL_PARTS, VT: MVT::i64, Action: Custom);
776 } else {
777 // 32-bit PowerPC wants to expand i64 shifts itself.
778 setOperationAction(Op: ISD::SHL_PARTS, VT: MVT::i32, Action: Custom);
779 setOperationAction(Op: ISD::SRA_PARTS, VT: MVT::i32, Action: Custom);
780 setOperationAction(Op: ISD::SRL_PARTS, VT: MVT::i32, Action: Custom);
781 }
782
783 // PowerPC has better expansions for funnel shifts than the generic
784 // TargetLowering::expandFunnelShift.
785 if (Subtarget.has64BitSupport()) {
786 setOperationAction(Op: ISD::FSHL, VT: MVT::i64, Action: Custom);
787 setOperationAction(Op: ISD::FSHR, VT: MVT::i64, Action: Custom);
788 }
789 setOperationAction(Op: ISD::FSHL, VT: MVT::i32, Action: Custom);
790 setOperationAction(Op: ISD::FSHR, VT: MVT::i32, Action: Custom);
791
792 if (Subtarget.hasVSX()) {
793 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal);
794 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal);
795 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal);
796 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal);
797 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f64, Action: Legal);
798 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f32, Action: Legal);
799 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f64, Action: Legal);
800 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f32, Action: Legal);
801 setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f64, Action: Legal);
802 setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f32, Action: Legal);
803 }
804
805 if (Subtarget.hasAltivec()) {
806 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
807 setOperationAction(Op: ISD::AVGCEILS, VT, Action: Legal);
808 setOperationAction(Op: ISD::AVGCEILU, VT, Action: Legal);
809 setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
810 setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
811 setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
812 setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
813 }
814 // First set operation action for all vector types to expand. Then we
815 // will selectively turn on ones that can be effectively codegen'd.
816 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
817 // add/sub are legal for all supported vector VT's.
818 setOperationAction(Op: ISD::ADD, VT, Action: Legal);
819 setOperationAction(Op: ISD::SUB, VT, Action: Legal);
820
821 // For v2i64, these are only valid with P8Vector. This is corrected after
822 // the loop.
823 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
824 setOperationAction(Op: ISD::SMAX, VT, Action: Legal);
825 setOperationAction(Op: ISD::SMIN, VT, Action: Legal);
826 setOperationAction(Op: ISD::UMAX, VT, Action: Legal);
827 setOperationAction(Op: ISD::UMIN, VT, Action: Legal);
828 }
829 else {
830 setOperationAction(Op: ISD::SMAX, VT, Action: Expand);
831 setOperationAction(Op: ISD::SMIN, VT, Action: Expand);
832 setOperationAction(Op: ISD::UMAX, VT, Action: Expand);
833 setOperationAction(Op: ISD::UMIN, VT, Action: Expand);
834 }
835
836 if (Subtarget.hasVSX()) {
837 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT, Action: Legal);
838 setOperationAction(Op: ISD::FMINNUM_IEEE, VT, Action: Legal);
839 setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
840 setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
841 setOperationAction(Op: ISD::FCANONICALIZE, VT, Action: Legal);
842 }
843
844 // Vector instructions introduced in P8
845 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
846 setOperationAction(Op: ISD::CTPOP, VT, Action: Legal);
847 setOperationAction(Op: ISD::CTLZ, VT, Action: Legal);
848 }
849 else {
850 setOperationAction(Op: ISD::CTPOP, VT, Action: Expand);
851 setOperationAction(Op: ISD::CTLZ, VT, Action: Expand);
852 }
853
854 // Vector instructions introduced in P9
855 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
856 setOperationAction(Op: ISD::CTTZ, VT, Action: Legal);
857 else
858 setOperationAction(Op: ISD::CTTZ, VT, Action: Expand);
859
860 // We promote all shuffles to v16i8.
861 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Promote);
862 AddPromotedToType (Opc: ISD::VECTOR_SHUFFLE, OrigVT: VT, DestVT: MVT::v16i8);
863
864 // We promote all non-typed operations to v4i32.
865 setOperationAction(Op: ISD::AND , VT, Action: Promote);
866 AddPromotedToType (Opc: ISD::AND , OrigVT: VT, DestVT: MVT::v4i32);
867 setOperationAction(Op: ISD::OR , VT, Action: Promote);
868 AddPromotedToType (Opc: ISD::OR , OrigVT: VT, DestVT: MVT::v4i32);
869 setOperationAction(Op: ISD::XOR , VT, Action: Promote);
870 AddPromotedToType (Opc: ISD::XOR , OrigVT: VT, DestVT: MVT::v4i32);
871 setOperationAction(Op: ISD::LOAD , VT, Action: Promote);
872 AddPromotedToType (Opc: ISD::LOAD , OrigVT: VT, DestVT: MVT::v4i32);
873 setOperationAction(Op: ISD::SELECT, VT, Action: Promote);
874 AddPromotedToType (Opc: ISD::SELECT, OrigVT: VT, DestVT: MVT::v4i32);
875 setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
876 setOperationAction(Op: ISD::SELECT_CC, VT, Action: Promote);
877 AddPromotedToType (Opc: ISD::SELECT_CC, OrigVT: VT, DestVT: MVT::v4i32);
878 setOperationAction(Op: ISD::STORE, VT, Action: Promote);
879 AddPromotedToType (Opc: ISD::STORE, OrigVT: VT, DestVT: MVT::v4i32);
880
881 // No other operations are legal.
882 setOperationAction(Op: ISD::MUL , VT, Action: Expand);
883 setOperationAction(Op: ISD::SDIV, VT, Action: Expand);
884 setOperationAction(Op: ISD::SREM, VT, Action: Expand);
885 setOperationAction(Op: ISD::UDIV, VT, Action: Expand);
886 setOperationAction(Op: ISD::UREM, VT, Action: Expand);
887 setOperationAction(Op: ISD::FDIV, VT, Action: Expand);
888 setOperationAction(Op: ISD::FREM, VT, Action: Expand);
889 setOperationAction(Op: ISD::FNEG, VT, Action: Expand);
890 setOperationAction(Op: ISD::FSQRT, VT, Action: Expand);
891 setOperationAction(Op: ISD::FLOG, VT, Action: Expand);
892 setOperationAction(Op: ISD::FLOG10, VT, Action: Expand);
893 setOperationAction(Op: ISD::FLOG2, VT, Action: Expand);
894 setOperationAction(Op: ISD::FEXP, VT, Action: Expand);
895 setOperationAction(Op: ISD::FEXP2, VT, Action: Expand);
896 setOperationAction(Op: ISD::FSIN, VT, Action: Expand);
897 setOperationAction(Op: ISD::FCOS, VT, Action: Expand);
898 setOperationAction(Op: ISD::FABS, VT, Action: Expand);
899 setOperationAction(Op: ISD::FFLOOR, VT, Action: Expand);
900 setOperationAction(Op: ISD::FCEIL, VT, Action: Expand);
901 setOperationAction(Op: ISD::FTRUNC, VT, Action: Expand);
902 setOperationAction(Op: ISD::FRINT, VT, Action: Expand);
903 setOperationAction(Op: ISD::FLDEXP, VT, Action: Expand);
904 setOperationAction(Op: ISD::FNEARBYINT, VT, Action: Expand);
905 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Expand);
906 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Expand);
907 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Expand);
908 setOperationAction(Op: ISD::MULHU, VT, Action: Expand);
909 setOperationAction(Op: ISD::MULHS, VT, Action: Expand);
910 setOperationAction(Op: ISD::UMUL_LOHI, VT, Action: Expand);
911 setOperationAction(Op: ISD::SMUL_LOHI, VT, Action: Expand);
912 setOperationAction(Op: ISD::UDIVREM, VT, Action: Expand);
913 setOperationAction(Op: ISD::SDIVREM, VT, Action: Expand);
914 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Expand);
915 setOperationAction(Op: ISD::FPOW, VT, Action: Expand);
916 setOperationAction(Op: ISD::BSWAP, VT, Action: Expand);
917 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT, Action: Expand);
918 setOperationAction(Op: ISD::ROTL, VT, Action: Expand);
919 setOperationAction(Op: ISD::ROTR, VT, Action: Expand);
920
921 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
922 setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
923 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
924 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
925 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
926 }
927 }
928 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::v4i32, Action: Expand);
929 if (!Subtarget.hasP8Vector()) {
930 setOperationAction(Op: ISD::SMAX, VT: MVT::v2i64, Action: Expand);
931 setOperationAction(Op: ISD::SMIN, VT: MVT::v2i64, Action: Expand);
932 setOperationAction(Op: ISD::UMAX, VT: MVT::v2i64, Action: Expand);
933 setOperationAction(Op: ISD::UMIN, VT: MVT::v2i64, Action: Expand);
934 }
935
936 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
937 // with merges, splats, etc.
938 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v16i8, Action: Custom);
939
940 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
941 // are cheap, so handle them before they get expanded to scalar.
942 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v8i8, Action: Custom);
943 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v4i8, Action: Custom);
944 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v2i8, Action: Custom);
945 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v4i16, Action: Custom);
946 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v2i16, Action: Custom);
947
948 setOperationAction(Op: ISD::AND , VT: MVT::v4i32, Action: Legal);
949 setOperationAction(Op: ISD::OR , VT: MVT::v4i32, Action: Legal);
950 setOperationAction(Op: ISD::XOR , VT: MVT::v4i32, Action: Legal);
951 setOperationAction(Op: ISD::LOAD , VT: MVT::v4i32, Action: Legal);
952 setOperationAction(Op: ISD::SELECT, VT: MVT::v4i32,
953 Action: Subtarget.useCRBits() ? Legal : Expand);
954 setOperationAction(Op: ISD::STORE , VT: MVT::v4i32, Action: Legal);
955 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v4i32, Action: Legal);
956 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v4i32, Action: Legal);
957 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i32, Action: Legal);
958 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i32, Action: Legal);
959 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v4i32, Action: Legal);
960 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v4i32, Action: Legal);
961 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i32, Action: Legal);
962 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i32, Action: Legal);
963 setOperationAction(Op: ISD::FFLOOR, VT: MVT::v4f32, Action: Legal);
964 setOperationAction(Op: ISD::FCEIL, VT: MVT::v4f32, Action: Legal);
965 setOperationAction(Op: ISD::FTRUNC, VT: MVT::v4f32, Action: Legal);
966 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::v4f32, Action: Legal);
967
968 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
969 setOperationAction(Op: ISD::ROTL, VT: MVT::v1i128, Action: Custom);
970 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
971 if (Subtarget.hasAltivec())
972 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
973 setOperationAction(Op: ISD::ROTL, VT, Action: Legal);
974 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
975 if (Subtarget.hasP8Altivec())
976 setOperationAction(Op: ISD::ROTL, VT: MVT::v2i64, Action: Legal);
977
978 addRegisterClass(VT: MVT::v4f32, RC: &PPC::VRRCRegClass);
979 addRegisterClass(VT: MVT::v4i32, RC: &PPC::VRRCRegClass);
980 addRegisterClass(VT: MVT::v8i16, RC: &PPC::VRRCRegClass);
981 addRegisterClass(VT: MVT::v16i8, RC: &PPC::VRRCRegClass);
982
983 setOperationAction(Op: ISD::MUL, VT: MVT::v4f32, Action: Legal);
984 setOperationAction(Op: ISD::FMA, VT: MVT::v4f32, Action: Legal);
985
986 if (Subtarget.hasVSX()) {
987 setOperationAction(Op: ISD::FDIV, VT: MVT::v4f32, Action: Legal);
988 setOperationAction(Op: ISD::FSQRT, VT: MVT::v4f32, Action: Legal);
989 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2f64, Action: Custom);
990 }
991
992 if (Subtarget.hasP8Altivec())
993 setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Legal);
994 else
995 setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Custom);
996
997 if (Subtarget.isISA3_1()) {
998 setOperationAction(Op: ISD::MUL, VT: MVT::v2i64, Action: Legal);
999 setOperationAction(Op: ISD::MULHS, VT: MVT::v2i64, Action: Legal);
1000 setOperationAction(Op: ISD::MULHU, VT: MVT::v2i64, Action: Legal);
1001 setOperationAction(Op: ISD::MULHS, VT: MVT::v4i32, Action: Legal);
1002 setOperationAction(Op: ISD::MULHU, VT: MVT::v4i32, Action: Legal);
1003 setOperationAction(Op: ISD::UDIV, VT: MVT::v2i64, Action: Legal);
1004 setOperationAction(Op: ISD::SDIV, VT: MVT::v2i64, Action: Legal);
1005 setOperationAction(Op: ISD::UDIV, VT: MVT::v4i32, Action: Legal);
1006 setOperationAction(Op: ISD::SDIV, VT: MVT::v4i32, Action: Legal);
1007 setOperationAction(Op: ISD::UREM, VT: MVT::v2i64, Action: Legal);
1008 setOperationAction(Op: ISD::SREM, VT: MVT::v2i64, Action: Legal);
1009 setOperationAction(Op: ISD::UREM, VT: MVT::v4i32, Action: Legal);
1010 setOperationAction(Op: ISD::SREM, VT: MVT::v4i32, Action: Legal);
1011 setOperationAction(Op: ISD::UREM, VT: MVT::v1i128, Action: Legal);
1012 setOperationAction(Op: ISD::SREM, VT: MVT::v1i128, Action: Legal);
1013 setOperationAction(Op: ISD::UDIV, VT: MVT::v1i128, Action: Legal);
1014 setOperationAction(Op: ISD::SDIV, VT: MVT::v1i128, Action: Legal);
1015 setOperationAction(Op: ISD::ROTL, VT: MVT::v1i128, Action: Legal);
1016 }
1017
1018 setOperationAction(Op: ISD::MUL, VT: MVT::v8i16, Action: Legal);
1019 setOperationAction(Op: ISD::MUL, VT: MVT::v16i8, Action: Custom);
1020
1021 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Custom);
1022 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Custom);
1023 // LE is P8+/64-bit so direct moves are supported and these operations
1024 // are legal. The custom transformation requires 64-bit since we need a
1025 // pair of stores that will cover a 128-bit load for P10.
1026 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1027 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Custom);
1028 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Custom);
1029 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Custom);
1030 }
1031
1032 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v16i8, Action: Custom);
1033 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v8i16, Action: Custom);
1034 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4i32, Action: Custom);
1035 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4f32, Action: Custom);
1036
1037 // Altivec does not contain unordered floating-point compare instructions
1038 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v4f32, Action: Expand);
1039 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v4f32, Action: Expand);
1040 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v4f32, Action: Expand);
1041 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v4f32, Action: Expand);
1042
1043 if (Subtarget.hasVSX()) {
1044 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2f64, Action: Legal);
1045 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1046 if (Subtarget.hasP8Vector()) {
1047 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Legal);
1048 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4f32, Action: Legal);
1049 }
1050 if (Subtarget.hasDirectMove() && isPPC64) {
1051 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Legal);
1052 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Legal);
1053 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Legal);
1054 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Legal);
1055 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1056 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1057 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1058 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1059 }
1060 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1061
1062 // The nearbyint variants are not allowed to raise the inexact exception
1063 // so we can only code-gen them with fpexcept.ignore.
1064 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f64, Action: Custom);
1065 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f32, Action: Custom);
1066 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::v2f64, Action: Custom);
1067 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::v4f32, Action: Custom);
1068
1069 setOperationAction(Op: ISD::FFLOOR, VT: MVT::v2f64, Action: Legal);
1070 setOperationAction(Op: ISD::FCEIL, VT: MVT::v2f64, Action: Legal);
1071 setOperationAction(Op: ISD::FTRUNC, VT: MVT::v2f64, Action: Legal);
1072 setOperationAction(Op: ISD::FRINT, VT: MVT::v2f64, Action: Legal);
1073 setOperationAction(Op: ISD::FROUND, VT: MVT::v2f64, Action: Legal);
1074 setOperationAction(Op: ISD::FROUND, VT: MVT::f64, Action: Legal);
1075 setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
1076
1077 setOperationAction(Op: ISD::FRINT, VT: MVT::v4f32, Action: Legal);
1078 setOperationAction(Op: ISD::FROUND, VT: MVT::v4f32, Action: Legal);
1079 setOperationAction(Op: ISD::FROUND, VT: MVT::f32, Action: Legal);
1080 setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);
1081
1082 setOperationAction(Op: ISD::MUL, VT: MVT::v2f64, Action: Legal);
1083 setOperationAction(Op: ISD::FMA, VT: MVT::v2f64, Action: Legal);
1084
1085 setOperationAction(Op: ISD::FDIV, VT: MVT::v2f64, Action: Legal);
1086 setOperationAction(Op: ISD::FSQRT, VT: MVT::v2f64, Action: Legal);
1087
1088 // Share the Altivec comparison restrictions.
1089 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v2f64, Action: Expand);
1090 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v2f64, Action: Expand);
1091 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v2f64, Action: Expand);
1092 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v2f64, Action: Expand);
1093
1094 setOperationAction(Op: ISD::LOAD, VT: MVT::v2f64, Action: Legal);
1095 setOperationAction(Op: ISD::STORE, VT: MVT::v2f64, Action: Legal);
1096
1097 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2f64, Action: Custom);
1098
1099 if (Subtarget.hasP8Vector())
1100 addRegisterClass(VT: MVT::f32, RC: &PPC::VSSRCRegClass);
1101
1102 addRegisterClass(VT: MVT::f64, RC: &PPC::VSFRCRegClass);
1103
1104 addRegisterClass(VT: MVT::v4i32, RC: &PPC::VSRCRegClass);
1105 addRegisterClass(VT: MVT::v4f32, RC: &PPC::VSRCRegClass);
1106 addRegisterClass(VT: MVT::v2f64, RC: &PPC::VSRCRegClass);
1107
1108 if (Subtarget.hasP8Altivec()) {
1109 setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Legal);
1110 setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Legal);
1111 setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Legal);
1112
1113 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1114 // SRL, but not for SRA because of the instructions available:
1115 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1116 // doing
1117 setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Expand);
1118 setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Expand);
1119 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1120
1121 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Legal);
1122 }
1123 else {
1124 setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Expand);
1125 setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Expand);
1126 setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Expand);
1127
1128 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Custom);
1129
1130 // VSX v2i64 only supports non-arithmetic operations.
1131 setOperationAction(Op: ISD::ADD, VT: MVT::v2i64, Action: Expand);
1132 setOperationAction(Op: ISD::SUB, VT: MVT::v2i64, Action: Expand);
1133 }
1134
1135 if (Subtarget.isISA3_1())
1136 setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Legal);
1137 else
1138 setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Expand);
1139
1140 setOperationAction(Op: ISD::LOAD, VT: MVT::v2i64, Action: Promote);
1141 AddPromotedToType (Opc: ISD::LOAD, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1142 setOperationAction(Op: ISD::STORE, VT: MVT::v2i64, Action: Promote);
1143 AddPromotedToType (Opc: ISD::STORE, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1144
1145 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2i64, Action: Custom);
1146
1147 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1148 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1149 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1150 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1151 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1152 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1153 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1154 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1155
1156 // Custom handling for partial vectors of integers converted to
1157 // floating point. We already have optimal handling for v2i32 through
1158 // the DAG combine, so those aren't necessary.
1159 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1160 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1161 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1162 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1163 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1164 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1165 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1166 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1167 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1168 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1169 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1170 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1171 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1172 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1173 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1174 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1175
1176 setOperationAction(Op: ISD::FNEG, VT: MVT::v4f32, Action: Legal);
1177 setOperationAction(Op: ISD::FNEG, VT: MVT::v2f64, Action: Legal);
1178 setOperationAction(Op: ISD::FABS, VT: MVT::v4f32, Action: Legal);
1179 setOperationAction(Op: ISD::FABS, VT: MVT::v2f64, Action: Legal);
1180 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v4f32, Action: Legal);
1181 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v2f64, Action: Legal);
1182
1183 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2i64, Action: Custom);
1184 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2f64, Action: Custom);
1185
1186 // Handle constrained floating-point operations of vector.
1187 // The predictor is `hasVSX` because altivec instruction has
1188 // no exception but VSX vector instruction has.
1189 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v4f32, Action: Legal);
1190 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v4f32, Action: Legal);
1191 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v4f32, Action: Legal);
1192 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v4f32, Action: Legal);
1193 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v4f32, Action: Legal);
1194 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v4f32, Action: Legal);
1195 setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v4f32, Action: Legal);
1196 setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v4f32, Action: Legal);
1197 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v4f32, Action: Legal);
1198 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v4f32, Action: Legal);
1199 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v4f32, Action: Legal);
1200 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v4f32, Action: Legal);
1201 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v4f32, Action: Legal);
1202
1203 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v2f64, Action: Legal);
1204 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v2f64, Action: Legal);
1205 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v2f64, Action: Legal);
1206 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v2f64, Action: Legal);
1207 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v2f64, Action: Legal);
1208 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v2f64, Action: Legal);
1209 setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v2f64, Action: Legal);
1210 setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v2f64, Action: Legal);
1211 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v2f64, Action: Legal);
1212 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v2f64, Action: Legal);
1213 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v2f64, Action: Legal);
1214 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v2f64, Action: Legal);
1215 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v2f64, Action: Legal);
1216
1217 addRegisterClass(VT: MVT::v2i64, RC: &PPC::VSRCRegClass);
1218 addRegisterClass(VT: MVT::f128, RC: &PPC::VRRCRegClass);
1219
1220 for (MVT FPT : MVT::fp_valuetypes())
1221 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: FPT, Action: Expand);
1222
1223 // Expand the SELECT to SELECT_CC
1224 setOperationAction(Op: ISD::SELECT, VT: MVT::f128, Action: Expand);
1225
1226 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f64, Action: Expand);
1227 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f32, Action: Expand);
1228
1229 // No implementation for these ops for PowerPC.
1230 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f128, Action: Expand);
1231 setOperationAction(Op: ISD::FSIN, VT: MVT::f128, Action: Expand);
1232 setOperationAction(Op: ISD::FCOS, VT: MVT::f128, Action: Expand);
1233 setOperationAction(Op: ISD::FPOW, VT: MVT::f128, Action: Expand);
1234 setOperationAction(Op: ISD::FPOWI, VT: MVT::f128, Action: Expand);
1235 setOperationAction(Op: ISD::FREM, VT: MVT::f128, Action: LibCall);
1236 }
1237
1238 if (Subtarget.hasP8Altivec()) {
1239 addRegisterClass(VT: MVT::v2i64, RC: &PPC::VRRCRegClass);
1240 addRegisterClass(VT: MVT::v1i128, RC: &PPC::VRRCRegClass);
1241 }
1242
1243 if (Subtarget.hasP9Vector()) {
1244 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Custom);
1245 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4f32, Action: Custom);
1246
1247 // Test data class instructions store results in CR bits.
1248 if (Subtarget.useCRBits()) {
1249 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Custom);
1250 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Custom);
1251 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f128, Action: Custom);
1252 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::ppcf128, Action: Custom);
1253 }
1254
1255 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1256 // SRL, but not for SRA because of the instructions available:
1257 // VS{RL} and VS{RL}O.
1258 setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Legal);
1259 setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Legal);
1260 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1261
1262 setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: Legal);
1263 setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: Legal);
1264 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Legal);
1265 setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Legal);
1266 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Legal);
1267
1268 setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Legal);
1269 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f128, Action: Expand);
1270 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f128, Action: Expand);
1271 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f128, Action: Expand);
1272 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f128, Action: Expand);
1273 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f128, Action: Expand);
1274 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f128, Action: Expand);
1275
1276 setOperationAction(Op: ISD::FTRUNC, VT: MVT::f128, Action: Legal);
1277 setOperationAction(Op: ISD::FRINT, VT: MVT::f128, Action: Legal);
1278 setOperationAction(Op: ISD::FFLOOR, VT: MVT::f128, Action: Legal);
1279 setOperationAction(Op: ISD::FCEIL, VT: MVT::f128, Action: Legal);
1280 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f128, Action: Legal);
1281 setOperationAction(Op: ISD::FROUND, VT: MVT::f128, Action: Legal);
1282
1283 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f64, Action: Legal);
1284 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f32, Action: Legal);
1285 setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
1286
1287 // Handle constrained floating-point operations of fp128
1288 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f128, Action: Legal);
1289 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f128, Action: Legal);
1290 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f128, Action: Legal);
1291 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f128, Action: Legal);
1292 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f128, Action: Legal);
1293 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f128, Action: Legal);
1294 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Legal);
1295 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f64, Action: Legal);
1296 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f32, Action: Legal);
1297 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f128, Action: Legal);
1298 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f128, Action: Legal);
1299 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f128, Action: Legal);
1300 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f128, Action: Legal);
1301 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f128, Action: Legal);
1302 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f128, Action: Legal);
1303 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v2f32, Action: Custom);
1304 setOperationAction(Op: ISD::BSWAP, VT: MVT::v8i16, Action: Legal);
1305 setOperationAction(Op: ISD::BSWAP, VT: MVT::v4i32, Action: Legal);
1306 setOperationAction(Op: ISD::BSWAP, VT: MVT::v2i64, Action: Legal);
1307 setOperationAction(Op: ISD::BSWAP, VT: MVT::v1i128, Action: Legal);
1308 } else if (Subtarget.hasVSX()) {
1309 setOperationAction(Op: ISD::LOAD, VT: MVT::f128, Action: Promote);
1310 setOperationAction(Op: ISD::STORE, VT: MVT::f128, Action: Promote);
1311
1312 AddPromotedToType(Opc: ISD::LOAD, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1313 AddPromotedToType(Opc: ISD::STORE, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1314
1315 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1316 // fp_to_uint and int_to_fp.
1317 setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: LibCall);
1318 setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: LibCall);
1319
1320 setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Expand);
1321 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Expand);
1322 setOperationAction(Op: ISD::FNEG, VT: MVT::f128, Action: Expand);
1323 setOperationAction(Op: ISD::FABS, VT: MVT::f128, Action: Expand);
1324 setOperationAction(Op: ISD::FSQRT, VT: MVT::f128, Action: Expand);
1325 setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Expand);
1326 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f128, Action: Expand);
1327
1328 // Expand the fp_extend if the target type is fp128.
1329 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Expand);
1330 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Expand);
1331
1332 // Expand the fp_round if the source type is fp128.
1333 for (MVT VT : {MVT::f32, MVT::f64}) {
1334 setOperationAction(Op: ISD::FP_ROUND, VT, Action: Custom);
1335 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT, Action: Custom);
1336 }
1337
1338 setOperationAction(Op: ISD::SETCC, VT: MVT::f128, Action: Custom);
1339 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f128, Action: Custom);
1340 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f128, Action: Custom);
1341 setOperationAction(Op: ISD::BR_CC, VT: MVT::f128, Action: Expand);
1342
1343 // Lower following f128 select_cc pattern:
1344 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1345 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1346
1347 // We need to handle f128 SELECT_CC with integer result type.
1348 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Custom);
1349 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: isPPC64 ? Custom : Expand);
1350 }
1351
1352 if (Subtarget.hasP9Altivec()) {
1353 if (Subtarget.isISA3_1()) {
1354 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1355 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1356 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1357 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1358 } else {
1359 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Custom);
1360 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Custom);
1361 }
1362 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i8, Action: Legal);
1363 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i16, Action: Legal);
1364 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i32, Action: Legal);
1365 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i8, Action: Legal);
1366 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i16, Action: Legal);
1367 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i32, Action: Legal);
1368 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i64, Action: Legal);
1369
1370 setOperationAction(Op: ISD::ABDU, VT: MVT::v16i8, Action: Legal);
1371 setOperationAction(Op: ISD::ABDU, VT: MVT::v8i16, Action: Legal);
1372 setOperationAction(Op: ISD::ABDU, VT: MVT::v4i32, Action: Legal);
1373 setOperationAction(Op: ISD::ABDS, VT: MVT::v4i32, Action: Legal);
1374 }
1375
1376 if (Subtarget.hasP10Vector()) {
1377 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1378 }
1379 }
1380
1381 if (Subtarget.pairedVectorMemops()) {
1382 addRegisterClass(VT: MVT::v256i1, RC: &PPC::VSRpRCRegClass);
1383 setOperationAction(Op: ISD::LOAD, VT: MVT::v256i1, Action: Custom);
1384 setOperationAction(Op: ISD::STORE, VT: MVT::v256i1, Action: Custom);
1385 }
1386 if (Subtarget.hasMMA()) {
1387 if (Subtarget.isISAFuture()) {
1388 addRegisterClass(VT: MVT::v512i1, RC: &PPC::WACCRCRegClass);
1389 addRegisterClass(VT: MVT::v1024i1, RC: &PPC::DMRRCRegClass);
1390 addRegisterClass(VT: MVT::v2048i1, RC: &PPC::DMRpRCRegClass);
1391 setOperationAction(Op: ISD::LOAD, VT: MVT::v1024i1, Action: Custom);
1392 setOperationAction(Op: ISD::STORE, VT: MVT::v1024i1, Action: Custom);
1393 setOperationAction(Op: ISD::LOAD, VT: MVT::v2048i1, Action: Custom);
1394 setOperationAction(Op: ISD::STORE, VT: MVT::v2048i1, Action: Custom);
1395 } else {
1396 addRegisterClass(VT: MVT::v512i1, RC: &PPC::UACCRCRegClass);
1397 }
1398 setOperationAction(Op: ISD::LOAD, VT: MVT::v512i1, Action: Custom);
1399 setOperationAction(Op: ISD::STORE, VT: MVT::v512i1, Action: Custom);
1400 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v512i1, Action: Custom);
1401 }
1402
1403 if (Subtarget.has64BitSupport())
1404 setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Legal);
1405
1406 if (Subtarget.isISA3_1())
1407 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Legal);
1408
1409 setOperationAction(Op: ISD::READCYCLECOUNTER, VT: MVT::i64, Action: isPPC64 ? Legal : Custom);
1410
1411 if (!isPPC64) {
1412 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i64, Action: Expand);
1413 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i64, Action: Expand);
1414 }
1415
1416 if (shouldInlineQuadwordAtomics()) {
1417 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i128, Action: Custom);
1418 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i128, Action: Custom);
1419 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i128, Action: Custom);
1420 }
1421
1422 setBooleanContents(ZeroOrOneBooleanContent);
1423
1424 if (Subtarget.hasAltivec()) {
1425 // Altivec instructions set fields to all zeros or all ones.
1426 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1427 }
1428
1429 if (shouldInlineQuadwordAtomics())
1430 setMaxAtomicSizeInBitsSupported(128);
1431 else if (isPPC64)
1432 setMaxAtomicSizeInBitsSupported(64);
1433 else
1434 setMaxAtomicSizeInBitsSupported(32);
1435
1436 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1437
1438 // We have target-specific dag combine patterns for the following nodes:
1439 setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::XOR, ISD::SHL, ISD::SRA,
1440 ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP,
1441 ISD::BUILD_VECTOR});
1442 if (Subtarget.hasFPCVT())
1443 setTargetDAGCombine(ISD::UINT_TO_FP);
1444 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1445 if (Subtarget.useCRBits())
1446 setTargetDAGCombine(ISD::BRCOND);
1447 setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1448 ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1449
1450 setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1451
1452 setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1453
1454 if (Subtarget.useCRBits()) {
1455 setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1456 }
1457
1458 // With 32 condition bits, we don't need to sink (and duplicate) compares
1459 // aggressively in CodeGenPrep.
1460 if (Subtarget.useCRBits()) {
1461 setJumpIsExpensive();
1462 }
1463
1464 // TODO: The default entry number is set to 64. This stops most jump table
1465 // generation on PPC. But it is good for current PPC HWs because the indirect
1466 // branch instruction mtctr to the jump table may lead to bad branch predict.
1467 // Re-evaluate this value on future HWs that can do better with mtctr.
1468 setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
1469
1470 // The default minimum of largest number in a BitTest cluster is 3.
1471 setMinimumBitTestCmps(PPCMinimumBitTestCmps);
1472
1473 setMinFunctionAlignment(Align(4));
1474 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1475
1476 auto CPUDirective = Subtarget.getCPUDirective();
1477 switch (CPUDirective) {
1478 default: break;
1479 case PPC::DIR_970:
1480 case PPC::DIR_A2:
1481 case PPC::DIR_E500:
1482 case PPC::DIR_E500mc:
1483 case PPC::DIR_E5500:
1484 case PPC::DIR_PWR4:
1485 case PPC::DIR_PWR5:
1486 case PPC::DIR_PWR5X:
1487 case PPC::DIR_PWR6:
1488 case PPC::DIR_PWR6X:
1489 case PPC::DIR_PWR7:
1490 case PPC::DIR_PWR8:
1491 case PPC::DIR_PWR9:
1492 case PPC::DIR_PWR10:
1493 case PPC::DIR_PWR11:
1494 case PPC::DIR_PWR_FUTURE:
1495 setPrefLoopAlignment(Align(16));
1496 setPrefFunctionAlignment(Align(16));
1497 break;
1498 }
1499
1500 if (Subtarget.enableMachineScheduler())
1501 setSchedulingPreference(Sched::Source);
1502 else
1503 setSchedulingPreference(Sched::Hybrid);
1504
1505 computeRegisterProperties(TRI: STI.getRegisterInfo());
1506
1507 // The Freescale cores do better with aggressive inlining of memcpy and
1508 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1509 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1510 MaxStoresPerMemset = 32;
1511 MaxStoresPerMemsetOptSize = 16;
1512 MaxStoresPerMemcpy = 32;
1513 MaxStoresPerMemcpyOptSize = 8;
1514 MaxStoresPerMemmove = 32;
1515 MaxStoresPerMemmoveOptSize = 8;
1516 } else if (CPUDirective == PPC::DIR_A2) {
1517 // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
1519 // over one hundred cycles.
1520 MaxStoresPerMemset = 128;
1521 MaxStoresPerMemcpy = 128;
1522 MaxStoresPerMemmove = 128;
1523 MaxLoadsPerMemcmp = 128;
1524 } else {
1525 MaxLoadsPerMemcmp = 8;
1526 MaxLoadsPerMemcmpOptSize = 4;
1527 }
1528
1529 // Enable generation of STXVP instructions by default for mcpu=future.
1530 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1531 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1532 DisableAutoPairedVecSt = false;
1533
1534 IsStrictFPEnabled = true;
1535
1536 // Let the subtarget (CPU) decide if a predictable select is more expensive
1537 // than the corresponding branch. This information is used in CGP to decide
1538 // when to convert selects into branches.
1539 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1540
1541 GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
1542}
1543
1544// *********************************** NOTE ************************************
1545// For selecting load and store instructions, the addressing modes are defined
1546// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
1548//
1549// The TD definitions for the addressing modes correspond to their respective
1550// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1551// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1552// address mode flags of a particular node. Afterwards, the computed address
1553// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1554// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1555// accordingly, based on the preferred addressing mode.
1556//
1557// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1558// MemOpFlags contains all the possible flags that can be used to compute the
1559// optimal addressing mode for load and store instructions.
1560// AddrMode contains all the possible load and store addressing modes available
1561// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1562//
1563// When adding new load and store instructions, it is possible that new address
1564// flags may need to be added into MemOpFlags, and a new addressing mode will
1565// need to be added to AddrMode. An entry of the new addressing mode (consisting
1566// of the minimal and main distinguishing address flags for the new load/store
1567// instructions) will need to be added into initializeAddrModeMap() below.
1568// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1569// need to be updated to account for selecting the optimal addressing mode.
1570// *****************************************************************************
1571/// Initialize the map that relates the different addressing modes of the load
1572/// and store instructions to a set of flags. This ensures the load/store
1573/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
  // Each entry below is an OR of MemOpFlags (declared in PPCISelLowering.h)
  // describing one class of memory access the addressing mode can match;
  // computeMOFlags() produces the same combinations for a given node.
  AddrModesMap[PPC::AM_DForm] = {
      // LWZ, STW
      PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
      PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
      PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
      PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
      // LBZ, LHZ, STB, STH
      PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
      PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
      PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
      PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
      // LHA (sign-extending halfword load)
      PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
      PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
      PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
      PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
      // LFS, LFD, STFS, STFD (scalar FP uses D-Form only before P9)
      PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
      PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
      PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
  };
  AddrModesMap[PPC::AM_DSForm] = {
      // LWA (the Mult4 flag reflects the DS-Form displacement, which must be
      // a multiple of 4)
      PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
      PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
      PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
      // LD, STD
      PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
      PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
      // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64 (P9 scalar FP)
      PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
      PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
  };
  AddrModesMap[PPC::AM_DQForm] = {
      // LXV, STXV (DQ-Form displacement must be a multiple of 16)
      PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
      PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
  };
  // Prefixed D-Form (P10): register plus 34-bit signed immediate.
  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
                                       PPC::MOF_SubtargetP10};
  // TODO: Add mapping for quadword load/store.
}
1621
1622/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1623/// the desired ByVal argument alignment.
1624static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1625 if (MaxAlign == MaxMaxAlign)
1626 return;
1627 if (VectorType *VTy = dyn_cast<VectorType>(Val: Ty)) {
1628 if (MaxMaxAlign >= 32 &&
1629 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1630 MaxAlign = Align(32);
1631 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1632 MaxAlign < 16)
1633 MaxAlign = Align(16);
1634 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Val: Ty)) {
1635 Align EltAlign;
1636 getMaxByValAlign(Ty: ATy->getElementType(), MaxAlign&: EltAlign, MaxMaxAlign);
1637 if (EltAlign > MaxAlign)
1638 MaxAlign = EltAlign;
1639 } else if (StructType *STy = dyn_cast<StructType>(Val: Ty)) {
1640 for (auto *EltTy : STy->elements()) {
1641 Align EltAlign;
1642 getMaxByValAlign(Ty: EltTy, MaxAlign&: EltAlign, MaxMaxAlign);
1643 if (EltAlign > MaxAlign)
1644 MaxAlign = EltAlign;
1645 if (MaxAlign == MaxMaxAlign)
1646 break;
1647 }
1648 }
1649}
1650
1651/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1652/// function arguments in the caller parameter area.
1653Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1654 const DataLayout &DL) const {
1655 // 16byte and wider vectors are passed on 16byte boundary.
1656 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1657 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1658 if (Subtarget.hasAltivec())
1659 getMaxByValAlign(Ty, MaxAlign&: Alignment, MaxMaxAlign: Align(16));
1660 return Alignment;
1661}
1662
// Forward the soft-float query to the subtarget.
bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}
1666
// Forward the SPE availability query to the subtarget.
bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}
1670
// PPC prefers the inc-of-add form of this transform only for scalar integer
// types; vector (and FP) types return false.
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}
1674
1675bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1676 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1677 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1678 return false;
1679
1680 if (auto *VTy = dyn_cast<VectorType>(Val: VectorTy)) {
1681 if (VTy->getScalarType()->isIntegerTy()) {
1682 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1683 if (ElemSizeInBits == 32) {
1684 Index = Subtarget.isLittleEndian() ? 2 : 1;
1685 return true;
1686 }
1687 if (ElemSizeInBits == 64) {
1688 Index = Subtarget.isLittleEndian() ? 1 : 0;
1689 return true;
1690 }
1691 }
1692 }
1693 return false;
1694}
1695
1696EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1697 EVT VT) const {
1698 if (!VT.isVector())
1699 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1700
1701 return VT.changeVectorElementTypeToInteger();
1702}
1703
// Aggressive FMA fusion is always enabled for floating-point types on PPC.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
1708
1709//===----------------------------------------------------------------------===//
1710// Node matching predicates, for use by the tblgen matching code.
1711//===----------------------------------------------------------------------===//
1712
1713/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1714static bool isFloatingPointZero(SDValue Op) {
1715 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val&: Op))
1716 return CFP->getValueAPF().isZero();
1717 else if (ISD::isEXTLoad(N: Op.getNode()) || ISD::isNON_EXTLoad(N: Op.getNode())) {
1718 // Maybe this has already been legalized into the constant pool?
1719 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val: Op.getOperand(i: 1)))
1720 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: CP->getConstVal()))
1721 return CFP->getValueAPF().isZero();
1722 }
1723 return false;
1724}
1725
1726/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1727/// true if Op is undef or if it matches the specified value.
/// isConstantOrUndef - Op is either an undef node (encoded as a negative
/// mask element) or a ConstantSDNode. Return true if Op is undef or matches
/// the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  if (Op < 0)
    return true;
  return Op == Val;
}
1731
1732/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1733/// VPKUHUM instruction.
1734/// The ShuffleKind distinguishes between big-endian operations with
1735/// two different inputs (0), either-endian operations with two identical
1736/// inputs (1), and little-endian operations with two different inputs (2).
1737/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1738bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1739 SelectionDAG &DAG) {
1740 bool IsLE = DAG.getDataLayout().isLittleEndian();
1741 if (ShuffleKind == 0) {
1742 if (IsLE)
1743 return false;
1744 for (unsigned i = 0; i != 16; ++i)
1745 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2+1))
1746 return false;
1747 } else if (ShuffleKind == 2) {
1748 if (!IsLE)
1749 return false;
1750 for (unsigned i = 0; i != 16; ++i)
1751 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2))
1752 return false;
1753 } else if (ShuffleKind == 1) {
1754 unsigned j = IsLE ? 0 : 1;
1755 for (unsigned i = 0; i != 8; ++i)
1756 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2+j) ||
1757 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j))
1758 return false;
1759 }
1760 return true;
1761}
1762
1763/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1764/// VPKUWUM instruction.
1765/// The ShuffleKind distinguishes between big-endian operations with
1766/// two different inputs (0), either-endian operations with two identical
1767/// inputs (1), and little-endian operations with two different inputs (2).
1768/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Big-endian, two distinct inputs: each result halfword is the low
    // halfword of the corresponding word, i.e. bytes {i*2+2, i*2+3}.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    // Little-endian, swapped inputs: bytes {i*2, i*2+1} of each word.
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary (identical inputs): both 8-byte halves of the result must repeat
    // the same pattern; j picks the low/high halfword by endianness.
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
        return false;
  }
  // NOTE: any other ShuffleKind falls through and returns true.
  return true;
}
1797
1798/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1799/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1800/// current subtarget.
1801///
1802/// The ShuffleKind distinguishes between big-endian operations with
1803/// two different inputs (0), either-endian operations with two identical
1804/// inputs (1), and little-endian operations with two different inputs (2).
1805/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
  // VPKUDUM requires POWER8 vector support.
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Big-endian, two distinct inputs: each result word is the low word of
    // the corresponding doubleword, i.e. bytes {i*2+4 .. i*2+7}.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    // Little-endian, swapped inputs: bytes {i*2 .. i*2+3} of each
    // doubleword.
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary (identical inputs): both 8-byte halves of the result must repeat
    // the same pattern; j picks the low/high word by endianness.
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  // NOTE: any other ShuffleKind falls through and returns true.
  return true;
}
1846
1847/// isVMerge - Common function, used to match vmrg* shuffles.
1848///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  // Merges are only matched on v16i8 shuffles; wider-element merges are
  // expressed as byte-level masks over v16i8.
  if (N->getValueType(ResNo: 0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  // A merge interleaves 8/UnitSize units from each input half: result unit
  // 2*i must come from byte LHSStart+i*UnitSize and result unit 2*i+1 from
  // byte RHSStart+i*UnitSize. Undef mask elements match anything.
  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i*UnitSize*2+j),
                             Val: LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i*UnitSize*2+UnitSize+j),
                             Val: RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
1866
1867/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1868/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1869/// The ShuffleKind distinguishes between big-endian merges with two
1870/// different inputs (0), either-endian merges with two identical inputs (1),
1871/// and little-endian merges with two different inputs (2). For the latter,
1872/// the input operands are swapped (see PPCInstrAltivec.td).
1873bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1874 unsigned ShuffleKind, SelectionDAG &DAG) {
1875 if (DAG.getDataLayout().isLittleEndian()) {
1876 if (ShuffleKind == 1) // unary
1877 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 0);
1878 else if (ShuffleKind == 2) // swapped
1879 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 16);
1880 else
1881 return false;
1882 } else {
1883 if (ShuffleKind == 1) // unary
1884 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 8);
1885 else if (ShuffleKind == 0) // normal
1886 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 24);
1887 else
1888 return false;
1889 }
1890}
1891
1892/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1893/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1894/// The ShuffleKind distinguishes between big-endian merges with two
1895/// different inputs (0), either-endian merges with two identical inputs (1),
1896/// and little-endian merges with two different inputs (2). For the latter,
1897/// the input operands are swapped (see PPCInstrAltivec.td).
1898bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1899 unsigned ShuffleKind, SelectionDAG &DAG) {
1900 if (DAG.getDataLayout().isLittleEndian()) {
1901 if (ShuffleKind == 1) // unary
1902 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 8);
1903 else if (ShuffleKind == 2) // swapped
1904 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 24);
1905 else
1906 return false;
1907 } else {
1908 if (ShuffleKind == 1) // unary
1909 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 0);
1910 else if (ShuffleKind == 0) // normal
1911 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 16);
1912 else
1913 return false;
1914 }
1915}
1916
1917/**
1918 * Common function used to match vmrgew and vmrgow shuffles
1919 *
1920 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
1923 * - Little Endian:
1924 * - Use offset of 0 to check for odd elements
1925 * - Use offset of 4 to check for even elements
1926 * - Big Endian:
1927 * - Use offset of 0 to check for even elements
1928 * - Use offset of 4 to check for odd elements
1929 * A detailed description of the vector element ordering for little endian and
1930 * big endian can be found at
1931 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1932 * Targeting your applications - what little endian and big endian IBM XL C/C++
1933 * compiler differences mean to you
1934 *
1935 * The mask to the shuffle vector instruction specifies the indices of the
1936 * elements from the two input vectors to place in the result. The elements are
1937 * numbered in array-access order, starting with the first vector. These vectors
1938 * are always of type v16i8, thus each vector will contain 16 elements of size
1939 * 8. More info on the shuffle vector can be found in the
1940 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1941 * Language Reference.
1942 *
1943 * The RHSStartValue indicates whether the same input vectors are used (unary)
1944 * or two different input vectors are used, based on the following:
1945 * - If the instruction uses the same vector for both inputs, the range of the
1946 * indices will be 0 to 15. In this case, the RHSStart value passed should
1947 * be 0.
1948 * - If the instruction has two different vectors then the range of the
1949 * indices will be 0 to 31. In this case, the RHSStart value passed should
1950 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1951 * to 31 specify elements in the second vector).
1952 *
1953 * \param[in] N The shuffle vector SD Node to analyze
1954 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1955 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1956 * vector to the shuffle_vector instruction
1957 * \return true iff this shuffle vector represents an even or odd word merge
1958 */
1959static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1960 unsigned RHSStartValue) {
1961 if (N->getValueType(ResNo: 0) != MVT::v16i8)
1962 return false;
1963
1964 for (unsigned i = 0; i < 2; ++i)
1965 for (unsigned j = 0; j < 4; ++j)
1966 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i*4+j),
1967 Val: i*RHSStartValue+j+IndexOffset) ||
1968 !isConstantOrUndef(Op: N->getMaskElt(Idx: i*4+j+8),
1969 Val: i*RHSStartValue+j+IndexOffset+8))
1970 return false;
1971 return true;
1972}
1973
1974/**
1975 * Determine if the specified shuffle mask is suitable for the vmrgew or
1976 * vmrgow instructions.
1977 *
1978 * \param[in] N The shuffle vector SD Node to analyze
1979 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1980 * \param[in] ShuffleKind Identify the type of merge:
1981 * - 0 = big-endian merge with two different inputs;
1982 * - 1 = either-endian merge with two identical inputs;
1983 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1984 * little-endian merges).
1985 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow
 * instruction with the requested merge kind
1987 */
1988bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1989 unsigned ShuffleKind, SelectionDAG &DAG) {
1990 if (DAG.getDataLayout().isLittleEndian()) {
1991 unsigned indexOffset = CheckEven ? 4 : 0;
1992 if (ShuffleKind == 1) // Unary
1993 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 0);
1994 else if (ShuffleKind == 2) // swapped
1995 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 16);
1996 else
1997 return false;
1998 }
1999 else {
2000 unsigned indexOffset = CheckEven ? 0 : 4;
2001 if (ShuffleKind == 1) // Unary
2002 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 0);
2003 else if (ShuffleKind == 0) // Normal
2004 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 16);
2005 else
2006 return false;
2007 }
2008 return false;
2009}
2010
2011/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2012/// amount, otherwise return -1.
2013/// The ShuffleKind distinguishes between big-endian operations with two
2014/// different inputs (0), either-endian operations with two identical inputs
2015/// (1), and little-endian operations with two different inputs (2). For the
2016/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  // Only byte-level shuffles over v16i8 can map to vsldoi.
  if (N->getValueType(ResNo: 0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(Idx: i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1; // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(Idx: i);
  // Element i must read from source position >= i, otherwise the implied
  // shift amount would be negative.
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Two-input form: indices run straight into the second vector (16..31),
    // so check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Unary form: both inputs are the same vector, so indices wrap around
    // modulo 16. Check the rest of the elements to see if they are
    // consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // vsldoi counts elements from the left of the register (big-endian bias);
  // convert the little-endian shift amount accordingly.
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
2057
2058/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2059/// specifies a splat of a single element that is suitable for input to
2060/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  EVT VT = N->getValueType(ResNo: 0);
  // Doubleword vectors: a splat simply requires both mask entries to agree
  // (and the caller to have asked for 8-byte elements).
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return EltSize == 8 && N->getMaskElt(Idx: 0) == N->getMaskElt(Idx: 1);

  assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  // NOTE(review): an undef first mask element (-1) is converted to unsigned
  // here; it is ultimately rejected by the ElementBase >= 16 check below.
  if (N->getMaskElt(Idx: 0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(Idx: 0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(Idx: i) < 0 || N->getMaskElt(Idx: i) != (int)(i+ElementBase))
      return false;

  // Every remaining element must repeat the byte pattern of element 0.
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    // An UNDEF element is a sequence of UNDEF bytes.
    if (N->getMaskElt(Idx: i) < 0) {
      // A partially-undef element (some bytes defined) is not a splat.
      for (unsigned j = 1; j != EltSize; ++j)
        if (N->getMaskElt(Idx: i + j) >= 0)
          return false;
    } else
      for (unsigned j = 0; j != EltSize; ++j)
        if (N->getMaskElt(Idx: i + j) != N->getMaskElt(Idx: j))
          return false;
  }
  return true;
}
2101
2102/// Check that the mask is shuffling N byte elements. Within each N byte
2103/// element of the mask, the indices could be either in increasing or
2104/// decreasing order as long as they are consecutive.
2105/// \param[in] N the shuffle vector SD Node to analyze
2106/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2107/// Word/DoubleWord/QuadWord).
2108/// \param[in] StepLen the delta indices number among the N byte element, if
2109/// the mask is in increasing/decreasing order then it is 1/-1.
2110/// \return true iff the mask is shuffling N byte elements.
2111static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2112 int StepLen) {
2113 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2114 "Unexpected element width.");
2115 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2116
2117 unsigned NumOfElem = 16 / Width;
2118 unsigned MaskVal[16]; // Width is never greater than 16
2119 for (unsigned i = 0; i < NumOfElem; ++i) {
2120 MaskVal[0] = N->getMaskElt(Idx: i * Width);
2121 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2122 return false;
2123 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2124 return false;
2125 }
2126
2127 for (unsigned int j = 1; j < Width; ++j) {
2128 MaskVal[j] = N->getMaskElt(Idx: i * Width + j);
2129 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2130 return false;
2131 }
2132 }
2133 }
2134
2135 return true;
2136}
2137
/// Check whether the shuffle \p N matches the XXINSERTW pattern: one 4-byte
/// word taken from one input inserted into the other input, which otherwise
/// passes through unchanged. On success, \p ShiftElts receives the word
/// rotation needed to bring the source word into position, \p InsertAtByte
/// receives the byte offset of the insertion, and \p Swap indicates that the
/// two shuffle inputs must be exchanged.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // The mask must move whole aligned words, byte-consecutive and ascending.
  if (!isNByteElemShuffleMask(N, Width: 4, StepLen: 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(Idx: 0) / 4;
  unsigned M1 = N->getMaskElt(Idx: 4) / 4;
  unsigned M2 = N->getMaskElt(Idx: 8) / 4;
  unsigned M3 = N->getMaskElt(Idx: 12) / 4;
  // Rotation needed (per endianness) to bring word (m & 3) of the source
  // vector into the insertion slot.
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // Each case: three words are the identity of one input and the fourth word
  // comes from the other input.
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(Num: 1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    // With a single input, the only source word that is not already in place
    // is this fixed word (per endianness); no rotation is required.
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
2212
/// Check whether the shuffle \p N can be implemented as an XXSLDWI (shift
/// left double by word immediate). On success, \p ShiftElts receives the
/// word shift amount and \p Swap indicates the inputs must be exchanged.
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, Width: 4, StepLen: 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(Idx: 0) / 4;
  unsigned M1 = N->getMaskElt(Idx: 4) / 4;
  unsigned M2 = N->getMaskElt(Idx: 8) / 4;
  unsigned M3 = N->getMaskElt(Idx: 12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(Num: 1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    // Unary case: words must rotate within the single input (mod 4).
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  // Two-input case: word indices span both vectors, so wrap modulo 8.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  // M0 is in [0,7], so exactly one branch below fires in each arm; ShiftElts
  // and Swap are always set before returning.
  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
2274
2275bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2276 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2277
2278 if (!isNByteElemShuffleMask(N, Width, StepLen: -1))
2279 return false;
2280
2281 for (int i = 0; i < 16; i += Width)
2282 if (N->getMaskElt(Idx: i) != i + Width - 1)
2283 return false;
2284
2285 return true;
2286}
2287
/// Return true if \p N reverses the bytes within each halfword (XXBRH).
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, Width: 2);
}
2291
/// Return true if \p N reverses the bytes within each word (XXBRW).
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, Width: 4);
}
2295
/// Return true if \p N reverses the bytes within each doubleword (XXBRD).
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, Width: 8);
}
2299
/// Return true if \p N reverses the bytes of the whole quadword (XXBRQ).
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, Width: 16);
}
2303
2304/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2305/// if the inputs to the instruction should be swapped and set \p DM to the
2306/// value for the immediate.
2307/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2308/// AND element 0 of the result comes from the first input (LE) or second input
2309/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2310/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2311/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, Width: 8, StepLen: 1))
    return false;

  // Doubleword indices of the two result halves: 0-1 select from the first
  // input, 2-3 from the second.
  unsigned M0 = N->getMaskElt(Idx: 0) / 8;
  unsigned M1 = N->getMaskElt(Idx: 8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(Num: 1).isUndef()) {
    if ((M0 | M1) < 2) {
      // On LE the doublewords are numbered from the other end, so each index
      // is complemented (mod 2) and their positions in DM are exchanged.
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      // The halves reference the "wrong" inputs; renumber the indices as if
      // the operands had been exchanged, then request the swap.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      // As above: renumber as if the operands had been exchanged.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
2363
2364
2365/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2366/// appropriate for PPC mnemonics (which have a big endian bias - namely
2367/// elements are counted from the left of the vector register).
2368unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2369 SelectionDAG &DAG) {
2370 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);
2371 assert(isSplatShuffleMask(SVOp, EltSize));
2372 EVT VT = SVOp->getValueType(ResNo: 0);
2373
2374 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2375 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(Idx: 0)
2376 : SVOp->getMaskElt(Idx: 0);
2377
2378 if (DAG.getDataLayout().isLittleEndian())
2379 return (16 / EltSize) - 1 - (SVOp->getMaskElt(Idx: 0) / EltSize);
2380 else
2381 return SVOp->getMaskElt(Idx: 0) / EltSize;
2382}
2383
2384/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2385/// by using a vspltis[bhw] instruction of the specified element size, return
2386/// the constant being splatted. The ByteSize field indicates the number of
2387/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal;

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    // Element i is matched against slot i % Multiple of the logical splat
    // value; undef entries are left unset and match anything.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(Num: i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(Val: N->getOperand(Num: i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(Num: i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(Num: i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(V: UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(V: UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32); // 0,0,0,undef
      int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
      // Positive immediates fit the 5-bit signed field only up to 15.
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(Val: ~0U, DL: SDLoc(N), VT: MVT::i32); // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(Val&: UniquedVals[Multiple-1])->getSExtValue();
      // Negative immediates fit the 5-bit signed field only down to -16.
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(Num: i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(Num: i);
    else if (OpVal != N->getOperand(Num: i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  // Extract the splatted bits as an integer, whether the constant is
  // an integer or a (32-bit) float.
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = llvm::bit_cast<uint32_t>(from: CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(SplatSizeInBits: ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(X: Value, B: ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(X: MaskVal) == MaskVal)
    return DAG.getSignedTargetConstant(Val: MaskVal, DL: SDLoc(N), VT: MVT::i32);
  return SDValue();
}
2487
2488//===----------------------------------------------------------------------===//
2489// Addressing Mode Selection
2490//===----------------------------------------------------------------------===//
2491
2492/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2493/// or 64-bit immediate, and if the value can be accurately represented as a
2494/// sign extension from a 16-bit value. If so, this returns true and the
2495/// immediate.
2496bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2497 if (!isa<ConstantSDNode>(Val: N))
2498 return false;
2499
2500 Imm = (int16_t)N->getAsZExtVal();
2501 if (N->getValueType(ResNo: 0) == MVT::i32)
2502 return Imm == (int32_t)N->getAsZExtVal();
2503 else
2504 return Imm == (int64_t)N->getAsZExtVal();
2505}
/// SDValue convenience overload of the SDNode* version above.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(N: Op.getNode(), Imm);
}
2509
2510/// Used when computing address flags for selecting loads and stores.
2511/// If we have an OR, check if the LHS and RHS are provably disjoint.
2512/// An OR of two provably disjoint values is equivalent to an ADD.
2513/// Most PPC load/store instructions compute the effective address as a sum,
2514/// so doing this conversion is useful.
2515static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2516 if (N.getOpcode() != ISD::OR)
2517 return false;
2518 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2519 if (!LHSKnown.Zero.getBoolValue())
2520 return false;
2521 KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1));
2522 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2523}
2524
2525/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2526/// be represented as an indexed [r+r] operation.
2527bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2528 SDValue &Index,
2529 SelectionDAG &DAG) const {
2530 for (SDNode *U : N->users()) {
2531 if (MemSDNode *Memop = dyn_cast<MemSDNode>(Val: U)) {
2532 if (Memop->getMemoryVT() == MVT::f64) {
2533 Base = N.getOperand(i: 0);
2534 Index = N.getOperand(i: 1);
2535 return true;
2536 }
2537 }
2538 }
2539 return false;
2540}
2541
2542/// isIntS34Immediate - This method tests if value of node given can be
2543/// accurately represented as a sign extension from a 34-bit value. If so,
2544/// this returns true and the immediate.
2545bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2546 if (!isa<ConstantSDNode>(Val: N))
2547 return false;
2548
2549 Imm = cast<ConstantSDNode>(Val: N)->getSExtValue();
2550 return isInt<34>(x: Imm);
2551}
/// SDValue convenience overload of the SDNode* version above.
bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
  return isIntS34Immediate(N: Op.getNode(), Imm);
}
2555
/// SelectAddressRegReg - Given the specified address, check to see if it
2557/// can be represented as an indexed [r+r] operation. Returns false if it
2558/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2559/// non-zero and N can be represented by a base register plus a signed 16-bit
2560/// displacement, make a more precise judgement by checking (displacement % \p
2561/// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(
    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  int16_t Imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
    // SPE load/store can only handle 8-bit offsets.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
      return true;
    // Prefer the [r+i] form when the addend is a suitably-aligned 16-bit
    // immediate; report failure so the caller selects it.
    if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) &&
        (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
      return false; // r+i
    // A low-part symbol addend also folds into the [r+i] form.
    if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo)
      return false; // r+i

    Base = N.getOperand(i: 0);
    Index = N.getOperand(i: 1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) &&
        (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));

    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(i: 0);
        Index = N.getOperand(i: 1);
        return true;
      }
    }
  }

  return false;
}
2609
2610// If we happen to be doing an i64 load or store into a stack slot that has
2611// less than a 4-byte alignment, then the frame-index elimination may need to
2612// use an indexed load or store instruction (because the offset may not be a
2613// multiple of 4). The extra register needed to hold the offset comes from the
2614// register scavenger, and it is possible that the scavenger will need to use
2615// an emergency spill slot. As a result, we need to make sure that a spill slot
2616// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2617// stack slot.
2618static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2619 // FIXME: This does not handle the LWA case.
2620 if (VT != MVT::i64)
2621 return;
2622
2623 // NOTE: We'll exclude negative FIs here, which come from argument
2624 // lowering, because there are no known test cases triggering this problem
2625 // using packed structures (or similar). We can remove this exclusion if
2626 // we find such a test case. The reason why this is so test-case driven is
2627 // because this entire 'fixup' is only to prevent crashes (from the
2628 // register scavenger) on not-really-valid inputs. For example, if we have:
2629 // %a = alloca i1
2630 // %b = bitcast i1* %a to i64*
2631 // store i64* a, i64 b
2632 // then the store should really be marked as 'align 1', but is not. If it
2633 // were marked as 'align 1' then the indexed form would have been
2634 // instruction-selected initially, and the problem this 'fixup' is preventing
2635 // won't happen regardless.
2636 if (FrameIdx < 0)
2637 return;
2638
2639 MachineFunction &MF = DAG.getMachineFunction();
2640 MachineFrameInfo &MFI = MF.getFrameInfo();
2641
2642 if (MFI.getObjectAlign(ObjectIdx: FrameIdx) >= Align(4))
2643 return;
2644
2645 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2646 FuncInfo->setHasNonRISpills();
2647}
2648
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
///
/// On success, \p Base receives the base operand (register, target frame
/// index, or the zero register) and \p Disp the displacement operand.
bool PPCTargetLowering::SelectAddressRegImm(
    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);

  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Base&: Disp, Index&: Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) &&
        (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
      Disp = DAG.getSignedTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
      // Fold a frame index directly into Base; fixupFuncForFI reserves an
      // emergency spill slot in case frame-index elimination later needs the
      // register scavenger for an under-aligned i64 access.
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) {
        Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
        fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
      } else {
        Base = N.getOperand(i: 0);
      }
      return true; // [r+i]
    } else if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!N.getOperand(1).getConstantOperandVal(1) &&
             "Cannot handle constant offsets yet!");
      Disp = N.getOperand(i: 1).getOperand(i: 0); // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(i: 0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) &&
        (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));

      // Every bit set in imm must be known zero on the LHS; then OR == ADD.
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) {
          Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
          fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
        } else {
          Base = N.getOperand(i: 0);
        }
        Disp = DAG.getTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    int16_t Imm;
    if (isIntS16Immediate(N: CN, Imm) &&
        (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm))) {
      Disp = DAG.getTargetConstant(Val: Imm, DL: dl, VT: CN->getValueType(ResNo: 0));
      Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             VT: CN->getValueType(ResNo: 0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(ResNo: 0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(Lhs: *EncodingAlignment, SizeInBytes: CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant(Val: (short)Addr, DL: dl, VT: MVT::i32);

      // The high half compensates for the sign extension of the low 16 bits
      // folded into Disp above: Addr - (signed short)Addr clears the (sign
      // adjusted) low part before shifting.
      Base = DAG.getTargetConstant(Val: (Addr - (signed short)Addr) >> 16, DL: dl,
                                   VT: MVT::i32);
      unsigned Opc = CN->getValueType(ResNo: 0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opcode: Opc, dl, VT: CN->getValueType(ResNo: 0), Op1: Base), 0);
      return true;
    }
  }

  // Fallback: put the whole address in the base register with a zero
  // displacement.
  Disp = DAG.getTargetConstant(Val: 0, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
    Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
    fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}
2755
2756/// Similar to the 16-bit case but for instructions that take a 34-bit
2757/// displacement field (prefixed loads/stores).
2758bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2759 SDValue &Base,
2760 SelectionDAG &DAG) const {
2761 // Only on 64-bit targets.
2762 if (N.getValueType() != MVT::i64)
2763 return false;
2764
2765 SDLoc dl(N);
2766 int64_t Imm = 0;
2767
2768 if (N.getOpcode() == ISD::ADD) {
2769 if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm))
2770 return false;
2771 Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2772 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
2773 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2774 else
2775 Base = N.getOperand(i: 0);
2776 return true;
2777 }
2778
2779 if (N.getOpcode() == ISD::OR) {
2780 if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm))
2781 return false;
2782 // If this is an or of disjoint bitfields, we can codegen this as an add
2783 // (for better address arithmetic) if the LHS and RHS of the OR are
2784 // provably disjoint.
2785 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2786 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2787 return false;
2788 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
2789 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2790 else
2791 Base = N.getOperand(i: 0);
2792 Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2793 return true;
2794 }
2795
2796 if (isIntS34Immediate(Op: N, Imm)) { // If the address is a 34-bit const.
2797 Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2798 Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
2799 return true;
2800 }
2801
2802 return false;
2803}
2804
2805/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2806/// represented as an indexed [r+r] operation.
2807bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2808 SDValue &Index,
2809 SelectionDAG &DAG) const {
2810 // Check to see if we can easily represent this as an [r+r] address. This
2811 // will fail if it thinks that the address is more profitably represented as
2812 // reg+imm, e.g. where imm = 0.
2813 if (SelectAddressRegReg(N, Base, Index, DAG))
2814 return true;
2815
2816 // If the address is the result of an add, we will utilize the fact that the
2817 // address calculation includes an implicit add. However, we can reduce
2818 // register pressure if we do not materialize a constant just for use as the
2819 // index register. We only get rid of the add if it is not an add of a
2820 // value and a 16-bit signed constant and both have a single use.
2821 int16_t imm = 0;
2822 if (N.getOpcode() == ISD::ADD &&
2823 (!isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) ||
2824 !N.getOperand(i: 1).hasOneUse() || !N.getOperand(i: 0).hasOneUse())) {
2825 Base = N.getOperand(i: 0);
2826 Index = N.getOperand(i: 1);
2827 return true;
2828 }
2829
2830 // Otherwise, do it the hard way, using R0 as the base register.
2831 Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2832 VT: N.getValueType());
2833 Index = N;
2834 return true;
2835}
2836
2837template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2838 Ty *PCRelCand = dyn_cast<Ty>(N);
2839 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(TF: PCRelCand->getTargetFlags()));
2840}
2841
2842/// Returns true if this address is a PC Relative address.
2843/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2844/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2845bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2846 // This is a materialize PC Relative node. Always select this as PC Relative.
2847 Base = N;
2848 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2849 return true;
2850 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2851 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2852 isValidPCRelNode<JumpTableSDNode>(N) ||
2853 isValidPCRelNode<BlockAddressSDNode>(N))
2854 return true;
2855 return false;
2856}
2857
2858/// Returns true if we should use a direct load into vector instruction
2859/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2860static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2861
2862 // If there are any other uses other than scalar to vector, then we should
2863 // keep it as a scalar load -> direct move pattern to prevent multiple
2864 // loads.
2865 LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N);
2866 if (!LD)
2867 return false;
2868
2869 EVT MemVT = LD->getMemoryVT();
2870 if (!MemVT.isSimple())
2871 return false;
2872 switch(MemVT.getSimpleVT().SimpleTy) {
2873 case MVT::i64:
2874 break;
2875 case MVT::i32:
2876 if (!ST.hasP8Vector())
2877 return false;
2878 break;
2879 case MVT::i16:
2880 case MVT::i8:
2881 if (!ST.hasP9Vector())
2882 return false;
2883 break;
2884 default:
2885 return false;
2886 }
2887
2888 SDValue LoadedVal(N, 0);
2889 if (!LoadedVal.hasOneUse())
2890 return false;
2891
2892 for (SDUse &Use : LD->uses())
2893 if (Use.getResNo() == 0 &&
2894 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2895 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2896 return false;
2897
2898 return true;
2899}
2900
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
///
/// \param N      The memory access node (load or store).
/// \param Base   [out] Base operand of the pre-inc form.
/// \param Offset [out] Offset operand (register or immediate).
/// \param AM     [out] Set to ISD::PRE_INC on success.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  Align Alignment;
  // Extract the pointer, memory type, and alignment of the access.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlign();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlign();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, ST: Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(N: Ptr, Base, Index&: Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored. Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Val: Base) || isa<RegisterSDNode>(Val: Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(Val: N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(N: Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(a&: Base, b&: Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: std::nullopt))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < Align(4))
      return false;

    if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: Align(4)))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
    // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(ResNo: 0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Val: Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
2984
2985//===----------------------------------------------------------------------===//
2986// LowerOperation implementation
2987//===----------------------------------------------------------------------===//
2988
2989/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2990/// and LoOpFlags to the target MO flags.
2991static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2992 unsigned &HiOpFlags, unsigned &LoOpFlags,
2993 const GlobalValue *GV = nullptr) {
2994 HiOpFlags = PPCII::MO_HA;
2995 LoOpFlags = PPCII::MO_LO;
2996
2997 // Don't use the pic base if not in PIC relocation model.
2998 if (IsPIC) {
2999 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3000 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3001 }
3002}
3003
3004static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3005 SelectionDAG &DAG) {
3006 SDLoc DL(HiPart);
3007 EVT PtrVT = HiPart.getValueType();
3008 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: PtrVT);
3009
3010 SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL, VT: PtrVT, N1: HiPart, N2: Zero);
3011 SDValue Lo = DAG.getNode(Opcode: PPCISD::Lo, DL, VT: PtrVT, N1: LoPart, N2: Zero);
3012
3013 // With PIC, the first instruction is actually "GR+hi(&G)".
3014 if (isPIC)
3015 Hi = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
3016 N1: DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL, VT: PtrVT), N2: Hi);
3017
3018 // Generate non-pic code that has direct accesses to the constant pool.
3019 // The address of the global is just (hi(&g)+lo(&g)).
3020 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Hi, N2: Lo);
3021}
3022
3023static void setUsesTOCBasePtr(MachineFunction &MF) {
3024 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3025 FuncInfo->setUsesTOCBasePtr();
3026}
3027
// Convenience overload: mark the function being built by \p DAG as using the
// TOC base pointer.
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  setUsesTOCBasePtr(DAG.getMachineFunction());
}
3031
3032SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3033 SDValue GA) const {
3034 EVT VT = Subtarget.getScalarIntVT();
3035 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(Reg: PPC::X2, VT)
3036 : Subtarget.isAIXABI()
3037 ? DAG.getRegister(Reg: PPC::R2, VT)
3038 : DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT);
3039 SDValue Ops[] = { GA, Reg };
3040 return DAG.getMemIntrinsicNode(
3041 Opcode: PPCISD::TOC_ENTRY, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops, MemVT: VT,
3042 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()), Alignment: std::nullopt,
3043 Flags: MachineMemOperand::MOLoad);
3044}
3045
3046SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3047 SelectionDAG &DAG) const {
3048 EVT PtrVT = Op.getValueType();
3049 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Val&: Op);
3050 const Constant *C = CP->getConstVal();
3051
3052 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3053 // The actual address of the GlobalValue is stored in the TOC.
3054 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3055 if (Subtarget.isUsingPCRelativeCalls()) {
3056 SDLoc DL(CP);
3057 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3058 SDValue ConstPool = DAG.getTargetConstantPool(
3059 C, VT: Ty, Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: PPCII::MO_PCREL_FLAG);
3060 return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: ConstPool);
3061 }
3062 setUsesTOCBasePtr(DAG);
3063 SDValue GA = DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0);
3064 return getTOCEntry(DAG, dl: SDLoc(CP), GA);
3065 }
3066
3067 unsigned MOHiFlag, MOLoFlag;
3068 bool IsPIC = isPositionIndependent();
3069 getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3070
3071 if (IsPIC && Subtarget.isSVR4ABI()) {
3072 SDValue GA =
3073 DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: PPCII::MO_PIC_FLAG);
3074 return getTOCEntry(DAG, dl: SDLoc(CP), GA);
3075 }
3076
3077 SDValue CPIHi =
3078 DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOHiFlag);
3079 SDValue CPILo =
3080 DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOLoFlag);
3081 return LowerLabelRef(HiPart: CPIHi, LoPart: CPILo, isPIC: IsPIC, DAG);
3082}
3083
3084// For 64-bit PowerPC, prefer the more compact relative encodings.
3085// This trades 32 bits per jump table entry for one or two instructions
3086// on the jump site.
3087unsigned PPCTargetLowering::getJumpTableEncoding() const {
3088 if (isJumpTableRelative())
3089 return MachineJumpTableInfo::EK_LabelDifference32;
3090
3091 return TargetLowering::getJumpTableEncoding();
3092}
3093
3094bool PPCTargetLowering::isJumpTableRelative() const {
3095 if (UseAbsoluteJumpTables)
3096 return false;
3097 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3098 return true;
3099 return TargetLowering::isJumpTableRelative();
3100}
3101
3102SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3103 SelectionDAG &DAG) const {
3104 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3105 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3106
3107 switch (getTargetMachine().getCodeModel()) {
3108 case CodeModel::Small:
3109 case CodeModel::Medium:
3110 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3111 default:
3112 return DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: SDLoc(),
3113 VT: getPointerTy(DL: DAG.getDataLayout()));
3114 }
3115}
3116
3117const MCExpr *
3118PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3119 unsigned JTI,
3120 MCContext &Ctx) const {
3121 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3122 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3123
3124 switch (getTargetMachine().getCodeModel()) {
3125 case CodeModel::Small:
3126 case CodeModel::Medium:
3127 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3128 default:
3129 return MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx);
3130 }
3131}
3132
/// Lower a JumpTable node, choosing among PC-relative, TOC-based, PIC GOT,
/// and hi/lo immediate materialization according to the subtarget ABI.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op);

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DL: DAG.getDataLayout());
    SDValue GA =
        DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Ty, TargetFlags: PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT);
    return getTOCEntry(DAG, dl: SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);

  // 32-bit SVR4 PIC: load the jump-table address from the GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT,
                                        TargetFlags: PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, dl: SDLoc(GA), GA);
  }

  // Otherwise materialize the address with a hi/lo (addis/addi) pair.
  SDValue JTIHi = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOLoFlag);
  return LowerLabelRef(HiPart: JTIHi, LoPart: JTILo, isPIC: IsPIC, DAG);
}
3169
/// Lower a BlockAddress node, choosing among PC-relative, TOC-based, 32-bit
/// ELF GOT, and hi/lo immediate materialization according to the subtarget
/// ABI.
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Val&: Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DL: DAG.getDataLayout());
    SDValue GA = DAG.getTargetBlockAddress(BA, VT: Ty, Offset: BASDN->getOffset(),
                                           TargetFlags: PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset());
    return getTOCEntry(DAG, dl: SDLoc(BASDN), GA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent())
    return getTOCEntry(
        DAG, dl: SDLoc(BASDN),
        GA: DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset()));

  // Otherwise materialize the address with a hi/lo (addis/addi) pair.
  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOLoFlag);
  return LowerLabelRef(HiPart: TgtBAHi, LoPart: TgtBALo, isPIC: IsPIC, DAG);
}
3207
3208SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3209 SelectionDAG &DAG) const {
3210 if (Subtarget.isAIXABI())
3211 return LowerGlobalTLSAddressAIX(Op, DAG);
3212
3213 return LowerGlobalTLSAddressLinux(Op, DAG);
3214}
3215
/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
/// and then apply the update.
///
/// \param Model [in,out] TLS model chosen for the current access; may be
///              overridden to InitialExec when the optimization applies.
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
                                         SelectionDAG &DAG,
                                         const TargetMachine &TM) {
  // Initialize TLS model opt setting lazily:
  // (1) Use initial-exec for single TLS var references within current function.
  // (2) Use local-dynamic for multiple TLS var references within current
  // function.
  PPCFunctionInfo *FuncInfo =
      DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
    SmallPtrSet<const GlobalValue *, 8> TLSGV;
    // Iterate over all instructions within current function, collect all TLS
    // global variables (global variables taken as the first parameter to
    // Intrinsic::threadlocal_address).
    const Function &Func = DAG.getMachineFunction().getFunction();
    for (const BasicBlock &BB : Func)
      for (const Instruction &I : BB)
        if (I.getOpcode() == Instruction::Call)
          if (const CallInst *CI = dyn_cast<const CallInst>(Val: &I))
            if (Function *CF = CI->getCalledFunction())
              if (CF->isDeclaration() &&
                  CF->getIntrinsicID() == Intrinsic::threadlocal_address)
                if (const GlobalValue *GV =
                        dyn_cast<GlobalValue>(Val: I.getOperand(i: 0))) {
                  // Only local-dynamic accesses are candidates for the
                  // initial-exec downgrade; count them via the set.
                  TLSModel::Model GVModel = TM.getTLSModel(GV);
                  if (GVModel == TLSModel::LocalDynamic)
                    TLSGV.insert(Ptr: GV);
                }

    unsigned TLSGVCnt = TLSGV.size();
    LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
    // Few enough distinct local-dynamic variables: initial-exec is cheaper.
    if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
      FuncInfo->setAIXFuncUseTLSIEForLD();
    FuncInfo->setAIXFuncTLSModelOptInitDone();
  }

  if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
    LLVM_DEBUG(
        dbgs() << DAG.getMachineFunction().getName()
               << " function is using the TLS-IE model for TLS-LD access.\n");
    Model = TLSModel::InitialExec;
  }
}
3261
3262SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3263 SelectionDAG &DAG) const {
3264 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
3265
3266 if (DAG.getTarget().useEmulatedTLS())
3267 report_fatal_error(reason: "Emulated TLS is not yet supported on AIX");
3268
3269 SDLoc dl(GA);
3270 const GlobalValue *GV = GA->getGlobal();
3271 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3272 bool Is64Bit = Subtarget.isPPC64();
3273 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3274
3275 // Apply update to the TLS model.
3276 if (Subtarget.hasAIXShLibTLSModelOpt())
3277 updateForAIXShLibTLSModelOpt(Model, DAG, TM: getTargetMachine());
3278
3279 // TLS variables are accessed through TOC entries.
3280 // To support this, set the DAG to use the TOC base pointer.
3281 setUsesTOCBasePtr(DAG);
3282
3283 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3284
3285 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3286 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3287 bool HasAIXSmallTLSGlobalAttr = false;
3288 SDValue VariableOffsetTGA =
3289 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TPREL_FLAG);
3290 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3291 SDValue TLSReg;
3292
3293 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Val: GV))
3294 if (GVar->hasAttribute(Kind: "aix-small-tls"))
3295 HasAIXSmallTLSGlobalAttr = true;
3296
3297 if (Is64Bit) {
3298 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3299 // involves a load of the variable offset (from the TOC), followed by an
3300 // add of the loaded variable offset to R13 (the thread pointer).
3301 // This code sequence looks like:
3302 // ld reg1,var[TC](2)
3303 // add reg2, reg1, r13 // r13 contains the thread pointer
3304 TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
3305
3306 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3307 // global variable attribute, produce a faster access sequence for
3308 // local-exec TLS variables where the offset from the TLS base is encoded
3309 // as an immediate operand.
3310 //
3311 // We only utilize the faster local-exec access sequence when the TLS
3312 // variable has a size within the policy limit. We treat types that are
3313 // not sized or are empty as being over the policy size limit.
3314 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3315 IsTLSLocalExecModel) {
3316 Type *GVType = GV->getValueType();
3317 if (GVType->isSized() && !GVType->isEmptyTy() &&
3318 GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3319 AIXSmallTlsPolicySizeLimit)
3320 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA, N2: TLSReg);
3321 }
3322 } else {
3323 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3324 // involves loading the variable offset from the TOC, generating a call to
3325 // .__get_tpointer to get the thread pointer (which will be in R3), and
3326 // adding the two together:
3327 // lwz reg1,var[TC](2)
3328 // bla .__get_tpointer
3329 // add reg2, reg1, r3
3330 TLSReg = DAG.getNode(Opcode: PPCISD::GET_TPOINTER, DL: dl, VT: PtrVT);
3331
3332 // We do not implement the 32-bit version of the faster access sequence
3333 // for local-exec that is controlled by the -maix-small-local-exec-tls
3334 // option, or the "aix-small-tls" global variable attribute.
3335 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3336 report_fatal_error(reason: "The small-local-exec TLS access sequence is "
3337 "currently only supported on AIX (64-bit mode).");
3338 }
3339 return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: VariableOffset);
3340 }
3341
3342 if (Model == TLSModel::LocalDynamic) {
3343 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3344
3345 // We do not implement the 32-bit version of the faster access sequence
3346 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3347 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3348 report_fatal_error(reason: "The small-local-dynamic TLS access sequence is "
3349 "currently only supported on AIX (64-bit mode).");
3350
3351 // For local-dynamic on AIX, we need to generate one TOC entry for each
3352 // variable offset, and a single module-handle TOC entry for the entire
3353 // file.
3354
3355 SDValue VariableOffsetTGA =
3356 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLD_FLAG);
3357 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3358
3359 Module *M = DAG.getMachineFunction().getFunction().getParent();
3360 GlobalVariable *TLSGV =
3361 dyn_cast_or_null<GlobalVariable>(Val: M->getOrInsertGlobal(
3362 Name: StringRef("_$TLSML"), Ty: PointerType::getUnqual(C&: *DAG.getContext())));
3363 TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3364 assert(TLSGV && "Not able to create GV for _$TLSML.");
3365 SDValue ModuleHandleTGA =
3366 DAG.getTargetGlobalAddress(GV: TLSGV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLDM_FLAG);
3367 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, GA: ModuleHandleTGA);
3368 SDValue ModuleHandle =
3369 DAG.getNode(Opcode: PPCISD::TLSLD_AIX, DL: dl, VT: PtrVT, Operand: ModuleHandleTOC);
3370
3371 // With the -maix-small-local-dynamic-tls option, produce a faster access
3372 // sequence for local-dynamic TLS variables where the offset from the
3373 // module-handle is encoded as an immediate operand.
3374 //
3375 // We only utilize the faster local-dynamic access sequence when the TLS
3376 // variable has a size within the policy limit. We treat types that are
3377 // not sized or are empty as being over the policy size limit.
3378 if (HasAIXSmallLocalDynamicTLS) {
3379 Type *GVType = GV->getValueType();
3380 if (GVType->isSized() && !GVType->isEmptyTy() &&
3381 GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3382 AIXSmallTlsPolicySizeLimit)
3383 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA,
3384 N2: ModuleHandle);
3385 }
3386
3387 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: ModuleHandle, N2: VariableOffset);
3388 }
3389
3390 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3391 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3392 // need to generate two TOC entries, one for the variable offset, one for the
3393 // region handle. The global address for the TOC entry of the region handle is
3394 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3395 // entry of the variable offset is created with MO_TLSGD_FLAG.
3396 SDValue VariableOffsetTGA =
3397 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGD_FLAG);
3398 SDValue RegionHandleTGA =
3399 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGDM_FLAG);
3400 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3401 SDValue RegionHandle = getTOCEntry(DAG, dl, GA: RegionHandleTGA);
3402 return DAG.getNode(Opcode: PPCISD::TLSGD_AIX, DL: dl, VT: PtrVT, N1: VariableOffset,
3403 N2: RegionHandle);
3404}
3405
/// Lower a GlobalTLSAddress node for Linux/ELF targets. A distinct code
/// sequence is emitted for each of the four TLS access models (local-exec,
/// initial-exec, general-dynamic, local-dynamic), with shorter PC-relative
/// sequences used when the subtarget supports PC-relative addressing.
SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form. Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
  // Emulated TLS lowers to __emutls_get_address calls instead of any of the
  // native sequences below.
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    // Local-exec: the offset from the thread pointer (X13 on 64-bit, R2 on
    // 32-bit) is known at link time.
    if (Subtarget.isUsingPCRelativeCalls()) {
      // Materialize the tprel offset directly and add it to the thread
      // pointer.
      SDValue TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
      SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_TPREL_PCREL_FLAG);
      SDValue MatAddr =
          DAG.getNode(Opcode: PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
      return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: MatAddr);
    }

    // Non-PC-relative: classic addis/addi pair of tprel@ha / tprel@l relative
    // to the thread pointer register.
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(Reg: PPC::X13, VT: MVT::i64)
                             : DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);

    SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL: dl, VT: PtrVT, N1: TGAHi, N2: TLSReg);
    return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: TGALo, N2: Hi);
  }

  if (Model == TLSModel::InitialExec) {
    // Initial-exec: load the thread-pointer offset from the GOT, then add the
    // thread pointer (the MO_TLS / MO_TLS_PCREL_FLAG operand lets the final
    // add be relocated).
    bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
    SDValue TGA = DAG.getTargetGlobalAddress(
        GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(
        GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
    SDValue TPOffset;
    if (IsPCRel) {
      // Load the offset from the GOT slot addressed PC-relatively.
      SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
      TPOffset = DAG.getLoad(VT: MVT::i64, dl, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
                             PtrInfo: MachinePointerInfo());
    } else {
      // Form the GOT pointer first (TOC-based on 64-bit; various PIC schemes
      // on 32-bit), then load the offset relative to it.
      SDValue GOTPtr;
      if (is64bit) {
        setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
        GOTPtr =
            DAG.getNode(Opcode: PPCISD::ADDIS_GOT_TPREL_HA, DL: dl, VT: PtrVT, N1: GOTReg, N2: TGA);
      } else {
        if (!TM.isPositionIndependent())
          GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_GOT, DL: dl, VT: PtrVT);
        else if (picLevel == PICLevel::SmallPIC)
          GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
        else
          GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
      }
      TPOffset = DAG.getNode(Opcode: PPCISD::LD_GOT_TPREL_L, DL: dl, VT: PtrVT, N1: TGA, N2: GOTPtr);
    }
    return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TPOffset, N2: TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    // General-dynamic: resolve the address through a __tls_get_addr call,
    // folded into the TLS_DYNAMIC_MAT_PCREL_ADDR / ADDI_TLSGD_L_ADDR pseudos.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_GOT_TLSGD_PCREL_FLAG);
      return DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
      GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSGD_HA, DL: dl, VT: PtrVT,
                           N1: GOTReg, N2: TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
      else
        GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
    }
    return DAG.getNode(Opcode: PPCISD::ADDI_TLSGD_L_ADDR, DL: dl, VT: PtrVT,
                       N1: GOTPtr, N2: TGA, N3: TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    // Local-dynamic: like general-dynamic, but the call resolves only the
    // module base; the variable's dtprel offset is then added separately.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_GOT_TLSLD_PCREL_FLAG);
      SDValue MatPCRel =
          DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
      return DAG.getNode(Opcode: PPCISD::PADDI_DTPREL, DL: dl, VT: PtrVT, N1: MatPCRel, N2: TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
      GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSLD_HA, DL: dl, VT: PtrVT,
                           N1: GOTReg, N2: TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
      else
        GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
    }
    // Module base from __tls_get_addr, then dtprel@ha / dtprel@l adjustment.
    SDValue TLSAddr = DAG.getNode(Opcode: PPCISD::ADDI_TLSLD_L_ADDR, DL: dl,
                                  VT: PtrVT, N1: GOTPtr, N2: TGA, N3: TGA);
    SDValue DtvOffsetHi = DAG.getNode(Opcode: PPCISD::ADDIS_DTPREL_HA, DL: dl,
                                      VT: PtrVT, N1: TLSAddr, N2: TGA);
    return DAG.getNode(Opcode: PPCISD::ADDI_DTPREL_L, DL: dl, VT: PtrVT, N1: DtvOffsetHi, N2: TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
3533
/// Lower a GlobalAddress node. Depending on the ABI and subtarget the address
/// is materialized PC-relatively, loaded from the TOC/GOT, or composed from
/// hi/lo relocation pairs.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Val&: Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      EVT Ty = getPointerTy(DL: DAG.getDataLayout());
      if (isAccessedAsGotIndirect(N: Op)) {
        // GOT-indirect: materialize the GOT slot's address PC-relatively and
        // load the global's address out of it.
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
                                                TargetFlags: PPCII::MO_GOT_PCREL_FLAG);
        SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
        SDValue Load = DAG.getLoad(VT: MVT::i64, dl: DL, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
                                   PtrInfo: MachinePointerInfo());
        return Load;
      } else {
        // Direct PC-relative materialization of the address itself.
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
                                                TargetFlags: PPCII::MO_PCREL_FLAG);
        return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
      }
    }
    // Otherwise go through a TOC entry, which requires the TOC base pointer.
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset());
    return getTOCEntry(DAG, dl: DL, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag, GV);

  // 32-bit SVR4 PIC accesses the address through the GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT,
                                            offset: GSDN->getOffset(),
                                            TargetFlags: PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, dl: DL, GA);
  }

  // Non-PIC (or non-SVR4): compose the address from @ha/@l halves.
  SDValue GAHi =
      DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOHiFlag);
  SDValue GALo =
      DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOLoFlag);

  return LowerLabelRef(HiPart: GAHi, LoPart: GALo, isPIC: IsPIC, DAG);
}
3582
/// Lower SETCC / STRICT_FSETCC(S). Handles f128 softening (pre-Power9),
/// v2i64 equality via a v4i32 comparison plus shuffle, and rewrites integer
/// eq/ne against non-trivial constants into a compare-with-zero of an XOR.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  // Strict FP compares carry a chain as operand 0, shifting the usual
  // (LHS, RHS, CC) operand positions by one.
  bool IsStrict = Op->isStrictFPOpcode();
  ISD::CondCode CC =
      cast<CondCodeSDNode>(Val: Op.getOperand(i: IsStrict ? 3 : 2))->get();
  SDValue LHS = Op.getOperand(i: IsStrict ? 1 : 0);
  SDValue RHS = Op.getOperand(i: IsStrict ? 2 : 1);
  SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
  EVT LHSVT = LHS.getValueType();
  SDLoc dl(Op);

  // Soften the setcc with libcall if it is fp128.
  if (LHSVT == MVT::f128) {
    assert(!Subtarget.hasP9Vector() &&
           "SETCC for f128 is already legal under Power9!");
    softenSetCCOperands(DAG, VT: LHSVT, NewLHS&: LHS, NewRHS&: RHS, CCCode&: CC, DL: dl, OldLHS: LHS, OldRHS: RHS, Chain,
                        IsSignaling: Op->getOpcode() == ISD::STRICT_FSETCCS);
    // If softening left a two-operand compare, emit the remaining SETCC.
    if (RHS.getNode())
      LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS,
                        N3: DAG.getCondCode(Cond: CC));
    if (IsStrict)
      return DAG.getMergeValues(Ops: {LHS, Chain}, dl);
    return LHS;
  }

  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (LHS.getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC != ISD::SETEQ && CC != ISD::SETNE)
        return SDValue();
      // Compare as v4i32 lanes, then combine each 64-bit element's two
      // 32-bit results by AND (eq) / OR (ne) with its word-swapped self.
      SDValue SetCC32 = DAG.getSetCC(
          DL: dl, VT: MVT::v4i32, LHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: LHS),
          RHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: RHS), Cond: CC);
      int ShuffV[] = {1, 0, 3, 2};
      SDValue Shuff =
          DAG.getVectorShuffle(VT: MVT::v4i32, dl, N1: SetCC32, N2: SetCC32, Mask: ShuffV);
      return DAG.getBitcast(VT: MVT::v2i64,
                            V: DAG.getNode(Opcode: CC == ISD::SETEQ ? ISD::AND : ISD::OR,
                                         DL: dl, VT: MVT::v4i32, N1: Shuff, N2: SetCC32));
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized. FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnes() || C->isZero())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit. The
  // normal approach here uses sub to do this instead of xor. Using xor exposes
  // the result to other bit-twiddling opportunities.
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: LHSVT, N1: LHS, N2: RHS);
    return DAG.getSetCC(DL: dl, VT, LHS: Sub, RHS: DAG.getConstant(Val: 0, DL: dl, VT: LHSVT), Cond: CC);
  }
  return SDValue();
}
3658
/// Lower VAARG for the 32-bit SVR4 ABI. Reads the gpr/fpr indices out of the
/// va_list struct (see LowerVASTART for its layout), decides between the
/// register save area and the overflow area, updates the in-memory indices
/// and overflow pointer, and loads the argument from the chosen location.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(ResNo: 0);
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  SDValue InChain = Node->getOperand(Num: 0);
  SDValue VAListPtr = Node->getOperand(Num: 1);
  const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: 2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
                                    Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV), MemVT: MVT::i8);
  InChain = GprIndex.getValue(R: 1);

  if (VT == MVT::i64) {
    // i64 arguments occupy an even/odd GPR pair, so round the index up to
    // even if needed.
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: GprIndex,
                                 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
    SDValue CC64 = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: GprAnd,
                                RHS: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32), Cond: ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: GprIndex,
                                          N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC64, N2: GprIndexPlusOne,
                           N3: GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
                               N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
                                    Ptr: FprPtr, PtrInfo: MachinePointerInfo(SV), MemVT: MVT::i8);
  InChain = FprIndex.getValue(R: 1);

  // reg_save_area pointer lives at offset 8, overflow_arg_area at offset 4.
  SDValue RegSaveAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
                                       N2: DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
                                        N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: OverflowAreaPtr, PtrInfo: MachinePointerInfo());
  InChain = OverflowArea.getValue(R: 1);

  SDValue RegSaveArea =
      DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: RegSaveAreaPtr, PtrInfo: MachinePointerInfo());
  InChain = RegSaveArea.getValue(R: 1);

  // CC is true while the relevant index is still < 8 (register save area has
  // space); otherwise the overflow area is selected below.
  SDValue CC = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: VT.isInteger() ? GprIndex : FprIndex,
                            RHS: DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32), Cond: ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32,
                                    N1: VT.isInteger() ? GprIndex : FprIndex,
                                    N2: DAG.getConstant(Val: VT.isInteger() ? 4 : 8, DL: dl,
                                                       VT: MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: RegSaveArea,
                               N2: RegConstant);

  // Floating types are 32 bytes into RegSaveArea (past the 8 x 4-byte GPRs)
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OurReg,
                         N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32,
                                   N1: VT.isInteger() ? GprIndex : FprIndex,
                                   N2: DAG.getConstant(Val: VT == MVT::i64 ? 2 : 1, DL: dl,
                                                      VT: MVT::i32));

  // Store the bumped index back into the va_list (gpr at offset 0, fpr at 1).
  InChain = DAG.getTruncStore(Chain: InChain, dl, Val: IndexPlus1,
                              Ptr: VT.isInteger() ? VAListPtr : FprPtr,
                              PtrInfo: MachinePointerInfo(SV), SVT: MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: PtrVT, N1: CC, N2: OurReg, N3: OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr index >= 8
  SDValue OverflowAreaPlusN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OverflowArea,
                                          N2: DAG.getConstant(Val: VT.isInteger() ? 4 : 8,
                                                             DL: dl, VT: MVT::i32));

  OverflowArea = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC, N2: OverflowArea,
                             N3: OverflowAreaPlusN);

  InChain = DAG.getTruncStore(Chain: InChain, dl, Val: OverflowArea, Ptr: OverflowAreaPtr,
                              PtrInfo: MachinePointerInfo(), SVT: MVT::i32);

  // Finally, load the argument itself from the selected area.
  return DAG.getLoad(VT, dl, Chain: InChain, Ptr: Result, PtrInfo: MachinePointerInfo());
}
3757
3758SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3759 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3760
3761 // We have to copy the entire va_list struct:
3762 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3763 return DAG.getMemcpy(Chain: Op.getOperand(i: 0), dl: Op, Dst: Op.getOperand(i: 1), Src: Op.getOperand(i: 2),
3764 Size: DAG.getConstant(Val: 12, DL: SDLoc(Op), VT: MVT::i32), Alignment: Align(8),
3765 isVol: false, AlwaysInline: true, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
3766 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
3767}
3768
3769SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3770 SelectionDAG &DAG) const {
3771 return Op.getOperand(i: 0);
3772}
3773
/// Scan an INLINEASM / INLINEASM_BR node for any operand that defines or
/// clobbers the link register; if one is found, record that an LR store is
/// required. The node itself is always returned unchanged.
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();

  assert((Op.getOpcode() == ISD::INLINEASM ||
          Op.getOpcode() == ISD::INLINEASM_BR) &&
         "Expecting Inline ASM node.");

  // If an LR store is already known to be required then there is no point in
  // checking this ASM as well.
  if (MFI.isLRStoreRequired())
    return Op;

  // Inline ASM nodes have an optional last operand that is an incoming Flag of
  // type MVT::Glue. We want to ignore this last operand if that is the case.
  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(i: NumOps - 1).getValueType() == MVT::Glue)
    --NumOps;

  // Check all operands that may contain the LR.
  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    // Each operand group starts with a flag word describing its kind and the
    // number of register operands that follow.
    const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
    unsigned NumVals = Flags.getNumOperandRegisters();
    ++i; // Skip the ID value.

    switch (Flags.getKind()) {
    default:
      llvm_unreachable("Bad flags!");
    case InlineAsm::Kind::RegUse:
    case InlineAsm::Kind::Imm:
    case InlineAsm::Kind::Mem:
      // These cannot define or clobber LR; just skip their operands.
      i += NumVals;
      break;
    case InlineAsm::Kind::Clobber:
    case InlineAsm::Kind::RegDef:
    case InlineAsm::Kind::RegDefEarlyClobber: {
      // Writes/clobbers: if any register in this group is LR (or LR8),
      // the prologue must save the link register.
      for (; NumVals; --NumVals, ++i) {
        Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i))->getReg();
        if (Reg != PPC::LR && Reg != PPC::LR8)
          continue;
        MFI.setLRStoreRequired();
        return Op;
      }
      break;
    }
    }
  }

  return Op;
}
3824
/// Lower INIT_TRAMPOLINE. On AIX a function-descriptor-style trampoline is
/// built inline (entry point + TOC copied from the nested function's
/// descriptor, nest argument stored as the environment pointer); elsewhere
/// the work is delegated to the __trampoline_setup runtime routine.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(i: 0);
  SDValue Trmp = Op.getOperand(i: 1); // trampoline
  SDValue FPtr = Op.getOperand(i: 2); // nested function
  SDValue Nest = Op.getOperand(i: 3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());

  if (Subtarget.isAIXABI()) {
    // On AIX we create a trampoline descriptor by combining the
    // entry point and TOC from the global descriptor (FPtr) with the
    // nest argument as the environment pointer.
    uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
    MaybeAlign PointerAlign(PointerSize);
    // Descriptor loads can be marked dereferenceable/invariant when the
    // subtarget guarantees function descriptors never change.
    auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                        ? (MachineMemOperand::MODereferenceable |
                           MachineMemOperand::MOInvariant)
                        : MachineMemOperand::MONone;

    // Descriptor slot offsets: [0] entry point, [1] TOC, [2] environment.
    uint64_t TOCPointerOffset = 1 * PointerSize;
    uint64_t EnvPointerOffset = 2 * PointerSize;
    SDValue SDTOCPtrOffset = DAG.getConstant(Val: TOCPointerOffset, DL: dl, VT: PtrVT);
    SDValue SDEnvPtrOffset = DAG.getConstant(Val: EnvPointerOffset, DL: dl, VT: PtrVT);

    // IR-level values used only for MachinePointerInfo bookkeeping.
    const Value *TrampolineAddr =
        cast<SrcValueSDNode>(Val: Op.getOperand(i: 4))->getValue();
    const Function *Func =
        cast<Function>(Val: cast<SrcValueSDNode>(Val: Op.getOperand(i: 5))->getValue());

    SDValue OutChains[3];

    // Copy the entry point address from the global descriptor to the
    // trampoline buffer.
    SDValue LoadEntryPoint =
        DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: FPtr, PtrInfo: MachinePointerInfo(Func, 0),
                    Alignment: PointerAlign, MMOFlags);
    SDValue EPLoadChain = LoadEntryPoint.getValue(R: 1);
    OutChains[0] = DAG.getStore(Chain: EPLoadChain, dl, Val: LoadEntryPoint, Ptr: Trmp,
                                PtrInfo: MachinePointerInfo(TrampolineAddr, 0));

    // Copy the TOC pointer from the global descriptor to the trampoline
    // buffer.
    SDValue TOCFromDescriptorPtr =
        DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FPtr, N2: SDTOCPtrOffset);
    SDValue TOCReg = DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: TOCFromDescriptorPtr,
                                 PtrInfo: MachinePointerInfo(Func, TOCPointerOffset),
                                 Alignment: PointerAlign, MMOFlags);
    SDValue TrampolineTOCPointer =
        DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Trmp, N2: SDTOCPtrOffset);
    SDValue TOCLoadChain = TOCReg.getValue(R: 1);
    OutChains[1] =
        DAG.getStore(Chain: TOCLoadChain, dl, Val: TOCReg, Ptr: TrampolineTOCPointer,
                     PtrInfo: MachinePointerInfo(TrampolineAddr, TOCPointerOffset));

    // Store the nest argument into the environment pointer in the trampoline
    // buffer.
    SDValue EnvPointer = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Trmp, N2: SDEnvPtrOffset);
    OutChains[2] =
        DAG.getStore(Chain, dl, Val: Nest, Ptr: EnvPointer,
                     PtrInfo: MachinePointerInfo(TrampolineAddr, EnvPointerOffset));

    // Join the three independent stores into a single outgoing chain.
    SDValue TokenFactor =
        DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OutChains);
    return TokenFactor;
  }

  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(C&: *DAG.getContext());

  TargetLowering::ArgListTy Args;
  Args.emplace_back(args&: Trmp, args&: IntPtrTy);
  // TrampSize == (isPPC64 ? 48 : 40);
  Args.emplace_back(
      args: DAG.getConstant(Val: isPPC64 ? 48 : 40, DL: dl, VT: Subtarget.getScalarIntVT()),
      args&: IntPtrTy);
  Args.emplace_back(args&: FPtr, args&: IntPtrTy);
  Args.emplace_back(args&: Nest, args&: IntPtrTy);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CC: CallingConv::C, ResultType: Type::getVoidTy(C&: *DAG.getContext()),
      Target: DAG.getExternalSymbol(Sym: "__trampoline_setup", VT: PtrVT), ArgsList: std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
3914
/// Lower VASTART. On 64-bit and AIX targets this is a single store of the
/// varargs frame address; on 32-bit SVR4 it initializes the four fields of
/// the va_list struct described below.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
    return DAG.getStore(Chain: Op.getOperand(i: 0), dl, Val: FR, Ptr: Op.getOperand(i: 1),
                        PtrInfo: MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];

  // Initial values for the two index bytes come from the argument-lowering
  // bookkeeping in PPCFunctionInfo.
  SDValue ArgGPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumGPR(), DL: dl, VT: MVT::i32);
  SDValue ArgFPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumFPR(), DL: dl, VT: MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsStackOffset(),
                                            VT: PtrVT);
  SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
                                 VT: PtrVT);

  // Offsets used to step the store pointer through the struct fields.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(Val: FrameOffset, DL: dl, VT: PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(Val: StackOffset, DL: dl, VT: PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(Val: FPROffset, DL: dl, VT: PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Chain: Op.getOperand(i: 0), dl, Val: ArgGPR, Ptr: Op.getOperand(i: 1),
                        PtrInfo: MachinePointerInfo(SV), SVT: MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Op.getOperand(i: 1),
                                N2: ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(Chain: firstStore, dl, Val: ArgFPR, Ptr: nextPtr,
                        PtrInfo: MachinePointerInfo(SV, nextOffset), SVT: MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(Chain: secondStore, dl, Val: StackOffsetFI, Ptr: nextPtr,
                                    PtrInfo: MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(Chain: thirdStore, dl, Val: FR, Ptr: nextPtr,
                      PtrInfo: MachinePointerInfo(SV, nextOffset));
}
3998
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX. Thirteen registers, F1 through F13, in allocation
/// order.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
4004
4005/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4006/// the stack.
4007static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4008 unsigned PtrByteSize) {
4009 unsigned ArgSize = ArgVT.getStoreSize();
4010 if (Flags.isByVal())
4011 ArgSize = Flags.getByValSize();
4012
4013 // Round up to multiples of the pointer size, except for array members,
4014 // which are always packed.
4015 if (!Flags.isInConsecutiveRegs())
4016 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4017
4018 return ArgSize;
4019}
4020
4021/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4022/// on the stack.
4023static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4024 ISD::ArgFlagsTy Flags,
4025 unsigned PtrByteSize) {
4026 Align Alignment(PtrByteSize);
4027
4028 // Altivec parameters are padded to a 16 byte boundary.
4029 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4030 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4031 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4032 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4033 Alignment = Align(16);
4034
4035 // ByVal parameters are aligned as requested.
4036 if (Flags.isByVal()) {
4037 auto BVAlign = Flags.getNonZeroByValAlign();
4038 if (BVAlign > PtrByteSize) {
4039 if (BVAlign.value() % PtrByteSize != 0)
4040 llvm_unreachable(
4041 "ByVal alignment is not a multiple of the pointer size");
4042
4043 Alignment = BVAlign;
4044 }
4045 }
4046
4047 // Array members are always packed to their original alignment.
4048 if (Flags.isInConsecutiveRegs()) {
4049 // If the array member was split into multiple registers, the first
4050 // needs to be aligned to the size of the full type. (Except for
4051 // ppcf128, which is only aligned as its f64 components.)
4052 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4053 Alignment = Align(OrigVT.getStoreSize());
4054 else
4055 Alignment = Align(ArgVT.getStoreSize());
4056 }
4057
4058 return Alignment;
4059}
4060
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers). ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize, unsigned LinkageSize,
                                   unsigned ParamAreaSize, unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  Align Alignment =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  // The last member of a consecutive-register run rounds the offset back up
  // to a pointer-size boundary for the next argument.
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all. Note the register counters are
  // still decremented even when UseMemory was never set.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
4110
4111/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4112/// ensure minimum alignment required for target.
4113static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4114 unsigned NumBytes) {
4115 return alignTo(Size: NumBytes, A: Lowering->getStackAlign());
4116}
4117
4118SDValue PPCTargetLowering::LowerFormalArguments(
4119 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4120 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4121 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4122 if (Subtarget.isAIXABI())
4123 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4124 InVals);
4125 if (Subtarget.is64BitELFABI())
4126 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4127 InVals);
4128 assert(Subtarget.is32BitELFABI());
4129 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4130 InVals);
4131}
4132
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots, so
  // only mark the fixed objects immutable when guaranteed TCO is off.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(Size: LinkageSize, Alignment: PtrAlign);
  CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      // Pick the register class matching the value type and the FP/vector
      // features available on this subtarget.
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        // An SPE double arrives split across two consecutive GPR locations;
        // copy both i32 halves in and reassemble them with BUILD_SPE64.
        assert(i + 1 < e && "No second half of double precision argument");
        Register RegLo = MF.addLiveIn(PReg: VA.getLocReg(), RC);
        Register RegHi = MF.addLiveIn(PReg: ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, Reg: RegLo, VT: MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, Reg: RegHi, VT: MVT::i32);
        // On big-endian the halves arrive in the opposite order.
        if (!Subtarget.isLittleEndian())
          std::swap (a&: ArgValueLo, b&: ArgValueHi);
        ArgValue = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: ArgValueLo,
                               N2: ArgValueHi);
      } else {
        Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
        // i1 values are passed as i32 and truncated back down after the copy.
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      VT: ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: ArgValue);
      }

      InVals.push_back(Elt: ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified: adjust the offset so the
      // load reads the value, not its leading padding.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
      InVals.push_back(
          Elt: DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getStackSize();
  MinReservedArea = std::max(a: MinReservedArea, b: LinkageSize);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = std::size(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = std::size(FPArgRegs);

    // With soft-float or SPE there are no FPR varargs to spill.
    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(Regs: GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(Regs: FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
        Size: PtrVT.getSizeInBits() / 8, SPOffset: CCInfo.getStackSize(), IsImmutable: true));

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Size: Depth, Alignment: Align(8), isSpillSlot: false));
    SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (MCPhysReg GPArgReg : GPArgRegs) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: GPArgReg);
      if (!VReg)
        VReg = MF.addLiveIn(PReg: GPArgReg, RC: &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
      SDValue Store =
          DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
      MemOps.push_back(Elt: Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(Val: PtrVT.getSizeInBits()/8, DL: dl, VT: PtrVT);
      FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(PReg: FPArgRegs[FPRIndex], RC: &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::f64);
      SDValue Store =
          DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
      MemOps.push_back(Elt: Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(Val: MVT(MVT::f64).getSizeInBits()/8, DL: dl,
                                       VT: PtrVT);
      FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
    }
  }

  // Tie all register-spill stores into a single chain token.
  if (!MemOps.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);

  return Chain;
}
4377
4378// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4379// value to MVT::i64 and then truncate to the correct register size.
4380SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4381 EVT ObjectVT, SelectionDAG &DAG,
4382 SDValue ArgVal,
4383 const SDLoc &dl) const {
4384 if (Flags.isSExt())
4385 ArgVal = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: MVT::i64, N1: ArgVal,
4386 N2: DAG.getValueType(ObjectVT));
4387 else if (Flags.isZExt())
4388 ArgVal = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: MVT::i64, N1: ArgVal,
4389 N2: DAG.getValueType(ObjectVT));
4390
4391 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ObjectVT, Operand: ArgVal);
4392}
4393
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots, so
  // only mark the fixed objects immutable when guaranteed TCO is off.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = std::size(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = std::size(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame. In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (const ISD::InputArg &In : Ins) {
    // The 'nest' parameter travels in R11 and never consumes a slot.
    if (In.Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(ArgVT: In.VT, OrigVT: In.ArgVT, Flags: In.Flags, PtrByteSize,
                               LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
                               AvailableFPRs, AvailableVRs))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers. On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    // Keep FuncArg in sync with the original IR argument this Ins entry
    // came from (several Ins entries can map to one IR argument).
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(i&: FuncArg, n: Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset;
    Align Alignment;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack. */
      Alignment =
          CalculateStackSlotAlignment(ArgVT: ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset. */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(a: GPR_idx, b: Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers. Examples:
      //    struct { } a;
      //    union  { } b;
      //    int c[0];
      // etc. However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: ArgOffset, IsImmutable: true);
        SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
        InVals.push_back(Elt: FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument. If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame. Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: false, isAliased: true);
      else
        FI = MFI.CreateStackObject(Size: ArgSize, Alignment, isSpillSlot: false);
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(Val: PtrByteSize - ObjSize, DL: dl, VT: PtrVT);
          Arg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ArgOff.getValueType(), N1: Arg, N2: ArgOff);
        }
        InVals.push_back(Elt: Arg);

        // If the aggregate arrived (partly) in a GPR, spill it to the slot.
        if (GPR_idx != Num_GPR_Regs) {
          Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
          EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ObjSize * 8);
          SDValue Store =
              DAG.getTruncStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: Arg,
                                PtrInfo: MachinePointerInfo(&*FuncArg), SVT: ObjType);
          MemOps.push_back(Elt: Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(Elt: FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(Val: j, DL: dl, VT: PtrVT);
          Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Off.getValueType(), N1: Addr, N2: Off);
        }
        // The last doubleword may be only partially occupied by the object.
        unsigned StoreSizeInBits = std::min(a: PtrByteSize, b: (ObjSize - j)) * 8;
        EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreSizeInBits);
        SDValue Store =
            DAG.getTruncStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: Addr,
                              PtrInfo: MachinePointerInfo(&*FuncArg, j), SVT: ObjType);
        MemOps.push_back(Elt: Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        Register VReg = MF.addLiveIn(PReg: PPC::X11, RC: &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly. Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly. The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(PReg: FPR[FPR_idx],
                              RC: Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(PReg: FPR[FPR_idx], RC: Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Select which 32-bit half of the GPR holds this float, based on
          // the offset parity and the target endianness.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgVal,
                                 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
          ArgVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: ArgVal);
        }

        ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ObjectVT, Operand: ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array. Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly. The latter are used to implement ELFv2 homogenous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        Register VReg = MF.addLiveIn(PReg: VR[VR_idx], RC: &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
        ++VR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // Values smaller than their slot are right-justified on big-endian.
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(Size: ObjSize, SPOffset: CurArgOffset, IsImmutable: isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
      ArgVal = DAG.getLoad(VT: ObjectVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo());
    }

    InVals.push_back(Elt: ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(a: ArgOffset, b: LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  // On ELFv2ABI spec, it writes:
  // C programs that are intended to be *portable* across different compilers
  // and architectures must use the header file <stdarg.h> to deal with variable
  // argument lists.
  if (isVarArg && MFI.hasVAStart()) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: Depth, IsImmutable: true));
    SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
      SDValue Store =
          DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
      MemOps.push_back(Elt: Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
      FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
    }
  }

  // Tie all register-spill stores into a single chain token.
  if (!MemOps.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);

  return Chain;
}
4769
4770/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4771/// adjusted to accommodate the arguments for the tailcall.
4772static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4773 unsigned ParamSize) {
4774
4775 if (!isTailCall) return 0;
4776
4777 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4778 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4779 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4780 // Remember only if the new adjustment is bigger.
4781 if (SPDiff < FI->getTailCallSPDelta())
4782 FI->setTailCallSPDelta(SPDiff);
4783
4784 return SPDiff;
4785}
4786
4787static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4788
4789static bool callsShareTOCBase(const Function *Caller,
4790 const GlobalValue *CalleeGV,
4791 const TargetMachine &TM) {
4792 // It does not make sense to call callsShareTOCBase() with a caller that
4793 // is PC Relative since PC Relative callers do not have a TOC.
4794#ifndef NDEBUG
4795 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4796 assert(!STICaller->isUsingPCRelativeCalls() &&
4797 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4798#endif
4799
4800 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4801 // don't have enough information to determine if the caller and callee share
4802 // the same TOC base, so we have to pessimistically assume they don't for
4803 // correctness.
4804 if (!CalleeGV)
4805 return false;
4806
4807 // If the callee is preemptable, then the static linker will use a plt-stub
4808 // which saves the toc to the stack, and needs a nop after the call
4809 // instruction to convert to a toc-restore.
4810 if (!TM.shouldAssumeDSOLocal(GV: CalleeGV))
4811 return false;
4812
4813 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4814 // We may need a TOC restore in the situation where the caller requires a
4815 // valid TOC but the callee is PC Relative and does not.
4816 const Function *F = dyn_cast<Function>(Val: CalleeGV);
4817 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(Val: CalleeGV);
4818
4819 // If we have an Alias we can try to get the function from there.
4820 if (Alias) {
4821 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4822 F = dyn_cast<Function>(Val: GlobalObj);
4823 }
4824
4825 // If we still have no valid function pointer we do not have enough
4826 // information to determine if the callee uses PC Relative calls so we must
4827 // assume that it does.
4828 if (!F)
4829 return false;
4830
4831 // If the callee uses PC Relative we cannot guarantee that the callee won't
4832 // clobber the TOC of the caller and so we must assume that the two
4833 // functions do not share a TOC base.
4834 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(F: *F);
4835 if (STICallee->isUsingPCRelativeCalls())
4836 return false;
4837
4838 // If the GV is not a strong definition then we need to assume it can be
4839 // replaced by another function at link time. The function that replaces
4840 // it may not share the same TOC as the caller since the callee may be
4841 // replaced by a PC Relative version of the same function.
4842 if (!CalleeGV->isStrongDefinitionForLinker())
4843 return false;
4844
4845 // The medium and large code models are expected to provide a sufficiently
4846 // large TOC to provide all data addressing needs of a module with a
4847 // single TOC.
4848 if (CodeModel::Medium == TM.getCodeModel() ||
4849 CodeModel::Large == TM.getCodeModel())
4850 return true;
4851
4852 // Any explicitly-specified sections and section prefixes must also match.
4853 // Also, if we're using -ffunction-sections, then each function is always in
4854 // a different section (the same is true for COMDAT functions).
4855 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4856 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4857 return false;
4858 if (const auto *F = dyn_cast<Function>(Val: CalleeGV)) {
4859 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4860 return false;
4861 }
4862
4863 return true;
4864}
4865
4866static bool
4867needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4868 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4869 assert(Subtarget.is64BitELFABI());
4870
4871 const unsigned PtrByteSize = 8;
4872 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4873
4874 static const MCPhysReg GPR[] = {
4875 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4876 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4877 };
4878 static const MCPhysReg VR[] = {
4879 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4880 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4881 };
4882
4883 const unsigned NumGPRs = std::size(GPR);
4884 const unsigned NumFPRs = 13;
4885 const unsigned NumVRs = std::size(VR);
4886 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4887
4888 unsigned NumBytes = LinkageSize;
4889 unsigned AvailableFPRs = NumFPRs;
4890 unsigned AvailableVRs = NumVRs;
4891
4892 for (const ISD::OutputArg& Param : Outs) {
4893 if (Param.Flags.isNest()) continue;
4894
4895 if (CalculateStackSlotUsed(ArgVT: Param.VT, OrigVT: Param.ArgVT, Flags: Param.Flags, PtrByteSize,
4896 LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
4897 AvailableFPRs, AvailableVRs))
4898 return true;
4899 }
4900 return false;
4901}
4902
4903static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4904 if (CB.arg_size() != CallerFn->arg_size())
4905 return false;
4906
4907 auto CalleeArgIter = CB.arg_begin();
4908 auto CalleeArgEnd = CB.arg_end();
4909 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4910
4911 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4912 const Value* CalleeArg = *CalleeArgIter;
4913 const Value* CallerArg = &(*CallerArgIter);
4914 if (CalleeArg == CallerArg)
4915 continue;
4916
4917 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4918 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4919 // }
4920 // 1st argument of callee is undef and has the same type as caller.
4921 if (CalleeArg->getType() == CallerArg->getType() &&
4922 isa<UndefValue>(Val: CalleeArg))
4923 continue;
4924
4925 return false;
4926 }
4927
4928 return true;
4929}
4930
4931// Returns true if TCO is possible between the callers and callees
4932// calling conventions.
4933static bool
4934areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4935 CallingConv::ID CalleeCC) {
4936 // Tail calls are possible with fastcc and ccc.
4937 auto isTailCallableCC = [] (CallingConv::ID CC){
4938 return CC == CallingConv::C || CC == CallingConv::Fast;
4939 };
4940 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4941 return false;
4942
4943 // We can safely tail call both fastcc and ccc callees from a c calling
4944 // convention caller. If the caller is fastcc, we may have less stack space
4945 // than a non-fastcc caller with the same signature so disable tail-calls in
4946 // that case.
4947 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4948}
4949
/// Decide whether a call lowered for the 64-bit SVR4 ELF ABI may be emitted
/// as a tail call (TCO under -tailcallopt) or sibling call (SCO).
///
/// \p CalleeGV is the callee's global when this is a direct call (may be null
/// for indirect calls); \p CB is the originating call instruction when one is
/// available (PC-relative tail calls may not have a CallBase);
/// \p isCalleeExternalSymbol is set when the callee is an ExternalSymbol node.
/// Returns true only when every eligibility check below passes.
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // SCO may be disabled by flag, but guaranteed TCO (-tailcallopt) still
  // overrides the disable.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   struct test {
  //     long int a;
  //     char ary[56];
  //   } gTest;
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     b->a = v.a;
  //     return 0;
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Range: Outs, P: [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(Caller: CallerFunc, CalleeGV, TM: getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(CallerFn: CallerFunc, CB: *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}
5033
5034/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5035/// for tail call optimization. Targets which want to do tail call
5036/// optimization should implement this function.
5037bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5038 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5039 CallingConv::ID CallerCC, bool isVarArg,
5040 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5041 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5042 return false;
5043
5044 // Variable argument functions are not supported.
5045 if (isVarArg)
5046 return false;
5047
5048 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5049 // Functions containing by val parameters are not supported.
5050 if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5051 return false;
5052
5053 // Non-PIC/GOT tail calls are supported.
5054 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5055 return true;
5056
5057 // At the moment we can only do local tail calls (in same module, hidden
5058 // or protected) if we are generating PIC.
5059 if (CalleeGV)
5060 return CalleeGV->hasHiddenVisibility() ||
5061 CalleeGV->hasProtectedVisibility();
5062 }
5063
5064 return false;
5065}
5066
5067/// isCallCompatibleAddress - Return the immediate to use if the specified
5068/// 32-bit value is representable in the immediate field of a BxA instruction.
5069static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5070 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Op);
5071 if (!C) return nullptr;
5072
5073 int Addr = C->getZExtValue();
5074 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5075 SignExtend32<26>(X: Addr) != Addr)
5076 return nullptr; // Top 6 bits have to be sext of immediate.
5077
5078 return DAG
5079 .getSignedConstant(
5080 Val: (int)C->getZExtValue() >> 2, DL: SDLoc(Op),
5081 VT: DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()))
5082 .getNode();
5083}
5084
namespace {

/// Records an outgoing tail-call argument together with the fixed stack slot
/// it will be stored to once it is safe to overwrite the caller's frame.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index node addressing the destination slot.
  int FrameIdx = 0;   // Raw frame index of the destination slot.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
5096
5097/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5098static void StoreTailCallArgumentsToStackSlot(
5099 SelectionDAG &DAG, SDValue Chain,
5100 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5101 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5102 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5103 SDValue Arg = TailCallArgs[i].Arg;
5104 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5105 int FI = TailCallArgs[i].FrameIdx;
5106 // Store relative to framepointer.
5107 MemOpChains.push_back(Elt: DAG.getStore(
5108 Chain, dl, Val: Arg, Ptr: FIN,
5109 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)));
5110 }
5111}
5112
5113/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5114/// the appropriate stack slot for the tail call optimized function call.
5115static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5116 SDValue OldRetAddr, SDValue OldFP,
5117 int SPDiff, const SDLoc &dl) {
5118 if (SPDiff) {
5119 // Calculate the new stack slot for the return address.
5120 MachineFunction &MF = DAG.getMachineFunction();
5121 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5122 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5123 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5124 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5125 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(Size: SlotSize,
5126 SPOffset: NewRetAddrLoc, IsImmutable: true);
5127 SDValue NewRetAddrFrIdx =
5128 DAG.getFrameIndex(FI: NewRetAddr, VT: Subtarget.getScalarIntVT());
5129 Chain = DAG.getStore(Chain, dl, Val: OldRetAddr, Ptr: NewRetAddrFrIdx,
5130 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: NewRetAddr));
5131 }
5132 return Chain;
5133}
5134
5135/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5136/// the position of the argument.
5137static void CalculateTailCallArgDest(
5138 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5139 int SPDiff, unsigned ArgOffset,
5140 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5141 int Offset = ArgOffset + SPDiff;
5142 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5143 int FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
5144 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5145 SDValue FIN = DAG.getFrameIndex(FI, VT);
5146 TailCallArgumentInfo Info;
5147 Info.Arg = Arg;
5148 Info.FrameIdxOp = FIN;
5149 Info.FrameIdx = FI;
5150 TailCallArguments.push_back(Elt: Info);
5151}
5152
5153/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5154/// stack slot. Returns the chain as result and the loaded frame pointers in
5155/// LROpOut/FPOpout. Used when tail calling.
5156SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5157 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5158 SDValue &FPOpOut, const SDLoc &dl) const {
5159 if (SPDiff) {
5160 // Load the LR and FP stack slot for later adjusting.
5161 LROpOut = getReturnAddrFrameIndex(DAG);
5162 LROpOut = DAG.getLoad(VT: Subtarget.getScalarIntVT(), dl, Chain, Ptr: LROpOut,
5163 PtrInfo: MachinePointerInfo());
5164 Chain = SDValue(LROpOut.getNode(), 1);
5165 }
5166 return Chain;
5167}
5168
5169/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5170/// by "Src" to address "Dst" of size "Size". Alignment information is
5171/// specified by the specific parameter attribute. The copy will be passed as
5172/// a byval function parameter.
5173/// Sometimes what we are copying is the end of a larger object, the part that
5174/// does not fit in registers.
5175static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5176 SDValue Chain, ISD::ArgFlagsTy Flags,
5177 SelectionDAG &DAG, const SDLoc &dl) {
5178 SDValue SizeNode = DAG.getConstant(Val: Flags.getByValSize(), DL: dl, VT: MVT::i32);
5179 return DAG.getMemcpy(
5180 Chain, dl, Dst, Src, Size: SizeNode, Alignment: Flags.getNonZeroByValAlign(), isVol: false, AlwaysInline: false,
5181 /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
5182}
5183
5184/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5185/// tail calls.
5186static void LowerMemOpCallTo(
5187 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5188 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5189 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5190 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5191 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
5192 if (!isTailCall) {
5193 if (isVector) {
5194 SDValue StackPtr;
5195 if (isPPC64)
5196 StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
5197 else
5198 StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
5199 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr,
5200 N2: DAG.getConstant(Val: ArgOffset, DL: dl, VT: PtrVT));
5201 }
5202 MemOpChains.push_back(
5203 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
5204 // Calculate and remember argument location.
5205 } else
5206 CalculateTailCallArgDest(DAG, MF, IsPPC64: isPPC64, Arg, SPDiff, ArgOffset,
5207 TailCallArguments);
5208}
5209
5210static void
5211PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5212 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5213 SDValue FPOp,
5214 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5215 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5216 // might overwrite each other in case of tail call optimization.
5217 SmallVector<SDValue, 8> MemOpChains2;
5218 // Do not flag preceding copytoreg stuff together with the following stuff.
5219 InGlue = SDValue();
5220 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArgs: TailCallArguments,
5221 MemOpChains&: MemOpChains2, dl);
5222 if (!MemOpChains2.empty())
5223 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains2);
5224
5225 // Store the return address to the appropriate stack slot.
5226 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, OldRetAddr: LROp, OldFP: FPOp, SPDiff, dl);
5227
5228 // Emit callseq_end just before tailcall node.
5229 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue: InGlue, DL: dl);
5230 InGlue = Chain.getValue(R: 1);
5231}
5232
5233// Is this global address that of a function that can be called by name? (as
5234// opposed to something that must hold a descriptor for an indirect call).
5235static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5236 if (GV) {
5237 if (GV->isThreadLocal())
5238 return false;
5239
5240 return GV->getValueType()->isFunctionTy();
5241 }
5242
5243 return false;
5244}
5245
/// Lower the results of a call: copy each return value out of its assigned
/// physical register, undo any promotion recorded by the calling convention,
/// and append the values to \p InVals. Returns the updated chain.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  // The cold calling convention on SVR4 has its own return-value convention.
  CCRetInfo.AnalyzeCallResult(
      Ins, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
              ? RetCC_PPC_Cold
              : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    // Under SPE an f64 result is returned split across two consecutive i32
    // register locations; recombine the halves (note the extra ++i below
    // consumes the second location).
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
                                      Glue: InGlue);
      Chain = Lo.getValue(R: 1);
      InGlue = Lo.getValue(R: 2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
                                      Glue: InGlue);
      Chain = Hi.getValue(R: 1);
      InGlue = Hi.getValue(R: 2);
      // Register order depends on endianness; swap on big-endian.
      if (!Subtarget.isLittleEndian())
        std::swap (a&: Lo, b&: Hi);
      Val = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue: InGlue);
      Chain = Val.getValue(R: 1);
      InGlue = Val.getValue(R: 2);
    }

    // Undo the promotion the calling convention applied to the value.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: VA.getLocVT(), N1: Val,
                        N2: DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: VA.getLocVT(), N1: Val,
                        N2: DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
      break;
    }

    InVals.push_back(Elt: Val);
  }

  return Chain;
}
5309
5310static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5311 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5312 auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5313 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5314
5315 // PatchPoint calls are not indirect.
5316 if (isPatchPoint)
5317 return false;
5318
5319 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Val: Callee))
5320 return false;
5321
5322 // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5323 // becuase the immediate function pointer points to a descriptor instead of
5324 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5325 // pointer immediate points to the global entry point, while the BLA would
5326 // need to jump to the local entry point (see rL211174).
5327 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5328 isBLACompatibleAddress(Op: Callee, DAG))
5329 return false;
5330
5331 return true;
5332}
5333
5334// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5335static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5336 return Subtarget.isAIXABI() ||
5337 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5338}
5339
/// Select the PPCISD call opcode for this call: a tail-call return, an
/// indirect call (with or without TOC restore), a PC-relative call, or a
/// direct call (with or without a TOC-restore nop). When \p IsStrictFPCall
/// is set, the opcode is swapped for its rounding-mode-preserving variant.
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
                              const Function &Caller, const SDValue &Callee,
                              const PPCSubtarget &Subtarget,
                              const TargetMachine &TM,
                              bool IsStrictFPCall = false) {
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  unsigned RetOpc = 0;
  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                 : PPCISD::BCTRL;
  } else if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    RetOpc = PPCISD::CALL_NOTOC;
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
    // The ABIs that maintain a TOC pointer across calls need to have a nop
    // immediately following the call instruction if the caller and callee may
    // have different TOC bases. At link time if the linker determines the calls
    // may not share a TOC base, the call is redirected to a trampoline inserted
    // by the linker. The trampoline will (among other things) save the callers
    // TOC pointer at an ABI designated offset in the linkage area and the
    // linker will rewrite the nop to be a load of the TOC pointer from the
    // linkage area into gpr2.
    auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    RetOpc =
        callsShareTOCBase(Caller: &Caller, CalleeGV: GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
  } else
    RetOpc = PPCISD::CALL;
  // Strict FP calls use variants that preserve the rounding mode.
  if (IsStrictFPCall) {
    switch (RetOpc) {
    default:
      llvm_unreachable("Unknown call opcode");
    case PPCISD::BCTRL_LOAD_TOC:
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
      break;
    case PPCISD::BCTRL:
      RetOpc = PPCISD::BCTRL_RM;
      break;
    case PPCISD::CALL_NOTOC:
      RetOpc = PPCISD::CALL_NOTOC_RM;
      break;
    case PPCISD::CALL:
      RetOpc = PPCISD::CALL_RM;
      break;
    case PPCISD::CALL_NOP:
      RetOpc = PPCISD::CALL_NOP_RM;
      break;
    }
  }
  return RetOpc;
}
5402
/// Rewrite the callee node into the form the selected call opcode expects:
/// a BLA-encodable immediate, an AIX entry-point MCSymbol, a target global
/// address (possibly PLT-decorated), or a target external symbol. Returns the
/// callee unchanged when no transformation applies.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  // If the address fits a BxA immediate (and the ABI permits BLA), use it.
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Op: Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
           !isa_and_nonnull<GlobalIFunc>(Val: GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&
      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;

  // On AIX a function is called through its entry-point symbol (".name"),
  // not the function symbol itself.
  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
    auto *S =
        static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(Func: GV, TM));

    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
    return DAG.getMCSymbol(Sym: S, VT: PtrVT);
  };

  auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
  if (isFunctionGlobalAddress(GV)) {
    // NOTE: intentionally shadows the outer GV with the cast-derived value.
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Val: Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, DL: dl, VT: Callee.getValueType(), offset: 0,
                                      TargetFlags: UsePlt ? PPCII::MO_PLT : 0);
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val: Callee)) {
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
      if (const Function *F =
              dyn_cast_or_null<Function>(Val: Mod->getNamedValue(Name: SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            Section: (Twine(".") + Twine(SymName)).str(), K: SectionKind::getMetadata(),
            CsectProp: XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(Sym: SymName, VT: Callee.getValueType(),
                                       TargetFlags: UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5480
5481static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5482 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5483 "Expected a CALLSEQ_STARTSDNode.");
5484
5485 // The last operand is the chain, except when the node has glue. If the node
5486 // has glue, then the last operand is the glue, and the chain is the second
5487 // last operand.
5488 SDValue LastValue = CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 1);
5489 if (LastValue.getValueType() != MVT::Glue)
5490 return LastValue;
5491
5492 return CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 2);
5493}
5494
5495// Creates the node that moves a functions address into the count register
5496// to prepare for an indirect call instruction.
5497static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5498 SDValue &Glue, SDValue &Chain,
5499 const SDLoc &dl) {
5500 SDValue MTCTROps[] = {Chain, Callee, Glue};
5501 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5502 Chain = DAG.getNode(Opcode: PPCISD::MTCTR, DL: dl, ResultTys: ReturnTypes,
5503 Ops: ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5504 // The glue is the second value produced.
5505 Glue = Chain.getValue(R: 1);
5506}
5507
/// Lower an indirect call through a function descriptor (64-bit SVR4 ELFv1
/// and AIX): load the entry point, TOC anchor, and environment pointer from
/// the descriptor, copy the latter two into their ABI registers (glued
/// together so nothing is scheduled in between), and hand the entry-point
/// address to prepareIndirectCall.
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  // Descriptor loads can be marked invariant when the target guarantees the
  // descriptor contents never change.
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                      ? (MachineMemOperand::MODereferenceable |
                         MachineMemOperand::MOInvariant)
                      : MachineMemOperand::MONone;

  // Alias info anchored on the called operand when a call instruction exists.
  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.getScalarIntVT();
  const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: Callee, PtrInfo: MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCAnchorOffset, DL: dl);
  SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddTOC,
                  PtrInfo: MPI.getWithOffset(O: TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(Val: EnvPtrOffset, DL: dl);
  SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddPtr,
                  PtrInfo: MPI.getWithOffset(O: EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, Reg: TOCReg, N: TOCPtr, Glue);
  Chain = TOCVal.getValue(R: 0);
  Glue = TOCVal.getValue(R: 1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, Reg: EnvPtrReg, N: LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(R: 0);
    Glue = EnvVal.getValue(R: 1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, Callee&: LoadFuncPtr, Glue, Chain, dl);
}
5599
// Build the operand list for the call node. The operand order is ABI
// significant and must be, in this order: the chain; for direct calls the
// callee (or, for indirect calls, the TOC-restore address, environment
// pointer and CTR register as required); the stack-pointer delta for tail
// calls; the argument registers; the TOC pointer register; CR1EQ for 32-bit
// SVR4 vararg calls; the call-preserved register mask; and finally the glue,
// if present.
static void
buildCallOperands(SmallVectorImpl<SDValue> &Ops,
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = Subtarget.getScalarIntVT();

  // First operand is always the chain.
  Ops.push_back(Elt: Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Elt: Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
      SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: StackPtr, N2: TOCOff);
      Ops.push_back(Elt: AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getEnvironmentPointerRegister(),
                                     VT: RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(Elt: DAG.getRegister(Reg: IsPPC64 ? PPC::CTR8 : PPC::CTR, VT: RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(Elt: DAG.getConstant(Val: SPDiff, DL: dl, VT: MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (const auto &[Reg, N] : RegsToPass)
    Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
      !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getTOCPointerRegister(), VT: RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(Elt: DAG.getRegister(Reg: PPC::CR1EQ, VT: MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Elt: Glue);
}
5681
// Complete the lowering of a call, common to all PPC ABIs: select the call
// opcode, materialize the callee (for direct calls) or the indirect-call
// sequence, build the operand list, then emit either a TC_RETURN node for
// tail calls or the call node followed by CALLSEQ_END and the lowering of
// the callee's return values into InVals.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // TOC-based ABIs (64-bit ELF without PC-relative addressing, and AIX)
  // need the TOC base pointer recorded as used for this function.
  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, Caller: DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, TM: DAG.getTarget(), IsStrictFPCall: CB ? CB->isStrictFP() : false);

  // Direct calls get the callee rewritten to the target-specific form;
  // indirect calls set up either the function-descriptor sequence or the
  // plain copy-to-CTR sequence.
  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, hasNest: CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  SmallVector<SDValue, 8> Ops;
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(Opcode: CallOpc, DL: dl, VT: MVT::Other, Ops);
    DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CFlags.NoMerge);
    return Ret;
  }

  std::array<EVT, 2> ReturnTypes = {._M_elems: {MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(Opcode: CallOpc, DL: dl, ResultTys: ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CFlags.NoMerge);
  Glue = Chain.getValue(R: 1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                         getTargetMachine().Options.GuaranteedTailCallOpt)
                            ? NumBytes
                            : 0;

  Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: BytesCalleePops, Glue, DL: dl);
  Glue = Chain.getValue(R: 1);

  return LowerCallResult(Chain, InGlue: Glue, CallConv: CFlags.CallConv, isVarArg: CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5751
5752bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5753 CallingConv::ID CalleeCC = CB->getCallingConv();
5754 const Function *CallerFunc = CB->getCaller();
5755 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5756 const Function *CalleeFunc = CB->getCalledFunction();
5757 if (!CalleeFunc)
5758 return false;
5759 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(Val: CalleeFunc);
5760
5761 SmallVector<ISD::OutputArg, 2> Outs;
5762 SmallVector<ISD::InputArg, 2> Ins;
5763
5764 GetReturnInfo(CC: CalleeCC, ReturnType: CalleeFunc->getReturnType(),
5765 attr: CalleeFunc->getAttributes(), Outs, TLI: *this,
5766 DL: CalleeFunc->getDataLayout());
5767
5768 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5769 isVarArg: CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5770 isCalleeExternalSymbol: false /*isCalleeExternalSymbol*/);
5771}
5772
5773bool PPCTargetLowering::isEligibleForTCO(
5774 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5775 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5776 const SmallVectorImpl<ISD::OutputArg> &Outs,
5777 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5778 bool isCalleeExternalSymbol) const {
5779 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5780 return false;
5781
5782 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5783 return IsEligibleForTailCallOptimization_64SVR4(
5784 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5785 isCalleeExternalSymbol);
5786 else
5787 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5788 isVarArg, Ins);
5789}
5790
// Entry point for call lowering: decide whether the call can actually be
// emitted as a tail call, build the CallFlags describing the call site, and
// dispatch to the ABI-specific LowerCall_* implementation.
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  if (isTailCall) {
    // Re-evaluate the frontend's tail-call request against the target's
    // eligibility rules; isTailCall (a reference into CLI) is cleared if
    // tail-call optimization is not possible here.
    MachineFunction &MF = DAG.getMachineFunction();
    CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
    auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Val: Callee);

    isTailCall =
        isEligibleForTCO(CalleeGV: GV, CalleeCC: CallConv, CallerCC, CB, isVarArg, Outs, Ins,
                         CallerFunc: &(MF.getFunction()), isCalleeExternalSymbol: IsCalleeExternalSymbol);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error(reason: "failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Val: Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Op: Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest
      Subtarget.is64BitELFABI() &&
          any_of(Range&: Outs, P: [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
      CLI.NoMerge);

  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  assert(Subtarget.isSVR4ABI());
  if (Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5868
// Lower an outgoing call for the 32-bit SVR4 ABI: assign argument locations,
// copy by-value aggregates into the caller's frame outside the call
// sequence, place arguments in registers or the parameter area, signal
// floating-point register usage to vararg callees via CR bit 6, and
// complete the call through FinishCall().
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Size: Subtarget.getFrameLowering()->getLinkageSize(),
                       Alignment: PtrAlign);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (!ArgFlags.isVarArg()) {
        Result = CC_PPC32_SVR4(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full, ArgFlags,
                               OrigTy: Outs[i].OrigTy, State&: CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full,
                                      ArgFlags, OrigTy: Outs[i].OrigTy, State&: CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << ArgVT << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4);
  }

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getStackSize();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: IsTailCall, ParamSize: NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
      PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
                           N1: StackPtr, N2: PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
                                  Chain: CallSeqStart.getNode()->getOperand(Num: 0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: NumBytes, OutSize: 0,
                                                     DL: SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
                             To: NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Opcode: Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        DL: dl, VT: MVT::i32, Operand: Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        // Under SPE an f64 argument is split into its two 32-bit halves and
        // passed in the pair of GPRs the calling convention allocated
        // (consuming two consecutive ArgLocs entries).
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
                                   N2: DAG.getIntPtrConstant(Val: IsLE ? 0 : 1, DL: dl));
        RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y: SVal.getValue(R: 0)));
        SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
                           N2: DAG.getIntPtrConstant(Val: IsLE ? 1 : 0, DL: dl));
        RegsToPass.push_back(Elt: std::make_pair(x: ArgLocs[++i].getLocReg(),
                                             y: SVal.getValue(R: 0)));
      } else
        RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
        PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
                             N1: StackPtr, N2: PtrOff);

        MemOpChains.push_back(
            Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, IsPPC64: false, Arg, SPDiff, ArgOffset: LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (const auto &[Reg, N] : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
    InGlue = Chain.getValue(R: 1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
    SDValue Ops[] = { Chain, InGlue };

    Chain = DAG.getNode(Opcode: seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, DL: dl,
                        VTList: VTs, Ops: ArrayRef(Ops, InGlue.getNode() ? 2 : 1));

    InGlue = Chain.getValue(R: 1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6101
6102// Copy an argument into memory, being careful to do this outside the
6103// call sequence for the call to which the argument belongs.
6104SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6105 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6106 SelectionDAG &DAG, const SDLoc &dl) const {
6107 SDValue MemcpyCall = CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
6108 Chain: CallSeqStart.getNode()->getOperand(Num: 0),
6109 Flags, DAG, dl);
6110 // The MEMCPY must go outside the CALLSEQ_START..END.
6111 int64_t FrameSize = CallSeqStart.getConstantOperandVal(i: 1);
6112 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: FrameSize, OutSize: 0,
6113 DL: SDLoc(MemcpyCall));
6114 DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
6115 To: NewCallSeqStart.getNode());
6116 return NewCallSeqStart;
6117}
6118
6119SDValue PPCTargetLowering::LowerCall_64SVR4(
6120 SDValue Chain, SDValue Callee, CallFlags CFlags,
6121 const SmallVectorImpl<ISD::OutputArg> &Outs,
6122 const SmallVectorImpl<SDValue> &OutVals,
6123 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6124 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6125 const CallBase *CB) const {
6126 bool isELFv2ABI = Subtarget.isELFv2ABI();
6127 bool isLittleEndian = Subtarget.isLittleEndian();
6128 unsigned NumOps = Outs.size();
6129 bool IsSibCall = false;
6130 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6131
6132 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
6133 unsigned PtrByteSize = 8;
6134
6135 MachineFunction &MF = DAG.getMachineFunction();
6136
6137 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6138 IsSibCall = true;
6139
6140 // Mark this function as potentially containing a function that contains a
6141 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6142 // and restoring the callers stack pointer in this functions epilog. This is
6143 // done because by tail calling the called function might overwrite the value
6144 // in this function's (MF) stack pointer stack slot 0(SP).
6145 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6146 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6147
6148 assert(!(IsFastCall && CFlags.IsVarArg) &&
6149 "fastcc not supported on varargs functions");
6150
6151 // Count how many bytes are to be pushed on the stack, including the linkage
6152 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6153 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6154 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6155 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6156 unsigned NumBytes = LinkageSize;
6157 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6158
6159 static const MCPhysReg GPR[] = {
6160 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6161 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6162 };
6163 static const MCPhysReg VR[] = {
6164 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6165 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6166 };
6167
6168 const unsigned NumGPRs = std::size(GPR);
6169 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6170 const unsigned NumVRs = std::size(VR);
6171
6172 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6173 // can be passed to the callee in registers.
6174 // For the fast calling convention, there is another check below.
6175 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6176 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6177 if (!HasParameterArea) {
6178 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6179 unsigned AvailableFPRs = NumFPRs;
6180 unsigned AvailableVRs = NumVRs;
6181 unsigned NumBytesTmp = NumBytes;
6182 for (unsigned i = 0; i != NumOps; ++i) {
6183 if (Outs[i].Flags.isNest()) continue;
6184 if (CalculateStackSlotUsed(ArgVT: Outs[i].VT, OrigVT: Outs[i].ArgVT, Flags: Outs[i].Flags,
6185 PtrByteSize, LinkageSize, ParamAreaSize,
6186 ArgOffset&: NumBytesTmp, AvailableFPRs, AvailableVRs))
6187 HasParameterArea = true;
6188 }
6189 }
6190
6191 // When using the fast calling convention, we don't provide backing for
6192 // arguments that will be in registers.
6193 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6194
6195 // Avoid allocating parameter area for fastcc functions if all the arguments
6196 // can be passed in the registers.
6197 if (IsFastCall)
6198 HasParameterArea = false;
6199
6200 // Add up all the space actually used.
6201 for (unsigned i = 0; i != NumOps; ++i) {
6202 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6203 EVT ArgVT = Outs[i].VT;
6204 EVT OrigVT = Outs[i].ArgVT;
6205
6206 if (Flags.isNest())
6207 continue;
6208
6209 if (IsFastCall) {
6210 if (Flags.isByVal()) {
6211 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6212 if (NumGPRsUsed > NumGPRs)
6213 HasParameterArea = true;
6214 } else {
6215 switch (ArgVT.getSimpleVT().SimpleTy) {
6216 default: llvm_unreachable("Unexpected ValueType for argument!");
6217 case MVT::i1:
6218 case MVT::i32:
6219 case MVT::i64:
6220 if (++NumGPRsUsed <= NumGPRs)
6221 continue;
6222 break;
6223 case MVT::v4i32:
6224 case MVT::v8i16:
6225 case MVT::v16i8:
6226 case MVT::v2f64:
6227 case MVT::v2i64:
6228 case MVT::v1i128:
6229 case MVT::f128:
6230 if (++NumVRsUsed <= NumVRs)
6231 continue;
6232 break;
6233 case MVT::v4f32:
6234 if (++NumVRsUsed <= NumVRs)
6235 continue;
6236 break;
6237 case MVT::f32:
6238 case MVT::f64:
6239 if (++NumFPRsUsed <= NumFPRs)
6240 continue;
6241 break;
6242 }
6243 HasParameterArea = true;
6244 }
6245 }
6246
6247 /* Respect alignment of argument on the stack. */
6248 auto Alignement =
6249 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6250 NumBytes = alignTo(Size: NumBytes, A: Alignement);
6251
6252 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6253 if (Flags.isInConsecutiveRegsLast())
6254 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6255 }
6256
6257 unsigned NumBytesActuallyUsed = NumBytes;
6258
6259 // In the old ELFv1 ABI,
6260 // the prolog code of the callee may store up to 8 GPR argument registers to
6261 // the stack, allowing va_start to index over them in memory if its varargs.
6262 // Because we cannot tell if this is needed on the caller side, we have to
6263 // conservatively assume that it is needed. As such, make sure we have at
6264 // least enough stack space for the caller to store the 8 GPRs.
6265 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6266 // really requires memory operands, e.g. a vararg function.
6267 if (HasParameterArea)
6268 NumBytes = std::max(a: NumBytes, b: LinkageSize + 8 * PtrByteSize);
6269 else
6270 NumBytes = LinkageSize;
6271
6272 // Tail call needs the stack to be aligned.
6273 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6274 NumBytes = EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes);
6275
6276 int SPDiff = 0;
6277
6278 // Calculate by how many bytes the stack has to be adjusted in case of tail
6279 // call optimization.
6280 if (!IsSibCall)
6281 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: CFlags.IsTailCall, ParamSize: NumBytes);
6282
6283 // To protect arguments on the stack from being clobbered in a tail call,
6284 // force all the loads to happen before doing any other lowering.
6285 if (CFlags.IsTailCall)
6286 Chain = DAG.getStackArgumentTokenFactor(Chain);
6287
6288 // Adjust the stack pointer for the new arguments...
6289 // These operations are automatically eliminated by the prolog/epilog pass
6290 if (!IsSibCall)
6291 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
6292 SDValue CallSeqStart = Chain;
6293
6294 // Load the return address and frame pointer so it can be move somewhere else
6295 // later.
6296 SDValue LROp, FPOp;
6297 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);
6298
6299 // Set up a copy of the stack pointer for use loading and storing any
6300 // arguments that may not fit in the registers available for argument
6301 // passing.
6302 SDValue StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
6303
6304 // Figure out which arguments are going to go in registers, and which in
6305 // memory. Also, if this is a vararg function, floating point operations
6306 // must be stored to our stack, and loaded into integer regs as well, if
6307 // any integer regs are available for argument passing.
6308 unsigned ArgOffset = LinkageSize;
6309
6310 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6311 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6312
6313 SmallVector<SDValue, 8> MemOpChains;
6314 for (unsigned i = 0; i != NumOps; ++i) {
6315 SDValue Arg = OutVals[i];
6316 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6317 EVT ArgVT = Outs[i].VT;
6318 EVT OrigVT = Outs[i].ArgVT;
6319
6320 // PtrOff will be used to store the current argument to the stack if a
6321 // register cannot be found for it.
6322 SDValue PtrOff;
6323
6324 // We re-align the argument offset for each argument, except when using the
6325 // fast calling convention, when we need to make sure we do that only when
6326 // we'll actually use a stack slot.
6327 auto ComputePtrOff = [&]() {
6328 /* Respect alignment of argument on the stack. */
6329 auto Alignment =
6330 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6331 ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
6332
6333 PtrOff = DAG.getConstant(Val: ArgOffset, DL: dl, VT: StackPtr.getValueType());
6334
6335 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6336 };
6337
6338 if (!IsFastCall) {
6339 ComputePtrOff();
6340
6341 /* Compute GPR index associated with argument offset. */
6342 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6343 GPR_idx = std::min(a: GPR_idx, b: NumGPRs);
6344 }
6345
6346 // Promote integers to 64-bit values.
6347 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6348 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6349 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6350 Arg = DAG.getNode(Opcode: ExtOp, DL: dl, VT: MVT::i64, Operand: Arg);
6351 }
6352
6353 // FIXME memcpy is used way more than necessary. Correctness first.
6354 // Note: "by value" is code for passing a structure by value, not
6355 // basic types.
6356 if (Flags.isByVal()) {
6357 // Note: Size includes alignment padding, so
6358 // struct x { short a; char b; }
6359 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6360 // These are the proper values we need for right-justifying the
6361 // aggregate in a parameter register.
6362 unsigned Size = Flags.getByValSize();
6363
6364 // An empty aggregate parameter takes up no storage and no
6365 // registers.
6366 if (Size == 0)
6367 continue;
6368
6369 if (IsFastCall)
6370 ComputePtrOff();
6371
6372 // All aggregates smaller than 8 bytes must be passed right-justified.
6373 if (Size==1 || Size==2 || Size==4) {
6374 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6375 if (GPR_idx != NumGPRs) {
6376 SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: Arg,
6377 PtrInfo: MachinePointerInfo(), MemVT: VT);
6378 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6379 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6380
6381 ArgOffset += PtrByteSize;
6382 continue;
6383 }
6384 }
6385
6386 if (GPR_idx == NumGPRs && Size < 8) {
6387 SDValue AddPtr = PtrOff;
6388 if (!isLittleEndian) {
6389 SDValue Const = DAG.getConstant(Val: PtrByteSize - Size, DL: dl,
6390 VT: PtrOff.getValueType());
6391 AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6392 }
6393 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6394 CallSeqStart,
6395 Flags, DAG, dl);
6396 ArgOffset += PtrByteSize;
6397 continue;
6398 }
6399 // Copy the object to parameter save area if it can not be entirely passed
6400 // by registers.
6401 // FIXME: we only need to copy the parts which need to be passed in
6402 // parameter save area. For the parts passed by registers, we don't need
6403 // to copy them to the stack although we need to allocate space for them
6404 // in parameter save area.
6405 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6406 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6407 CallSeqStart,
6408 Flags, DAG, dl);
6409
6410 // When a register is available, pass a small aggregate right-justified.
6411 if (Size < 8 && GPR_idx != NumGPRs) {
6412 // The easiest way to get this right-justified in a register
6413 // is to copy the structure into the rightmost portion of a
6414 // local variable slot, then load the whole slot into the
6415 // register.
6416 // FIXME: The memcpy seems to produce pretty awful code for
6417 // small aggregates, particularly for packed ones.
6418 // FIXME: It would be preferable to use the slot in the
6419 // parameter save area instead of a new local variable.
6420 SDValue AddPtr = PtrOff;
6421 if (!isLittleEndian) {
6422 SDValue Const = DAG.getConstant(Val: 8 - Size, DL: dl, VT: PtrOff.getValueType());
6423 AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6424 }
6425 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6426 CallSeqStart,
6427 Flags, DAG, dl);
6428
6429 // Load the slot into the register.
6430 SDValue Load =
6431 DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6432 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6433 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6434
6435 // Done with this argument.
6436 ArgOffset += PtrByteSize;
6437 continue;
6438 }
6439
6440 // For aggregates larger than PtrByteSize, copy the pieces of the
6441 // object that fit into registers from the parameter save area.
6442 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6443 SDValue Const = DAG.getConstant(Val: j, DL: dl, VT: PtrOff.getValueType());
6444 SDValue AddArg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Arg, N2: Const);
6445 if (GPR_idx != NumGPRs) {
6446 unsigned LoadSizeInBits = std::min(a: PtrByteSize, b: (Size - j)) * 8;
6447 EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadSizeInBits);
6448 SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: AddArg,
6449 PtrInfo: MachinePointerInfo(), MemVT: ObjType);
6450
6451 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6452 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6453 ArgOffset += PtrByteSize;
6454 } else {
6455 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6456 break;
6457 }
6458 }
6459 continue;
6460 }
6461
6462 switch (Arg.getSimpleValueType().SimpleTy) {
6463 default: llvm_unreachable("Unexpected ValueType for argument!");
6464 case MVT::i1:
6465 case MVT::i32:
6466 case MVT::i64:
6467 if (Flags.isNest()) {
6468 // The 'nest' parameter, if any, is passed in R11.
6469 RegsToPass.push_back(Elt: std::make_pair(x: PPC::X11, y&: Arg));
6470 break;
6471 }
6472
6473 // These can be scalar arguments or elements of an integer array type
6474 // passed directly. Clang may use those instead of "byval" aggregate
6475 // types to avoid forcing arguments to memory unnecessarily.
6476 if (GPR_idx != NumGPRs) {
6477 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Arg));
6478 } else {
6479 if (IsFastCall)
6480 ComputePtrOff();
6481
6482 assert(HasParameterArea &&
6483 "Parameter area must exist to pass an argument in memory.");
6484 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6485 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6486 TailCallArguments, dl);
6487 if (IsFastCall)
6488 ArgOffset += PtrByteSize;
6489 }
6490 if (!IsFastCall)
6491 ArgOffset += PtrByteSize;
6492 break;
6493 case MVT::f32:
6494 case MVT::f64: {
6495 // These can be scalar arguments or elements of a float array type
6496 // passed directly. The latter are used to implement ELFv2 homogenous
6497 // float aggregates.
6498
6499 // Named arguments go into FPRs first, and once they overflow, the
6500 // remaining arguments go into GPRs and then the parameter save area.
6501 // Unnamed arguments for vararg functions always go to GPRs and
6502 // then the parameter save area. For now, put all arguments to vararg
6503 // routines always in both locations (FPR *and* GPR or stack slot).
6504 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6505 bool NeededLoad = false;
6506
6507 // First load the argument into the next available FPR.
6508 if (FPR_idx != NumFPRs)
6509 RegsToPass.push_back(Elt: std::make_pair(x: FPR[FPR_idx++], y&: Arg));
6510
6511 // Next, load the argument into GPR or stack slot if needed.
6512 if (!NeedGPROrStack)
6513 ;
6514 else if (GPR_idx != NumGPRs && !IsFastCall) {
6515 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6516 // once we support fp <-> gpr moves.
6517
6518 // In the non-vararg case, this can only ever happen in the
6519 // presence of f32 array types, since otherwise we never run
6520 // out of FPRs before running out of GPRs.
6521 SDValue ArgVal;
6522
6523 // Double values are always passed in a single GPR.
6524 if (Arg.getValueType() != MVT::f32) {
6525 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: Arg);
6526
6527 // Non-array float values are extended and passed in a GPR.
6528 } else if (!Flags.isInConsecutiveRegs()) {
6529 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6530 ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6531
6532 // If we have an array of floats, we collect every odd element
6533 // together with its predecessor into one GPR.
6534 } else if (ArgOffset % PtrByteSize != 0) {
6535 SDValue Lo, Hi;
6536 Lo = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: OutVals[i - 1]);
6537 Hi = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6538 if (!isLittleEndian)
6539 std::swap(a&: Lo, b&: Hi);
6540 ArgVal = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
6541
6542 // The final element, if even, goes into the first half of a GPR.
6543 } else if (Flags.isInConsecutiveRegsLast()) {
6544 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6545 ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6546 if (!isLittleEndian)
6547 ArgVal = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: ArgVal,
6548 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
6549
6550 // Non-final even elements are skipped; they will be handled
6551 // together the with subsequent argument on the next go-around.
6552 } else
6553 ArgVal = SDValue();
6554
6555 if (ArgVal.getNode())
6556 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: ArgVal));
6557 } else {
6558 if (IsFastCall)
6559 ComputePtrOff();
6560
6561 // Single-precision floating-point values are mapped to the
6562 // second (rightmost) word of the stack doubleword.
6563 if (Arg.getValueType() == MVT::f32 &&
6564 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6565 SDValue ConstFour = DAG.getConstant(Val: 4, DL: dl, VT: PtrOff.getValueType());
6566 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: ConstFour);
6567 }
6568
6569 assert(HasParameterArea &&
6570 "Parameter area must exist to pass an argument in memory.");
6571 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6572 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6573 TailCallArguments, dl);
6574
6575 NeededLoad = true;
6576 }
6577 // When passing an array of floats, the array occupies consecutive
6578 // space in the argument area; only round up to the next doubleword
6579 // at the end of the array. Otherwise, each float takes 8 bytes.
6580 if (!IsFastCall || NeededLoad) {
6581 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6582 Flags.isInConsecutiveRegs()) ? 4 : 8;
6583 if (Flags.isInConsecutiveRegsLast())
6584 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6585 }
6586 break;
6587 }
6588 case MVT::v4f32:
6589 case MVT::v4i32:
6590 case MVT::v8i16:
6591 case MVT::v16i8:
6592 case MVT::v2f64:
6593 case MVT::v2i64:
6594 case MVT::v1i128:
6595 case MVT::f128:
6596 // These can be scalar arguments or elements of a vector array type
6597 // passed directly. The latter are used to implement ELFv2 homogenous
6598 // vector aggregates.
6599
6600 // For a varargs call, named arguments go into VRs or on the stack as
6601 // usual; unnamed arguments always go to the stack or the corresponding
6602 // GPRs when within range. For now, we always put the value in both
6603 // locations (or even all three).
6604 if (CFlags.IsVarArg) {
6605 assert(HasParameterArea &&
6606 "Parameter area must exist if we have a varargs call.");
6607 // We could elide this store in the case where the object fits
6608 // entirely in R registers. Maybe later.
6609 SDValue Store =
6610 DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6611 MemOpChains.push_back(Elt: Store);
6612 if (VR_idx != NumVRs) {
6613 SDValue Load =
6614 DAG.getLoad(VT: MVT::v4f32, dl, Chain: Store, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6615 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6616 RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Load));
6617 }
6618 ArgOffset += 16;
6619 for (unsigned i=0; i<16; i+=PtrByteSize) {
6620 if (GPR_idx == NumGPRs)
6621 break;
6622 SDValue Ix = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
6623 N2: DAG.getConstant(Val: i, DL: dl, VT: PtrVT));
6624 SDValue Load =
6625 DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Ix, PtrInfo: MachinePointerInfo());
6626 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6627 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6628 }
6629 break;
6630 }
6631
6632 // Non-varargs Altivec params go into VRs or on the stack.
6633 if (VR_idx != NumVRs) {
6634 RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Arg));
6635 } else {
6636 if (IsFastCall)
6637 ComputePtrOff();
6638
6639 assert(HasParameterArea &&
6640 "Parameter area must exist to pass an argument in memory.");
6641 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6642 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: true, MemOpChains,
6643 TailCallArguments, dl);
6644 if (IsFastCall)
6645 ArgOffset += 16;
6646 }
6647
6648 if (!IsFastCall)
6649 ArgOffset += 16;
6650 break;
6651 }
6652 }
6653
6654 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6655 "mismatch in size of parameter area");
6656 (void)NumBytesActuallyUsed;
6657
6658 if (!MemOpChains.empty())
6659 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
6660
6661 // Check if this is an indirect call (MTCTR/BCTRL).
6662 // See prepareDescriptorIndirectCall and buildCallOperands for more
6663 // information about calls through function pointers in the 64-bit SVR4 ABI.
6664 if (CFlags.IsIndirect) {
6665 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6666 // caller in the TOC save area.
6667 if (isTOCSaveRestoreRequired(Subtarget)) {
6668 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6669 // Load r2 into a virtual register and store it to the TOC save area.
6670 setUsesTOCBasePtr(DAG);
6671 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: PPC::X2, VT: MVT::i64);
6672 // TOC save area offset.
6673 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6674 SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
6675 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6676 Chain = DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: AddPtr,
6677 PtrInfo: MachinePointerInfo::getStack(
6678 MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
6679 }
6680 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6681 // This does not mean the MTCTR instruction must use R12; it's easier
6682 // to model this as an extra parameter, so do that.
6683 if (isELFv2ABI && !CFlags.IsPatchPoint)
6684 RegsToPass.push_back(Elt: std::make_pair(x: (unsigned)PPC::X12, y&: Callee));
6685 }
6686
6687 // Build a sequence of copy-to-reg nodes chained together with token chain
6688 // and flag operands which copy the outgoing args into the appropriate regs.
6689 SDValue InGlue;
6690 for (const auto &[Reg, N] : RegsToPass) {
6691 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
6692 InGlue = Chain.getValue(R: 1);
6693 }
6694
6695 if (CFlags.IsTailCall && !IsSibCall)
6696 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6697 TailCallArguments);
6698
6699 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
6700 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6701}
6702
6703// Returns true when the shadow of a general purpose argument register
6704// in the parameter save area is aligned to at least 'RequiredAlign'.
6705static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6706 assert(RequiredAlign.value() <= 16 &&
6707 "Required alignment greater than stack alignment.");
6708 switch (Reg) {
6709 default:
6710 report_fatal_error(reason: "called on invalid register.");
6711 case PPC::R5:
6712 case PPC::R9:
6713 case PPC::X3:
6714 case PPC::X5:
6715 case PPC::X7:
6716 case PPC::X9:
6717 // These registers are 16 byte aligned which is the most strict aligment
6718 // we can support.
6719 return true;
6720 case PPC::R3:
6721 case PPC::R7:
6722 case PPC::X4:
6723 case PPC::X6:
6724 case PPC::X8:
6725 case PPC::X10:
6726 // The shadow of these registers in the PSA is 8 byte aligned.
6727 return RequiredAlign <= 8;
6728 case PPC::R4:
6729 case PPC::R6:
6730 case PPC::R8:
6731 case PPC::R10:
6732 return RequiredAlign <= 4;
6733 }
6734}
6735
6736static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6737 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6738 Type *OrigTy, CCState &State) {
6739 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6740 State.getMachineFunction().getSubtarget());
6741 const bool IsPPC64 = Subtarget.isPPC64();
6742 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6743 const Align PtrAlign(PtrSize);
6744 const Align StackAlign(16);
6745 const MVT RegVT = Subtarget.getScalarIntVT();
6746
6747 if (ValVT == MVT::f128)
6748 report_fatal_error(reason: "f128 is unimplemented on AIX.");
6749
6750 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6751 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6752 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6753 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6754 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6755 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6756
6757 static const MCPhysReg VR[] = {// Vector registers.
6758 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6759 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6760 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6761
6762 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6763
6764 if (ArgFlags.isNest()) {
6765 MCRegister EnvReg = State.AllocateReg(Reg: IsPPC64 ? PPC::X11 : PPC::R11);
6766 if (!EnvReg)
6767 report_fatal_error(reason: "More then one nest argument.");
6768 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: EnvReg, LocVT: RegVT, HTP: LocInfo));
6769 return false;
6770 }
6771
6772 if (ArgFlags.isByVal()) {
6773 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6774 if (ByValAlign > StackAlign)
6775 report_fatal_error(reason: "Pass-by-value arguments with alignment greater than "
6776 "16 are not supported.");
6777
6778 const unsigned ByValSize = ArgFlags.getByValSize();
6779 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6780
6781 // An empty aggregate parameter takes up no storage and no registers,
6782 // but needs a MemLoc for a stack slot for the formal arguments side.
6783 if (ByValSize == 0) {
6784 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6785 Offset: State.getStackSize(), LocVT: RegVT, HTP: LocInfo));
6786 return false;
6787 }
6788
6789 // Shadow allocate any registers that are not properly aligned.
6790 unsigned NextReg = State.getFirstUnallocated(Regs: GPRs);
6791 while (NextReg != GPRs.size() &&
6792 !isGPRShadowAligned(Reg: GPRs[NextReg], RequiredAlign: ObjAlign)) {
6793 // Shadow allocate next registers since its aligment is not strict enough.
6794 MCRegister Reg = State.AllocateReg(Regs: GPRs);
6795 // Allocate the stack space shadowed by said register.
6796 State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6797 assert(Reg && "Alocating register unexpectedly failed.");
6798 (void)Reg;
6799 NextReg = State.getFirstUnallocated(Regs: GPRs);
6800 }
6801
6802 const unsigned StackSize = alignTo(Size: ByValSize, A: ObjAlign);
6803 unsigned Offset = State.AllocateStack(Size: StackSize, Alignment: ObjAlign);
6804 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6805 if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
6806 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6807 else {
6808 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6809 Offset, LocVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6810 HTP: LocInfo));
6811 break;
6812 }
6813 }
6814 return false;
6815 }
6816
6817 // Arguments always reserve parameter save area.
6818 switch (ValVT.SimpleTy) {
6819 default:
6820 report_fatal_error(reason: "Unhandled value type for argument.");
6821 case MVT::i64:
6822 // i64 arguments should have been split to i32 for PPC32.
6823 assert(IsPPC64 && "PPC32 should have split i64 values.");
6824 [[fallthrough]];
6825 case MVT::i1:
6826 case MVT::i32: {
6827 const unsigned Offset = State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6828 // AIX integer arguments are always passed in register width.
6829 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6830 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6831 : CCValAssign::LocInfo::ZExt;
6832 if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
6833 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6834 else
6835 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT: RegVT, HTP: LocInfo));
6836
6837 return false;
6838 }
6839 case MVT::f32:
6840 case MVT::f64: {
6841 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6842 const unsigned StoreSize = LocVT.getStoreSize();
6843 // Floats are always 4-byte aligned in the PSA on AIX.
6844 // This includes f64 in 64-bit mode for ABI compatibility.
6845 const unsigned Offset =
6846 State.AllocateStack(Size: IsPPC64 ? 8 : StoreSize, Alignment: Align(4));
6847 MCRegister FReg = State.AllocateReg(Regs: FPR);
6848 if (FReg)
6849 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: FReg, LocVT, HTP: LocInfo));
6850
6851 // Reserve and initialize GPRs or initialize the PSA as required.
6852 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6853 if (MCRegister Reg = State.AllocateReg(Regs: GPRs)) {
6854 assert(FReg && "An FPR should be available when a GPR is reserved.");
6855 if (State.isVarArg()) {
6856 // Successfully reserved GPRs are only initialized for vararg calls.
6857 // Custom handling is required for:
6858 // f64 in PPC32 needs to be split into 2 GPRs.
6859 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6860 State.addLoc(
6861 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6862 }
6863 } else {
6864 // If there are insufficient GPRs, the PSA needs to be initialized.
6865 // Initialization occurs even if an FPR was initialized for
6866 // compatibility with the AIX XL compiler. The full memory for the
6867 // argument will be initialized even if a prior word is saved in GPR.
6868 // A custom memLoc is used when the argument also passes in FPR so
6869 // that the callee handling can skip over it easily.
6870 State.addLoc(
6871 V: FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6872 HTP: LocInfo)
6873 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6874 break;
6875 }
6876 }
6877
6878 return false;
6879 }
6880 case MVT::v4f32:
6881 case MVT::v4i32:
6882 case MVT::v8i16:
6883 case MVT::v16i8:
6884 case MVT::v2i64:
6885 case MVT::v2f64:
6886 case MVT::v1i128: {
6887 const unsigned VecSize = 16;
6888 const Align VecAlign(VecSize);
6889
6890 if (!State.isVarArg()) {
6891 // If there are vector registers remaining we don't consume any stack
6892 // space.
6893 if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
6894 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
6895 return false;
6896 }
6897 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6898 // might be allocated in the portion of the PSA that is shadowed by the
6899 // GPRs.
6900 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6901 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6902 return false;
6903 }
6904
6905 unsigned NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
6906 // Burn any underaligned registers and their shadowed stack space until
6907 // we reach the required alignment.
6908 while (NextRegIndex != GPRs.size() &&
6909 !isGPRShadowAligned(Reg: GPRs[NextRegIndex], RequiredAlign: VecAlign)) {
6910 // Shadow allocate register and its stack shadow.
6911 MCRegister Reg = State.AllocateReg(Regs: GPRs);
6912 State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6913 assert(Reg && "Allocating register unexpectedly failed.");
6914 (void)Reg;
6915 NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
6916 }
6917
6918 // Vectors that are passed as fixed arguments are handled differently.
6919 // They are passed in VRs if any are available (unlike arguments passed
6920 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6921 // functions)
6922 if (!ArgFlags.isVarArg()) {
6923 if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
6924 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
6925 // Shadow allocate GPRs and stack space even though we pass in a VR.
6926 for (unsigned I = 0; I != VecSize; I += PtrSize)
6927 State.AllocateReg(Regs: GPRs);
6928 State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6929 return false;
6930 }
6931 // No vector registers remain so pass on the stack.
6932 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6933 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6934 return false;
6935 }
6936
6937 // If all GPRS are consumed then we pass the argument fully on the stack.
6938 if (NextRegIndex == GPRs.size()) {
6939 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6940 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6941 return false;
6942 }
6943
6944 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6945 // half of the argument, and then need to pass the remaining half on the
6946 // stack.
6947 if (GPRs[NextRegIndex] == PPC::R9) {
6948 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6949 State.addLoc(
6950 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6951
6952 const MCRegister FirstReg = State.AllocateReg(Reg: PPC::R9);
6953 const MCRegister SecondReg = State.AllocateReg(Reg: PPC::R10);
6954 assert(FirstReg && SecondReg &&
6955 "Allocating R9 or R10 unexpectedly failed.");
6956 State.addLoc(
6957 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: FirstReg, LocVT: RegVT, HTP: LocInfo));
6958 State.addLoc(
6959 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: SecondReg, LocVT: RegVT, HTP: LocInfo));
6960 return false;
6961 }
6962
6963 // We have enough GPRs to fully pass the vector argument, and we have
6964 // already consumed any underaligned registers. Start with the custom
6965 // MemLoc and then the custom RegLocs.
6966 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
6967 State.addLoc(
6968 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6969 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6970 const MCRegister Reg = State.AllocateReg(Regs: GPRs);
6971 assert(Reg && "Failed to allocated register for vararg vector argument");
6972 State.addLoc(
6973 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6974 }
6975 return false;
6976 }
6977 }
6978 return true;
6979}
6980
6981// So far, this function is only used by LowerFormalArguments_AIX()
6982static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6983 bool IsPPC64,
6984 bool HasP8Vector,
6985 bool HasVSX) {
6986 assert((IsPPC64 || SVT != MVT::i64) &&
6987 "i64 should have been split for 32-bit codegen.");
6988
6989 switch (SVT) {
6990 default:
6991 report_fatal_error(reason: "Unexpected value type for formal argument");
6992 case MVT::i1:
6993 case MVT::i32:
6994 case MVT::i64:
6995 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6996 case MVT::f32:
6997 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6998 case MVT::f64:
6999 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7000 case MVT::v4f32:
7001 case MVT::v4i32:
7002 case MVT::v8i16:
7003 case MVT::v16i8:
7004 case MVT::v2i64:
7005 case MVT::v2f64:
7006 case MVT::v1i128:
7007 return &PPC::VRRCRegClass;
7008 }
7009}
7010
7011static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7012 SelectionDAG &DAG, SDValue ArgValue,
7013 MVT LocVT, const SDLoc &dl) {
7014 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7015 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7016
7017 if (Flags.isSExt())
7018 ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: LocVT, N1: ArgValue,
7019 N2: DAG.getValueType(ValVT));
7020 else if (Flags.isZExt())
7021 ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: LocVT, N1: ArgValue,
7022 N2: DAG.getValueType(ValVT));
7023
7024 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ValVT, Operand: ArgValue);
7025}
7026
7027static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7028 const unsigned LASize = FL->getLinkageSize();
7029
7030 if (PPC::GPRCRegClass.contains(Reg)) {
7031 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7032 "Reg must be a valid argument register!");
7033 return LASize + 4 * (Reg - PPC::R3);
7034 }
7035
7036 if (PPC::G8RCRegClass.contains(Reg)) {
7037 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7038 "Reg must be a valid argument register!");
7039 return LASize + 8 * (Reg - PPC::X3);
7040 }
7041
7042 llvm_unreachable("Only general purpose registers expected.");
7043}
7044
7045// AIX ABI Stack Frame Layout:
7046//
7047// Low Memory +--------------------------------------------+
7048// SP +---> | Back chain | ---+
7049// | +--------------------------------------------+ |
7050// | | Saved Condition Register | |
7051// | +--------------------------------------------+ |
7052// | | Saved Linkage Register | |
7053// | +--------------------------------------------+ | Linkage Area
7054// | | Reserved for compilers | |
7055// | +--------------------------------------------+ |
7056// | | Reserved for binders | |
7057// | +--------------------------------------------+ |
7058// | | Saved TOC pointer | ---+
7059// | +--------------------------------------------+
7060// | | Parameter save area |
7061// | +--------------------------------------------+
7062// | | Alloca space |
7063// | +--------------------------------------------+
7064// | | Local variable space |
7065// | +--------------------------------------------+
7066// | | Float/int conversion temporary |
7067// | +--------------------------------------------+
7068// | | Save area for AltiVec registers |
7069// | +--------------------------------------------+
7070// | | AltiVec alignment padding |
7071// | +--------------------------------------------+
7072// | | Save area for VRSAVE register |
7073// | +--------------------------------------------+
7074// | | Save area for General Purpose registers |
7075// | +--------------------------------------------+
7076// | | Save area for Floating Point registers |
7077// | +--------------------------------------------+
7078// +---- | Back chain |
7079// High Memory +--------------------------------------------+
7080//
7081// Specifications:
7082// AIX 7.2 Assembler Language Reference
7083// Subroutine linkage convention
7084
7085SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7086 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7087 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7088 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7089
7090 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7091 CallConv == CallingConv::Fast) &&
7092 "Unexpected calling convention!");
7093
7094 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7095 report_fatal_error(reason: "Tail call support is unimplemented on AIX.");
7096
7097 if (useSoftFloat())
7098 report_fatal_error(reason: "Soft float support is unimplemented on AIX.");
7099
7100 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7101
7102 const bool IsPPC64 = Subtarget.isPPC64();
7103 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7104
7105 // Assign locations to all of the incoming arguments.
7106 SmallVector<CCValAssign, 16> ArgLocs;
7107 MachineFunction &MF = DAG.getMachineFunction();
7108 MachineFrameInfo &MFI = MF.getFrameInfo();
7109 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7110 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7111
7112 const EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
7113 // Reserve space for the linkage area on the stack.
7114 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7115 CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(PtrByteSize));
7116 uint64_t SaveStackPos = CCInfo.getStackSize();
7117 bool SaveParams = MF.getFunction().hasFnAttribute(Kind: "save-reg-params");
7118 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_AIX);
7119
7120 SmallVector<SDValue, 8> MemOps;
7121
7122 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7123 CCValAssign &VA = ArgLocs[I++];
7124 MVT LocVT = VA.getLocVT();
7125 MVT ValVT = VA.getValVT();
7126 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7127
7128 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7129 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7130 // For compatibility with the AIX XL compiler, the float args in the
7131 // parameter save area are initialized even if the argument is available
7132 // in register. The caller is required to initialize both the register
7133 // and memory, however, the callee can choose to expect it in either.
7134 // The memloc is dismissed here because the argument is retrieved from
7135 // the register.
7136 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7137 continue;
7138
7139 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7140 const TargetRegisterClass *RegClass = getRegClassForSVT(
7141 SVT: LocVT.SimpleTy, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(), HasVSX: Subtarget.hasVSX());
7142 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7143 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7144 const Register VReg = MF.addLiveIn(PReg: VA.getLocReg(), RC: RegClass);
7145 SDValue Parm = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: SaveVT);
7146 int FI = MFI.CreateFixedObject(Size: SaveVT.getStoreSize(), SPOffset: SaveStackPos, IsImmutable: true);
7147 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7148 SDValue StoreReg = DAG.getStore(Chain, dl, Val: Parm, Ptr: FIN,
7149 PtrInfo: MachinePointerInfo(), Alignment: Align(PtrByteSize));
7150 SaveStackPos = alignTo(Value: SaveStackPos + SaveVT.getStoreSize(), Align: PtrByteSize);
7151 MemOps.push_back(Elt: StoreReg);
7152 }
7153
7154 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7155 unsigned StoreSize =
7156 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7157 SaveStackPos = alignTo(Value: SaveStackPos + StoreSize, Align: PtrByteSize);
7158 }
7159
7160 auto HandleMemLoc = [&]() {
7161 const unsigned LocSize = LocVT.getStoreSize();
7162 const unsigned ValSize = ValVT.getStoreSize();
7163 assert((ValSize <= LocSize) &&
7164 "Object size is larger than size of MemLoc");
7165 int CurArgOffset = VA.getLocMemOffset();
7166 // Objects are right-justified because AIX is big-endian.
7167 if (LocSize > ValSize)
7168 CurArgOffset += LocSize - ValSize;
7169 // Potential tail calls could cause overwriting of argument stack slots.
7170 const bool IsImmutable =
7171 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7172 (CallConv == CallingConv::Fast));
7173 int FI = MFI.CreateFixedObject(Size: ValSize, SPOffset: CurArgOffset, IsImmutable);
7174 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7175 SDValue ArgValue =
7176 DAG.getLoad(VT: ValVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo());
7177
7178 // While the ABI specifies the argument type is (sign or zero) extended
7179 // out to register width, not all code is compliant. We truncate and
7180 // re-extend to be more forgiving of these callers when the argument type
7181 // is smaller than register width.
7182 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7183 ValVT.isInteger() &&
7184 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7185 // It is possible to have either real integer values
7186 // or integers that were not originally integers.
7187 // In the latter case, these could have came from structs,
7188 // and these integers would not have an extend on the parameter.
7189 // Since these types of integers do not have an extend specified
7190 // in the first place, the type of extend that we do should not matter.
7191 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7192 ? MVT::i8
7193 : ArgVT;
7194 SDValue ArgValueTrunc =
7195 DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: TruncatedArgVT, Operand: ArgValue);
7196 SDValue ArgValueExt =
7197 ArgSignExt ? DAG.getSExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT)
7198 : DAG.getZExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT);
7199 InVals.push_back(Elt: ArgValueExt);
7200 } else {
7201 InVals.push_back(Elt: ArgValue);
7202 }
7203 };
7204
7205 // Vector arguments to VaArg functions are passed both on the stack, and
7206 // in any available GPRs. Load the value from the stack and add the GPRs
7207 // as live ins.
7208 if (VA.isMemLoc() && VA.needsCustom()) {
7209 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7210 assert(isVarArg && "Only use custom memloc for vararg.");
7211 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7212 // matching custom RegLocs.
7213 const unsigned OriginalValNo = VA.getValNo();
7214 (void)OriginalValNo;
7215
7216 auto HandleCustomVecRegLoc = [&]() {
7217 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7218 "Missing custom RegLoc.");
7219 VA = ArgLocs[I++];
7220 assert(VA.getValVT().isVector() &&
7221 "Unexpected Val type for custom RegLoc.");
7222 assert(VA.getValNo() == OriginalValNo &&
7223 "ValNo mismatch between custom MemLoc and RegLoc.");
7224 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7225 MF.addLiveIn(PReg: VA.getLocReg(),
7226 RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
7227 HasVSX: Subtarget.hasVSX()));
7228 };
7229
7230 HandleMemLoc();
7231 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7232 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7233 // R10.
7234 HandleCustomVecRegLoc();
7235 HandleCustomVecRegLoc();
7236
7237 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7238 // we passed the vector in R5, R6, R7 and R8.
7239 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7240 assert(!IsPPC64 &&
7241 "Only 2 custom RegLocs expected for 64-bit codegen.");
7242 HandleCustomVecRegLoc();
7243 HandleCustomVecRegLoc();
7244 }
7245
7246 continue;
7247 }
7248
7249 if (VA.isRegLoc()) {
7250 if (VA.getValVT().isScalarInteger())
7251 FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
7252 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7253 switch (VA.getValVT().SimpleTy) {
7254 default:
7255 report_fatal_error(reason: "Unhandled value type for argument.");
7256 case MVT::f32:
7257 FuncInfo->appendParameterType(Type: PPCFunctionInfo::ShortFloatingPoint);
7258 break;
7259 case MVT::f64:
7260 FuncInfo->appendParameterType(Type: PPCFunctionInfo::LongFloatingPoint);
7261 break;
7262 }
7263 } else if (VA.getValVT().isVector()) {
7264 switch (VA.getValVT().SimpleTy) {
7265 default:
7266 report_fatal_error(reason: "Unhandled value type for argument.");
7267 case MVT::v16i8:
7268 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorChar);
7269 break;
7270 case MVT::v8i16:
7271 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorShort);
7272 break;
7273 case MVT::v4i32:
7274 case MVT::v2i64:
7275 case MVT::v1i128:
7276 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorInt);
7277 break;
7278 case MVT::v4f32:
7279 case MVT::v2f64:
7280 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorFloat);
7281 break;
7282 }
7283 }
7284 }
7285
7286 if (Flags.isByVal() && VA.isMemLoc()) {
7287 const unsigned Size =
7288 alignTo(Value: Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7289 Align: PtrByteSize);
7290 const int FI = MF.getFrameInfo().CreateFixedObject(
7291 Size, SPOffset: VA.getLocMemOffset(), /* IsImmutable */ false,
7292 /* IsAliased */ isAliased: true);
7293 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7294 InVals.push_back(Elt: FIN);
7295
7296 continue;
7297 }
7298
7299 if (Flags.isByVal()) {
7300 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7301
7302 const MCPhysReg ArgReg = VA.getLocReg();
7303 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7304
7305 const unsigned StackSize = alignTo(Value: Flags.getByValSize(), Align: PtrByteSize);
7306 const int FI = MF.getFrameInfo().CreateFixedObject(
7307 Size: StackSize, SPOffset: mapArgRegToOffsetAIX(Reg: ArgReg, FL), /* IsImmutable */ false,
7308 /* IsAliased */ isAliased: true);
7309 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7310 InVals.push_back(Elt: FIN);
7311
7312 // Add live ins for all the RegLocs for the same ByVal.
7313 const TargetRegisterClass *RegClass =
7314 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7315
7316 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7317 unsigned Offset) {
7318 const Register VReg = MF.addLiveIn(PReg: PhysReg, RC: RegClass);
7319 // Since the callers side has left justified the aggregate in the
7320 // register, we can simply store the entire register into the stack
7321 // slot.
7322 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
7323 // The store to the fixedstack object is needed becuase accessing a
7324 // field of the ByVal will use a gep and load. Ideally we will optimize
7325 // to extracting the value from the register directly, and elide the
7326 // stores when the arguments address is not taken, but that will need to
7327 // be future work.
7328 SDValue Store = DAG.getStore(
7329 Chain: CopyFrom.getValue(R: 1), dl, Val: CopyFrom,
7330 Ptr: DAG.getObjectPtrOffset(SL: dl, Ptr: FIN, Offset: TypeSize::getFixed(ExactSize: Offset)),
7331 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset));
7332
7333 MemOps.push_back(Elt: Store);
7334 };
7335
7336 unsigned Offset = 0;
7337 HandleRegLoc(VA.getLocReg(), Offset);
7338 Offset += PtrByteSize;
7339 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7340 Offset += PtrByteSize) {
7341 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7342 "RegLocs should be for ByVal argument.");
7343
7344 const CCValAssign RL = ArgLocs[I++];
7345 HandleRegLoc(RL.getLocReg(), Offset);
7346 FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
7347 }
7348
7349 if (Offset != StackSize) {
7350 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7351 "Expected MemLoc for remaining bytes.");
7352 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7353 // Consume the MemLoc.The InVal has already been emitted, so nothing
7354 // more needs to be done.
7355 ++I;
7356 }
7357
7358 continue;
7359 }
7360
7361 if (VA.isRegLoc() && !VA.needsCustom()) {
7362 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7363 Register VReg =
7364 MF.addLiveIn(PReg: VA.getLocReg(),
7365 RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
7366 HasVSX: Subtarget.hasVSX()));
7367 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
7368 if (ValVT.isScalarInteger() &&
7369 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7370 ArgValue =
7371 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7372 }
7373 InVals.push_back(Elt: ArgValue);
7374 continue;
7375 }
7376 if (VA.isMemLoc()) {
7377 HandleMemLoc();
7378 continue;
7379 }
7380 }
7381
7382 // On AIX a minimum of 8 words is saved to the parameter save area.
7383 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7384 // Area that is at least reserved in the caller of this function.
7385 unsigned CallerReservedArea = std::max<unsigned>(
7386 a: CCInfo.getStackSize(), b: LinkageSize + MinParameterSaveArea);
7387
7388 // Set the size that is at least reserved in caller of this function. Tail
7389 // call optimized function's reserved stack space needs to be aligned so
7390 // that taking the difference between two stack areas will result in an
7391 // aligned stack.
7392 CallerReservedArea =
7393 EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: CallerReservedArea);
7394 FuncInfo->setMinReservedArea(CallerReservedArea);
7395
7396 if (isVarArg) {
7397 int VAListIndex = 0;
7398 // If any of the optional arguments are passed in register then the fixed
7399 // stack object we spill into is not immutable. Create a fixed stack object
7400 // that overlaps the remainder of the parameter save area.
7401 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7402 unsigned FixedStackSize =
7403 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7404 VAListIndex =
7405 MFI.CreateFixedObject(Size: FixedStackSize, SPOffset: CCInfo.getStackSize(),
7406 /* IsImmutable */ false, /* IsAliased */ isAliased: true);
7407 } else {
7408 // All the arguments passed through ellipses are on the stack. Create a
7409 // dummy fixed stack object the same size as a pointer since we don't
7410 // know the actual size.
7411 VAListIndex =
7412 MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: CCInfo.getStackSize(),
7413 /* IsImmutable */ true, /* IsAliased */ isAliased: true);
7414 }
7415
7416 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7417 SDValue FIN = DAG.getFrameIndex(FI: VAListIndex, VT: PtrVT);
7418
7419 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7420 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7421
7422 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7423 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7424 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7425
7426 // The fixed integer arguments of a variadic function are stored to the
7427 // VarArgsFrameIndex on the stack so that they may be loaded by
7428 // dereferencing the result of va_next.
7429 for (unsigned
7430 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7431 Offset = 0;
7432 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7433
7434 const Register VReg =
7435 IsPPC64 ? MF.addLiveIn(PReg: GPR_64[GPRIndex], RC: &PPC::G8RCRegClass)
7436 : MF.addLiveIn(PReg: GPR_32[GPRIndex], RC: &PPC::GPRCRegClass);
7437
7438 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
7439 MachinePointerInfo MPI =
7440 MachinePointerInfo::getFixedStack(MF, FI: VAListIndex, Offset);
7441 SDValue Store = DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MPI);
7442 MemOps.push_back(Elt: Store);
7443 // Increment the address for the next argument to store.
7444 SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
7445 FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
7446 }
7447 }
7448
7449 if (!MemOps.empty())
7450 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
7451
7452 return Chain;
7453}
7454
7455SDValue PPCTargetLowering::LowerCall_AIX(
7456 SDValue Chain, SDValue Callee, CallFlags CFlags,
7457 const SmallVectorImpl<ISD::OutputArg> &Outs,
7458 const SmallVectorImpl<SDValue> &OutVals,
7459 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7460 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7461 const CallBase *CB) const {
7462 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7463 // AIX ABI stack frame layout.
7464
7465 assert((CFlags.CallConv == CallingConv::C ||
7466 CFlags.CallConv == CallingConv::Cold ||
7467 CFlags.CallConv == CallingConv::Fast) &&
7468 "Unexpected calling convention!");
7469
7470 if (CFlags.IsPatchPoint)
7471 report_fatal_error(reason: "This call type is unimplemented on AIX.");
7472
7473 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7474
7475 MachineFunction &MF = DAG.getMachineFunction();
7476 SmallVector<CCValAssign, 16> ArgLocs;
7477 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7478 *DAG.getContext());
7479
7480 // Reserve space for the linkage save area (LSA) on the stack.
7481 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7482 // [SP][CR][LR][2 x reserved][TOC].
7483 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7484 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7485 const bool IsPPC64 = Subtarget.isPPC64();
7486 const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7487 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7488 CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(PtrByteSize));
7489 CCInfo.AnalyzeCallOperands(Outs, Fn: CC_AIX);
7490
7491 // The prolog code of the callee may store up to 8 GPR argument registers to
7492 // the stack, allowing va_start to index over them in memory if the callee
7493 // is variadic.
7494 // Because we cannot tell if this is needed on the caller side, we have to
7495 // conservatively assume that it is needed. As such, make sure we have at
7496 // least enough stack space for the caller to store the 8 GPRs.
7497 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7498 const unsigned NumBytes = std::max<unsigned>(
7499 a: LinkageSize + MinParameterSaveAreaSize, b: CCInfo.getStackSize());
7500
7501 // Adjust the stack pointer for the new arguments...
7502 // These operations are automatically eliminated by the prolog/epilog pass.
7503 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
7504 SDValue CallSeqStart = Chain;
7505
7506 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7507 SmallVector<SDValue, 8> MemOpChains;
7508
7509 // Set up a copy of the stack pointer for loading and storing any
7510 // arguments that may not fit in the registers available for argument
7511 // passing.
7512 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(Reg: PPC::X1, VT: MVT::i64)
7513 : DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
7514
7515 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7516 const unsigned ValNo = ArgLocs[I].getValNo();
7517 SDValue Arg = OutVals[ValNo];
7518 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7519
7520 if (Flags.isByVal()) {
7521 const unsigned ByValSize = Flags.getByValSize();
7522
7523 // Nothing to do for zero-sized ByVals on the caller side.
7524 if (!ByValSize) {
7525 ++I;
7526 continue;
7527 }
7528
7529 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7530 return DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: PtrVT, Chain,
7531 Ptr: (LoadOffset != 0)
7532 ? DAG.getObjectPtrOffset(
7533 SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7534 : Arg,
7535 PtrInfo: MachinePointerInfo(), MemVT: VT);
7536 };
7537
7538 unsigned LoadOffset = 0;
7539
7540 // Initialize registers, which are fully occupied by the by-val argument.
7541 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7542 SDValue Load = GetLoad(PtrVT, LoadOffset);
7543 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7544 LoadOffset += PtrByteSize;
7545 const CCValAssign &ByValVA = ArgLocs[I++];
7546 assert(ByValVA.getValNo() == ValNo &&
7547 "Unexpected location for pass-by-value argument.");
7548 RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: Load));
7549 }
7550
7551 if (LoadOffset == ByValSize)
7552 continue;
7553
7554 // There must be one more loc to handle the remainder.
7555 assert(ArgLocs[I].getValNo() == ValNo &&
7556 "Expected additional location for by-value argument.");
7557
7558 if (ArgLocs[I].isMemLoc()) {
7559 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7560 const CCValAssign &ByValVA = ArgLocs[I++];
7561 ISD::ArgFlagsTy MemcpyFlags = Flags;
7562 // Only memcpy the bytes that don't pass in register.
7563 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7564 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7565 Arg: (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7566 SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7567 : Arg,
7568 PtrOff: DAG.getObjectPtrOffset(
7569 SL: dl, Ptr: StackPtr, Offset: TypeSize::getFixed(ExactSize: ByValVA.getLocMemOffset())),
7570 CallSeqStart, Flags: MemcpyFlags, DAG, dl);
7571 continue;
7572 }
7573
7574 // Initialize the final register residue.
7575 // Any residue that occupies the final by-val arg register must be
7576 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7577 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7578 // 2 and 1 byte loads.
7579 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7580 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7581 "Unexpected register residue for by-value argument.");
7582 SDValue ResidueVal;
7583 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7584 const unsigned N = llvm::bit_floor(Value: ResidueBytes - Bytes);
7585 const MVT VT =
7586 N == 1 ? MVT::i8
7587 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7588 SDValue Load = GetLoad(VT, LoadOffset);
7589 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7590 LoadOffset += N;
7591 Bytes += N;
7592
7593 // By-val arguments are passed left-justfied in register.
7594 // Every load here needs to be shifted, otherwise a full register load
7595 // should have been used.
7596 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7597 "Unexpected load emitted during handling of pass-by-value "
7598 "argument.");
7599 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7600 EVT ShiftAmountTy =
7601 getShiftAmountTy(LHSTy: Load->getValueType(ResNo: 0), DL: DAG.getDataLayout());
7602 SDValue SHLAmt = DAG.getConstant(Val: NumSHLBits, DL: dl, VT: ShiftAmountTy);
7603 SDValue ShiftedLoad =
7604 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: Load.getValueType(), N1: Load, N2: SHLAmt);
7605 ResidueVal = ResidueVal ? DAG.getNode(Opcode: ISD::OR, DL: dl, VT: PtrVT, N1: ResidueVal,
7606 N2: ShiftedLoad)
7607 : ShiftedLoad;
7608 }
7609
7610 const CCValAssign &ByValVA = ArgLocs[I++];
7611 RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: ResidueVal));
7612 continue;
7613 }
7614
7615 CCValAssign &VA = ArgLocs[I++];
7616 const MVT LocVT = VA.getLocVT();
7617 const MVT ValVT = VA.getValVT();
7618
7619 switch (VA.getLocInfo()) {
7620 default:
7621 report_fatal_error(reason: "Unexpected argument extension type.");
7622 case CCValAssign::Full:
7623 break;
7624 case CCValAssign::ZExt:
7625 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7626 break;
7627 case CCValAssign::SExt:
7628 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7629 break;
7630 }
7631
7632 if (VA.isRegLoc() && !VA.needsCustom()) {
7633 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
7634 continue;
7635 }
7636
7637 // Vector arguments passed to VarArg functions need custom handling when
7638 // they are passed (at least partially) in GPRs.
7639 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7640 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7641 // Store value to its stack slot.
7642 SDValue PtrOff =
7643 DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7644 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7645 SDValue Store =
7646 DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
7647 MemOpChains.push_back(Elt: Store);
7648 const unsigned OriginalValNo = VA.getValNo();
7649 // Then load the GPRs from the stack
7650 unsigned LoadOffset = 0;
7651 auto HandleCustomVecRegLoc = [&]() {
7652 assert(I != E && "Unexpected end of CCvalAssigns.");
7653 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7654 "Expected custom RegLoc.");
7655 CCValAssign RegVA = ArgLocs[I++];
7656 assert(RegVA.getValNo() == OriginalValNo &&
7657 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7658 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
7659 N2: DAG.getConstant(Val: LoadOffset, DL: dl, VT: PtrVT));
7660 SDValue Load = DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Add, PtrInfo: MachinePointerInfo());
7661 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7662 RegsToPass.push_back(Elt: std::make_pair(x: RegVA.getLocReg(), y&: Load));
7663 LoadOffset += PtrByteSize;
7664 };
7665
7666 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7667 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7668 // R10.
7669 HandleCustomVecRegLoc();
7670 HandleCustomVecRegLoc();
7671
7672 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7673 ArgLocs[I].getValNo() == OriginalValNo) {
7674 assert(!IsPPC64 &&
7675 "Only 2 custom RegLocs expected for 64-bit codegen.");
7676 HandleCustomVecRegLoc();
7677 HandleCustomVecRegLoc();
7678 }
7679
7680 continue;
7681 }
7682
7683 if (VA.isMemLoc()) {
7684 SDValue PtrOff =
7685 DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7686 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7687 MemOpChains.push_back(
7688 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff,
7689 PtrInfo: MachinePointerInfo::getStack(MF, Offset: VA.getLocMemOffset()),
7690 Alignment: Subtarget.getFrameLowering()->getStackAlign()));
7691
7692 continue;
7693 }
7694
7695 if (!ValVT.isFloatingPoint())
7696 report_fatal_error(
7697 reason: "Unexpected register handling for calling convention.");
7698
7699 // Custom handling is used for GPR initializations for vararg float
7700 // arguments.
7701 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7702 LocVT.isInteger() &&
7703 "Custom register handling only expected for VarArg.");
7704
7705 SDValue ArgAsInt =
7706 DAG.getBitcast(VT: MVT::getIntegerVT(BitWidth: ValVT.getSizeInBits()), V: Arg);
7707
7708 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7709 // f32 in 32-bit GPR
7710 // f64 in 64-bit GPR
7711 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgAsInt));
7712 else if (Arg.getValueType().getFixedSizeInBits() <
7713 LocVT.getFixedSizeInBits())
7714 // f32 in 64-bit GPR.
7715 RegsToPass.push_back(Elt: std::make_pair(
7716 x: VA.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: LocVT)));
7717 else {
7718 // f64 in two 32-bit GPRs
7719 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7720 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7721 "Unexpected custom register for argument!");
7722 CCValAssign &GPR1 = VA;
7723 SDValue MSWAsI64 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgAsInt,
7724 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i8));
7725 RegsToPass.push_back(Elt: std::make_pair(
7726 x: GPR1.getLocReg(), y: DAG.getZExtOrTrunc(Op: MSWAsI64, DL: dl, VT: MVT::i32)));
7727
7728 if (I != E) {
7729 // If only 1 GPR was available, there will only be one custom GPR and
7730 // the argument will also pass in memory.
7731 CCValAssign &PeekArg = ArgLocs[I];
7732 if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7733 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7734 CCValAssign &GPR2 = ArgLocs[I++];
7735 RegsToPass.push_back(Elt: std::make_pair(
7736 x: GPR2.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: MVT::i32)));
7737 }
7738 }
7739 }
7740 }
7741
7742 if (!MemOpChains.empty())
7743 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
7744
7745 // For indirect calls, we need to save the TOC base to the stack for
7746 // restoration after the call.
7747 if (CFlags.IsIndirect) {
7748 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7749 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7750 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7751 const MVT PtrVT = Subtarget.getScalarIntVT();
7752 const unsigned TOCSaveOffset =
7753 Subtarget.getFrameLowering()->getTOCSaveOffset();
7754
7755 setUsesTOCBasePtr(DAG);
7756 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: TOCBaseReg, VT: PtrVT);
7757 SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
7758 SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: PtrVT);
7759 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7760 Chain = DAG.getStore(
7761 Chain: Val.getValue(R: 1), dl, Val, Ptr: AddPtr,
7762 PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
7763 }
7764
7765 // Build a sequence of copy-to-reg nodes chained together with token chain
7766 // and flag operands which copy the outgoing args into the appropriate regs.
7767 SDValue InGlue;
7768 for (auto Reg : RegsToPass) {
7769 Chain = DAG.getCopyToReg(Chain, dl, Reg: Reg.first, N: Reg.second, Glue: InGlue);
7770 InGlue = Chain.getValue(R: 1);
7771 }
7772
7773 const int SPDiff = 0;
7774 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
7775 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7776}
7777
7778bool
7779PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7780 MachineFunction &MF, bool isVarArg,
7781 const SmallVectorImpl<ISD::OutputArg> &Outs,
7782 LLVMContext &Context,
7783 const Type *RetTy) const {
7784 SmallVector<CCValAssign, 16> RVLocs;
7785 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7786 return CCInfo.CheckReturn(
7787 Outs, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7788 ? RetCC_PPC_Cold
7789 : RetCC_PPC);
7790}
7791
7792SDValue
7793PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7794 bool isVarArg,
7795 const SmallVectorImpl<ISD::OutputArg> &Outs,
7796 const SmallVectorImpl<SDValue> &OutVals,
7797 const SDLoc &dl, SelectionDAG &DAG) const {
7798 SmallVector<CCValAssign, 16> RVLocs;
7799 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7800 *DAG.getContext());
7801 CCInfo.AnalyzeReturn(Outs,
7802 Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7803 ? RetCC_PPC_Cold
7804 : RetCC_PPC);
7805
7806 SDValue Glue;
7807 SmallVector<SDValue, 4> RetOps(1, Chain);
7808
7809 // Copy the result values into the output registers.
7810 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7811 CCValAssign &VA = RVLocs[i];
7812 assert(VA.isRegLoc() && "Can only return in registers!");
7813
7814 SDValue Arg = OutVals[RealResIdx];
7815
7816 switch (VA.getLocInfo()) {
7817 default: llvm_unreachable("Unknown loc info!");
7818 case CCValAssign::Full: break;
7819 case CCValAssign::AExt:
7820 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7821 break;
7822 case CCValAssign::ZExt:
7823 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7824 break;
7825 case CCValAssign::SExt:
7826 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7827 break;
7828 }
7829 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7830 bool isLittleEndian = Subtarget.isLittleEndian();
7831 // Legalize ret f64 -> ret 2 x i32.
7832 SDValue SVal =
7833 DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
7834 N2: DAG.getIntPtrConstant(Val: isLittleEndian ? 0 : 1, DL: dl));
7835 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
7836 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
7837 SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
7838 N2: DAG.getIntPtrConstant(Val: isLittleEndian ? 1 : 0, DL: dl));
7839 Glue = Chain.getValue(R: 1);
7840 VA = RVLocs[++i]; // skip ahead to next loc
7841 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
7842 } else
7843 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: Arg, Glue);
7844 Glue = Chain.getValue(R: 1);
7845 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
7846 }
7847
7848 RetOps[0] = Chain; // Update chain.
7849
7850 // Add the glue if we have it.
7851 if (Glue.getNode())
7852 RetOps.push_back(Elt: Glue);
7853
7854 return DAG.getNode(Opcode: PPCISD::RET_GLUE, DL: dl, VT: MVT::Other, Ops: RetOps);
7855}
7856
7857SDValue
7858PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7859 SelectionDAG &DAG) const {
7860 SDLoc dl(Op);
7861
7862 // Get the correct type for integers.
7863 EVT IntVT = Op.getValueType();
7864
7865 // Get the inputs.
7866 SDValue Chain = Op.getOperand(i: 0);
7867 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7868 // Build a DYNAREAOFFSET node.
7869 SDValue Ops[2] = {Chain, FPSIdx};
7870 SDVTList VTs = DAG.getVTList(VT: IntVT);
7871 return DAG.getNode(Opcode: PPCISD::DYNAREAOFFSET, DL: dl, VTList: VTs, Ops);
7872}
7873
// Lower STACKRESTORE: restore R1/X1 from the saved value while keeping the
// back-chain (SP link) word intact.  The link is reloaded through the old
// stack pointer before the copy and stored back through the new one after.
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());

  // Construct the stack pointer operand (R1 on 32-bit, X1 on 64-bit).
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(Reg: SP, VT: PtrVT);

  // Get the operands for the STACKRESTORE: incoming chain and saved SP value.
  SDValue Chain = Op.getOperand(i: 0);
  SDValue SaveSP = Op.getOperand(i: 1);

  // Load the old link SP (the back-chain word at the current SP).
  SDValue LoadLinkSP =
      DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: StackPtr, PtrInfo: MachinePointerInfo());

  // Restore the stack pointer, chained after the link load.
  Chain = DAG.getCopyToReg(Chain: LoadLinkSP.getValue(R: 1), dl, Reg: SP, N: SaveSP);

  // Store the old link SP through the (now restored) stack pointer.
  return DAG.getStore(Chain, dl, Val: LoadLinkSP, Ptr: StackPtr, PtrInfo: MachinePointerInfo());
}
7901
/// Return (creating it on first use) the fixed stack slot used to save the
/// return address (link register).
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());

  // Get the current return address save index, if one has been created.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet, create it.
  if (!RASI) {
    // Find the fixed offset of the LR save slot from the frame lowering.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the fixed frame index for the return address save slot.
    RASI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: LROffset, IsImmutable: false);
    // Cache the result for subsequent queries.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(FI: RASI, VT: PtrVT);
}
7923
/// Return (creating it on first use) the fixed stack slot used to save the
/// frame pointer.
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());

  // Get current frame pointer save index. The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet, create it.
  if (!FPSI) {
    // Find the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the fixed frame index for the frame pointer save slot.
    FPSI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: FPOffset, IsImmutable: true);
    // Cache the result for subsequent queries.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FI: FPSI, VT: PtrVT);
}
7946
// Lower DYNAMIC_STACKALLOC to a PPC-specific node that adjusts the stack by
// the negated requested size and references the frame pointer save slot.
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  // Get the inputs.
  SDValue Chain = Op.getOperand(i: 0);
  SDValue Size = Op.getOperand(i: 1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  // Negate the size (the stack grows downward).
  SDValue NegSize = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: PtrVT,
                                N1: DAG.getConstant(Val: 0, DL: dl, VT: PtrVT), N2: Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(VT1: PtrVT, VT2: MVT::Other);
  // Use the probing variant when inline stack probing is required.
  if (hasInlineStackProbe(MF))
    return DAG.getNode(Opcode: PPCISD::PROBED_ALLOCA, DL: dl, VTList: VTs, Ops);
  return DAG.getNode(Opcode: PPCISD::DYNALLOC, DL: dl, VTList: VTs, Ops);
}
7968
7969SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7970 SelectionDAG &DAG) const {
7971 MachineFunction &MF = DAG.getMachineFunction();
7972
7973 bool isPPC64 = Subtarget.isPPC64();
7974 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7975
7976 int FI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64 ? 8 : 4, SPOffset: 0, IsImmutable: false);
7977 return DAG.getFrameIndex(FI, VT: PtrVT);
7978}
7979
7980SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7981 SelectionDAG &DAG) const {
7982 SDLoc DL(Op);
7983 return DAG.getNode(Opcode: PPCISD::EH_SJLJ_SETJMP, DL,
7984 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
7985 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
7986}
7987
7988SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7989 SelectionDAG &DAG) const {
7990 SDLoc DL(Op);
7991 return DAG.getNode(Opcode: PPCISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other,
7992 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
7993}
7994
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  // Vector loads have a dedicated lowering path.
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load 8 bits into 32 bits, then truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Val&: Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  // Any-extend a byte load into a pointer-width register, then truncate the
  // result down to i1.
  SDValue NewLD =
      DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: getPointerTy(DL: DAG.getDataLayout()), Chain,
                     Ptr: BasePtr, MemVT: MVT::i8, MMO);
  SDValue Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewLD);

  // Return both the truncated value and the new load's output chain.
  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}
8019
8020SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8021 if (Op.getOperand(i: 1).getValueType().isVector())
8022 return LowerVectorStore(Op, DAG);
8023
8024 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8025 "Custom lowering only for i1 stores");
8026
8027 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8028
8029 SDLoc dl(Op);
8030 StoreSDNode *ST = cast<StoreSDNode>(Val&: Op);
8031
8032 SDValue Chain = ST->getChain();
8033 SDValue BasePtr = ST->getBasePtr();
8034 SDValue Value = ST->getValue();
8035 MachineMemOperand *MMO = ST->getMemOperand();
8036
8037 Value = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
8038 Operand: Value);
8039 return DAG.getTruncStore(Chain, dl, Val: Value, Ptr: BasePtr, SVT: MVT::i8, MMO);
8040}
8041
8042// FIXME: Remove this once the ANDI glue bug is fixed:
8043SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8044 assert(Op.getValueType() == MVT::i1 &&
8045 "Custom lowering only for i1 results");
8046
8047 SDLoc DL(Op);
8048 return DAG.getNode(Opcode: PPCISD::ANDI_rec_1_GT_BIT, DL, VT: MVT::i1, Operand: Op.getOperand(i: 0));
8049}
8050
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {

  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size). At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  // operation.

  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where x denotes
  // undefined):
  // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  EVT TrgVT = Op.getValueType();
  assert(TrgVT.isVector() && "Vector type expected.");
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  // Bail out unless the target type fits in a single vector register and
  // both the element count and element size are powers of two.
  if (!isOperationCustom(Op: Op.getOpcode(), VT: TrgVT) ||
      TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(Value: TrgNumElts) ||
      !llvm::has_single_bit<uint32_t>(Value: EltVT.getSizeInBits()))
    return SDValue();

  SDValue N1 = Op.getOperand(i: 0);
  EVT SrcVT = N1.getValueType();
  unsigned SrcSize = SrcVT.getSizeInBits();
  // The source may span at most two vector registers, with the same
  // power-of-two constraints on element count and size.
  if (SrcSize > 256 || !isPowerOf2_32(Value: SrcVT.getVectorNumElements()) ||
      !llvm::has_single_bit<uint32_t>(
          Value: SrcVT.getVectorElementType().getSizeInBits()))
    return SDValue();
  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
    return SDValue();

  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);

  SDLoc DL(Op);
  SDValue Op1, Op2;
  // A 256-bit source is split into two 128-bit halves; narrower sources are
  // widened to 128 bits and paired with an undef second operand.
  if (SrcSize == 256) {
    EVT VecIdxTy = getVectorIdxTy(DL: DAG.getDataLayout());
    EVT SplitVT =
        N1.getValueType().getHalfNumVectorElementsVT(Context&: *DAG.getContext());
    unsigned SplitNumElts = SplitVT.getVectorNumElements();
    Op1 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
                      N2: DAG.getConstant(Val: 0, DL, VT: VecIdxTy));
    Op2 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
                      N2: DAG.getConstant(Val: SplitNumElts, DL, VT: VecIdxTy));
  }
  else {
    Op1 = SrcSize == 128 ? N1 : widenVec(DAG, Vec: N1, dl: DL);
    Op2 = DAG.getUNDEF(VT: WideVT);
  }

  // First list the elements we want to keep.  Each kept lane is the
  // low-order sub-element of a source element: index i*SizeMult on LE,
  // (i+1)*SizeMult-1 on BE.
  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(Elt: i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(Elt: i * SizeMult - 1);

  // Populate the remaining mask elements.  These result lanes are
  // don't-care, so any in-range index works; a fixed element of Op2 is used
  // here rather than an undef (-1) mask element.
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(Elt: WideNumElts + 1);

  Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op1);
  Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op2);
  return DAG.getVectorShuffle(VT: WideVT, dl: DL, N1: Op1, N2: Op2, Mask: ShuffV);
}
8131
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 4))->get();
  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(i: 0).getValueType();
  SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
  SDValue TV = Op.getOperand(i: 2), FV = Op.getOperand(i: 3);
  SDLoc dl(Op);

  // Without power9-vector, we don't have native instruction for f128 comparison.
  // Following transformation to libcall is needed for setcc:
  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
    SDValue Z = DAG.getSetCC(
        DL: dl, VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: CmpVT),
        LHS, RHS, Cond: CC);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: Z.getValueType());
    return DAG.getSelectCC(DL: dl, LHS: Z, RHS: Zero, True: TV, False: FV, Cond: ISD::SETNE);
  }

  // Not FP, or using SPE? Not a fsel.
  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
      Subtarget.hasSPE())
    return Op;

  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(Opcode: PPCISD::XSMAXC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(Opcode: PPCISD::XSMINC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // fsel is likewise not used for f128 results.
  if (!Flags.hasNoInfs() || !Flags.hasNoNaNs() || ResVT == MVT::f128)
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(Op: RHS))
    switch (CC) {
    default: break; // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(a&: TV, b&: FV);
      [[fallthrough]];
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
      // LHS == 0.0 iff LHS >= 0.0 and -LHS >= 0.0: nest two fsels.
      Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
      if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
        Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
      return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
                         N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: Sel1, N3: FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(a&: TV, b&: FV); // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
      return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(a&: TV, b&: FV); // fsel is natively setge, swap operands for setgt
      [[fallthrough]];
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
      // LHS <= 0.0 iff -LHS >= 0.0.
      return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
                         N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: TV, N3: FV);
    }

  // General case: materialize the comparison as a subtraction (valid under
  // the no-NaNs/no-infs flags checked above) and fsel on its sign.
  SDValue Cmp;
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(a&: TV, b&: FV);
    [[fallthrough]];
  case ISD::SETEQ:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
    if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
      Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
                       N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: Cmp), N2: Sel1, N3: FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
  }
  return Op;
}
8262
// Map a non-strict PPCISD FP conversion opcode to its chained strict-FP
// counterpart; reports unreachable for opcodes without a strict version.
static unsigned getPPCStrictOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("No strict version of this opcode!");
  case PPCISD::FCTIDZ:
    return PPCISD::STRICT_FCTIDZ;
  case PPCISD::FCTIWZ:
    return PPCISD::STRICT_FCTIWZ;
  case PPCISD::FCTIDUZ:
    return PPCISD::STRICT_FCTIDUZ;
  case PPCISD::FCTIWUZ:
    return PPCISD::STRICT_FCTIWUZ;
  case PPCISD::FCFID:
    return PPCISD::STRICT_FCFID;
  case PPCISD::FCFIDU:
    return PPCISD::STRICT_FCFIDU;
  case PPCISD::FCFIDS:
    return PPCISD::STRICT_FCFIDS;
  case PPCISD::FCFIDUS:
    return PPCISD::STRICT_FCFIDUS;
  }
}
8285
// Emit the PPC fcti*z-family node that converts a floating-point value to an
// integer held in an FP register, extending f32 sources to f64 first.  For
// strict nodes the chain is threaded through and is available to the caller
// as result value 1.
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget) {
  SDLoc dl(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // For strict nodes, source is the second operand.
  SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
  MVT DestTy = Op.getSimpleValueType();
  assert(Src.getValueType().isFloatingPoint() &&
         (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
          DestTy == MVT::i64) &&
         "Invalid FP_TO_INT types");
  // f32 sources are first extended; the conversion operates on f64 (or f128).
  if (Src.getValueType() == MVT::f32) {
    if (IsStrict) {
      Src =
          DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL: dl,
                      VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
      Chain = Src.getValue(R: 1);
    } else
      Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
  }
  // With P9 vector instructions, i8/i16 results are produced at the
  // subtarget's natural scalar integer width.
  if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
    DestTy = Subtarget.getScalarIntVT();
  unsigned Opc = ISD::DELETED_NODE;
  switch (DestTy.SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT, unsigned i32 is obtained via a signed doubleword
    // conversion (FCTIDZ).
    Opc = IsSigned ? PPCISD::FCTIWZ
                   : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
    break;
  case MVT::i64:
    assert((IsSigned || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
  }
  // The converted bits live in an FP register of the source's width class.
  EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
  SDValue Conv;
  if (IsStrict) {
    Opc = getPPCStrictOpcode(Opc);
    Conv = DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src},
                       Flags);
  } else {
    Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: ConvTy, Operand: Src);
  }
  return Conv;
}
8339
// Lower an FP-to-int conversion by converting in an FP register and storing
// the result to a stack slot.  RLI is populated with the slot's chain,
// pointer, pointer info and alignment so the caller (or canReuseLoadAddress)
// can materialize the integer result as a load from that slot.
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  bool IsStrict = Op->isStrictFPOpcode();

  // Convert the FP value to an int value through memory.  With STFIWX we can
  // store just the 32-bit integer word directly.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (IsSigned || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(VT: i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(Val&: FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot (chained after the conversion for strict
  // nodes).
  SDValue Chain = IsStrict ? Tmp.getValue(R: 1) : DAG.getEntryNode();
  Align Alignment(DAG.getEVTAlign(MemoryVT: Tmp.getValueType()));
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    Alignment = Align(4);
    // Emit the 4-byte store via the STFIWX memory intrinsic.
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Alignment);
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(Opcode: PPCISD::STFIWX, dl,
              VTList: DAG.getVTList(VT: MVT::Other), Ops, MemVT: MVT::i32, MMO);
  } else
    Chain = DAG.getStore(Chain, dl, Val: Tmp, Ptr: FIPtr, PtrInfo: MPI, Alignment);

  // Result is a load from the stack slot. If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack &&
      !Subtarget.isLittleEndian()) {
    FIPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: FIPtr.getValueType(), N1: FIPtr,
                        N2: DAG.getConstant(Val: 4, DL: dl, VT: FIPtr.getValueType()));
    MPI = MPI.getWithOffset(O: 4);
  }

  // Hand everything the caller needs for the reload back through RLI.
  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
  RLI.Alignment = Alignment;
}
8384
8385/// Custom lowers floating point to integer conversions to use
8386/// the direct move instructions available in ISA 2.07 to avoid the
8387/// need for load/store combinations.
8388SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8389 SelectionDAG &DAG,
8390 const SDLoc &dl) const {
8391 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8392 SDValue Mov = DAG.getNode(Opcode: PPCISD::MFVSR, DL: dl, VT: Op.getValueType(), Operand: Conv);
8393 if (Op->isStrictFPOpcode())
8394 return DAG.getMergeValues(Ops: {Mov, Conv.getValue(R: 1)}, dl);
8395 else
8396 return Mov;
8397}
8398
// Lower [STRICT_]FP_TO_[SU]INT.  f128 conversions are legal with P9 vector;
// ppcf128->i32 is expanded by hand; otherwise the value is converted via a
// direct move (ISA 2.07, 64-bit) or a store/load sequence through the stack.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (SrcVT == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (SrcVT == MVT::ppcf128) {
    if (DstVT == MVT::i32) {
      // TODO: Conservatively pass only nofpexcept flag here. Need to check and
      // set other fast-math flags to FP operations in both strict and
      // non-strict cases. (FP_TO_SINT, FSUB)
      SDNodeFlags Flags;
      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

      if (IsSigned) {
        SDValue Lo, Hi;
        std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Src, DL: dl, LoVT: MVT::f64, HiVT: MVT::f64);

        // Add the two halves of the long double in round-to-zero mode, and use
        // a smaller FP_TO_SINT.
        if (IsStrict) {
          SDValue Res = DAG.getNode(Opcode: PPCISD::STRICT_FADDRTZ, DL: dl,
                                    VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                    Ops: {Op.getOperand(i: 0), Lo, Hi}, Flags);
          return DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
                             VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
                             Ops: {Res.getValue(R: 1), Res}, Flags);
        } else {
          SDValue Res = DAG.getNode(Opcode: PPCISD::FADDRTZ, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
          return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Res);
        }
      } else {
        // Unsigned: bias values >= 2^31 into signed range, convert, then
        // patch the sign bit back in.  TwoE31 is 2^31 as a ppcf128 constant.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
        SDValue SignMask = DAG.getConstant(Val: 0x80000000, DL: dl, VT: DstVT);
        if (IsStrict) {
          // Sel = Src < 0x80000000
          // FltOfs = select Sel, 0.0, 0x80000000
          // IntOfs = select Sel, 0, 0x80000000
          // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
          SDValue Chain = Op.getOperand(i: 0);
          EVT SetCCVT =
              getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: SrcVT);
          EVT DstSetCCVT =
              getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: DstVT);
          SDValue Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
                                     Chain, IsSignaling: true);
          Chain = Sel.getValue(R: 1);

          SDValue FltOfs = DAG.getSelect(
              DL: dl, VT: SrcVT, Cond: Sel, LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
          Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);

          SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl,
                                    VTList: DAG.getVTList(VT1: SrcVT, VT2: MVT::Other),
                                    Ops: {Chain, Src, FltOfs}, Flags);
          Chain = Val.getValue(R: 1);
          SDValue SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
                                     VTList: DAG.getVTList(VT1: DstVT, VT2: MVT::Other),
                                     Ops: {Chain, Val}, Flags);
          Chain = SInt.getValue(R: 1);
          SDValue IntOfs = DAG.getSelect(
              DL: dl, VT: DstVT, Cond: Sel, LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), RHS: SignMask);
          SDValue Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
          return DAG.getMergeValues(Ops: {Result, Chain}, dl);
        } else {
          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
          // FIXME: generated code sucks.
          SDValue True = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: MVT::ppcf128, N1: Src, N2: Cst);
          True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: True);
          True = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: True, N2: SignMask);
          SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Src);
          return DAG.getSelectCC(DL: dl, LHS: Src, RHS: Cst, True, False, Cond: ISD::SETGE);
        }
      }
    }

    // Other ppcf128 destinations are not custom-handled here.
    return SDValue();
  }

  // With direct moves on 64-bit targets, skip the store/load round trip.
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Otherwise convert through a stack slot and load the integer back.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(VT: Op.getValueType(), dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
                     Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
}
8498
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (this last part is handled by makeEquivalentMemoryOrdering).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  // Conservatively skip reusing for constrained FP nodes.
  if (Op->isStrictFPOpcode())
    return false;

  SDLoc dl(Op);
  // FP_TO_UINT is only reusable when the conversion can be done directly
  // (with FPCVT, or for i32 results).
  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
                       (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
  if (ET == ISD::NON_EXTLOAD &&
      (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op: Op.getOpcode(),
                               VT: Op.getOperand(i: 0).getValueType())) {

    // FP-to-int conversions already go through a stack slot; reuse it.
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Val&: Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  // If the result of the load is an illegal type, then we can't build a
  // valid chain for reuse since the legalised loads and token factor node that
  // ties the legalised loads together uses a different output chain then the
  // illegal load.
  if (!isTypeLegal(VT: LD->getValueType(ResNo: 0)))
    return false;

  RLI.Ptr = LD->getBasePtr();
  // Fold a pre-increment offset into the pointer we hand back.
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RLI.Ptr.getValueType(), N1: RLI.Ptr,
                          N2: LD->getOffset());
  }

  // Pass the load's chain and memory-operand attributes back to the caller.
  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  // The chain result index differs for indexed loads.
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
8560
/// Analyze profitability of direct move
/// prefer float load to int load plus direct move
/// when there is no integer use of int load
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0).getNode();
  // If the source is not a load, there is nothing to trade off.
  if (Origin->getOpcode() != ISD::LOAD)
    return true;

  // If there is no LXSIBZX/LXSIHZX, like Power8,
  // prefer direct move if the memory size is 1 or 2 bytes (or unknown).
  MachineMemOperand *MMO = cast<LoadSDNode>(Val: Origin)->getMemOperand();
  if (!Subtarget.hasP9Vector() &&
      (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
    return true;

  for (SDUse &Use : Origin->uses()) {

    // Only look at the users of the loaded value.
    if (Use.getResNo() != 0)
      continue;

    SDNode *User = Use.getUser();
    // Any user other than an int-to-fp conversion means the integer value is
    // genuinely needed in a GPR, so the direct move is profitable.
    if (User->getOpcode() != ISD::SINT_TO_FP &&
        User->getOpcode() != ISD::UINT_TO_FP &&
        User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
        User->getOpcode() != ISD::STRICT_UINT_TO_FP)
      return true;
  }

  // Every use feeds a conversion: a plain FP load would serve them all.
  return false;
}
8592
// Emit the PPC fcfid*-family node that converts an integer value already
// placed in a floating-point register (e.g. via a direct move or FP load) to
// floating point.  For strict ops the chain is threaded through; Chain may be
// supplied by the caller, otherwise the node's incoming chain is used.
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget,
                              SDValue Chain = SDValue()) {
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  SDLoc dl(Op);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
                              : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
  if (Op->isStrictFPOpcode()) {
    // Default to the node's incoming chain if the caller did not supply one.
    if (!Chain)
      Chain = Op.getOperand(i: 0);
    return DAG.getNode(Opcode: getPPCStrictOpcode(Opc: ConvOpc), DL: dl,
                       VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
  } else
    return DAG.getNode(Opcode: ConvOpc, DL: dl, VT: ConvTy, Operand: Src);
}
8618
8619/// Custom lowers integer to floating point conversions to use
8620/// the direct move instructions available in ISA 2.07 to avoid the
8621/// need for load/store combinations.
8622SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8623 SelectionDAG &DAG,
8624 const SDLoc &dl) const {
8625 assert((Op.getValueType() == MVT::f32 ||
8626 Op.getValueType() == MVT::f64) &&
8627 "Invalid floating point type as target of conversion");
8628 assert(Subtarget.hasFPCVT() &&
8629 "Int to FP conversions with direct moves require FPCVT");
8630 SDValue Src = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0);
8631 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8632 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8633 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8634 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8635 SDValue Mov = DAG.getNode(Opcode: MovOpc, DL: dl, VT: MVT::f64, Operand: Src);
8636 return convertIntToFP(Op, Src: Mov, DAG, Subtarget);
8637}
8638
8639static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8640
8641 EVT VecVT = Vec.getValueType();
8642 assert(VecVT.isVector() && "Expected a vector type.");
8643 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8644
8645 EVT EltVT = VecVT.getVectorElementType();
8646 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8647 EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);
8648
8649 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8650 SmallVector<SDValue, 16> Ops(NumConcat);
8651 Ops[0] = Vec;
8652 SDValue UndefVec = DAG.getUNDEF(VT: VecVT);
8653 for (unsigned i = 1; i < NumConcat; ++i)
8654 Ops[i] = UndefVec;
8655
8656 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: WideVT, Ops);
8657}
8658
/// Lower a vector int-to-fp conversion whose source vector is narrower than
/// 128 bits: widen the source to full width, shuffle its elements into the
/// lane positions the conversion reads, extend (sign or zero), and finally
/// emit the now-legal conversion on v4i32/v2i64.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Vec: Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Start with a mask that takes every lane from the second shuffle operand
  // (all-zero for unsigned, undef for signed); the lanes that must carry the
  // actual source elements are overwritten below.
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(Elt: i + WideNumElts);

  // Stride is the number of narrow elements per result element. Each source
  // element is placed in the first narrow slot of its group on little-endian
  // and the last slot on big-endian, i.e. where the low-order (LE) or
  // high-order (BE) part of the extended value lives.
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // Unsigned conversions rely on the pad lanes being zero (a free zero
  // extension); signed conversions sign-extend explicitly below, so the
  // pad lanes may be undef.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(VT: WideVT) : DAG.getConstant(Val: 0, DL: dl, VT: WideVT);
  SDValue Arrange = DAG.getVectorShuffle(VT: WideVT, dl, N1: Wide, N2: ShuffleSrc2, Mask: ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(VT: IntermediateVT, V: Arrange);
    // NOTE(review): with P9 Altivec, extension is done from the widened
    // element type rather than the original source element type —
    // presumably to match the P9 vector-extend instructions; confirm
    // against the corresponding .td patterns.
    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT.getVectorElementType(),
                               NumElements: IntermediateVT.getVectorNumElements());

    Extend = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: IntermediateVT, N1: Arrange,
                         N2: DAG.getValueType(ExtVT));
  } else
    Extend = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntermediateVT, Operand: Arrange);

  // The conversion is legal on the intermediate type; re-emit it (keeping
  // the chain for strict variants).
  if (IsStrict)
    return DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other),
                       Ops: {Op.getOperand(i: 0), Extend}, Flags);

  return DAG.getNode(Opcode: Opc, DL: dl, VT: Op.getValueType(), Operand: Extend);
}
8718
/// Custom lower scalar [SU]INT_TO_FP (and strict variants) to f32/f64.
/// Strategy, in order of preference: direct GPR->VSR moves when profitable,
/// reuse of an existing memory load via f64/lfiwax/lfiwzx, or a stack
/// store/reload, followed by an FCFID-family conversion.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(i: 0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op: Op.getOpcode(), VT: InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // An i1 source needs no conversion instruction at all: select between
  // the FP constants 1.0 and 0.0.
  if (Src.getValueType() == MVT::i1) {
    SDValue Sel = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: Op.getValueType(), N1: Src,
                              N2: DAG.getConstantFP(Val: 1.0, DL: dl, VT: Op.getValueType()),
                              N3: DAG.getConstantFP(Val: 0.0, DL: dl, VT: Op.getValueType()));
    if (IsStrict)
      return DAG.getMergeValues(Ops: {Sel, Chain}, dl);
    else
      return Sel;
  }

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand. Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if afn is in effect, accept double
    // rounding to avoid the extra overhead.
    // FIXME: Currently INT_TO_FP can't support fast math flags because
    // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
    // false.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
        !Op->getFlags().hasApproximateFuncs()) {

      // Twiddle input to make sure the low 11 bits are zero. (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64,
                                  N1: SINT, N2: DAG.getConstant(Val: 2047, DL: dl, VT: MVT::i64));
      Round = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
                          N1: Round, N2: DAG.getConstant(Val: 2047, DL: dl, VT: MVT::i64));
      Round = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i64, N1: Round, N2: SINT);
      Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64, N1: Round,
                          N2: DAG.getSignedConstant(Val: -2048, DL: dl, VT: MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output. Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already. Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: MVT::i64,
                                 N1: SINT, N2: DAG.getConstant(Val: 53, DL: dl, VT: MVT::i32));
      Cond = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
                         N1: Cond, N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i64));
      Cond = DAG.getSetCC(
          DL: dl,
          VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: MVT::i64),
          LHS: Cond, RHS: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i64), Cond: ISD::SETUGT);

      SINT = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i64, N1: Cond, N2: Round, N3: SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    // Try to get the value into an FPR without a GPR->memory->FPR round
    // trip: reuse an existing load of the same address as an f64 load, or
    // as an lfiwax/lfiwzx for sign-/zero-extending i32 loads.
    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(Op: SINT, MemVT: MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(VT: MVT::f64, dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
                         Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                  BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWAX, dl,
                                     VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                     Ops, MemVT: MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                  BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWZX, dl,
                                     VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                     Ops, MemVT: MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(i: 0).getValueType() == MVT::i32) {
      // The value is an extended i32 in a GPR: spill the narrow i32 and
      // reload it with the matching extending FP load (lfiwax/lfiwzx).
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(Size: 4, Alignment: Align(4), isSpillSlot: false);
      SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Val: SINT.getOperand(i: 0), Ptr: FIdx,
                                   PtrInfo: MachinePointerInfo::getFixedStack(
                                       MF&: DAG.getMachineFunction(), FI: FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
          MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                  BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(Opcode: SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                     Ops, MemVT: MVT::i32, MMO);
      Chain = Bits.getValue(R: 1);
    } else
      // No reusable load: fall back to a plain i64 -> f64 bitcast.
      Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::f64, Operand: SINT);

    SDValue FP = convertIntToFP(Op, Src: Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(R: 1);

    // Without FPCVT the conversion produced an f64; round it to f32 here.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(
            Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
            Ops: {Chain, FP, DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)},
            Flags);
      else
        FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
                         N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers. In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    // Preferred path: load the i32 into an FPR with lfiwax (signed) or
    // lfiwzx (unsigned), reusing an existing load's address when possible.
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op: Src, MemVT: MVT::i32, RLI, DAG))) {
      int FrameIdx = MFI.CreateStackObject(Size: 4, Alignment: Align(4), isSpillSlot: false);
      SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Val: Src, Ptr: FIdx,
                                   PtrInfo: MachinePointerInfo::getFixedStack(
                                       MF&: DAG.getMachineFunction(), FI: FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
        MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Opcode: IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops,
                                 MemVT: MVT::i32, MMO);
    Chain = Ld.getValue(R: 1);
    if (ReusingLoad && RLI.ResChain) {
      DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Ld.getValue(R: 1));
    }
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
    SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);

    SDValue Ext64 = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i64, Operand: Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Val: Ext64, Ptr: FIdx,
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        VT: MVT::f64, dl, Chain, Ptr: FIdx,
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
    Chain = Ld.getValue(R: 1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Src: Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(R: 1);
  // Without FPCVT the conversion produced an f64; round it to f32 here.
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(
          Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
          Ops: {Chain, FP, DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)}, Flags);
    else
      FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
                       N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
  }
  return FP;
}
8983
/// Lower SET_ROUNDING: write the RN field (FPSCR bits 30:31). The LLVM
/// rounding-mode encoding differs from the PPC RN encoding only in that
/// modes 0 and 1 are swapped (see the table in LowerGET_ROUNDING below);
/// x ^ (~(x >> 1) & 1) converts between the two.
SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
  SDValue Chain = Op.getOperand(i: 0);

  // If requested mode is constant, just use simpler mtfsb/mffscrni
  if (auto *CVal = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))) {
    uint64_t Mode = CVal->getZExtValue();
    assert(Mode < 4 && "Unsupported rounding mode!");
    // Convert the LLVM encoding to the PPC RN encoding at compile time.
    unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
    if (Subtarget.isISA3_0())
      return SDValue(
          DAG.getMachineNode(
              Opcode: PPC::MFFSCRNI, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
              Ops: {DAG.getConstant(Val: InternalRnd, DL: Dl, VT: MVT::i32, isTarget: true), Chain}),
          1);
    // Pre-ISA 3.0: set/clear the two RN bits (FPSCR bits 30 and 31)
    // individually with mtfsb1/mtfsb0, chained so they stay ordered.
    SDNode *SetHi = DAG.getMachineNode(
        Opcode: (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
        Ops: {DAG.getConstant(Val: 30, DL: Dl, VT: MVT::i32, isTarget: true), Chain});
    SDNode *SetLo = DAG.getMachineNode(
        Opcode: (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
        Ops: {DAG.getConstant(Val: 31, DL: Dl, VT: MVT::i32, isTarget: true), SDValue(SetHi, 0)});
    return SDValue(SetLo, 0);
  }

  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
  SDValue One = DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32);
  SDValue SrcFlag = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: Op.getOperand(i: 1),
                                N2: DAG.getConstant(Val: 3, DL: Dl, VT: MVT::i32));
  SDValue DstFlag = DAG.getNode(
      Opcode: ISD::XOR, DL: Dl, VT: MVT::i32, N1: SrcFlag,
      N2: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32,
                  N1: DAG.getNOT(DL: Dl,
                             Val: DAG.getNode(Opcode: ISD::SRL, DL: Dl, VT: MVT::i32, N1: SrcFlag, N2: One),
                             VT: MVT::i32),
                  N2: One));
  // For Power9, there's faster mffscrn, and we don't need to read FPSCR
  SDValue MFFS;
  if (!Subtarget.isISA3_0()) {
    MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: Dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
    Chain = MFFS.getValue(R: 1);
  }
  // Build the f64-typed FPSCR image to write back (NewFPSCR).
  SDValue NewFPSCR;
  if (Subtarget.isPPC64()) {
    if (Subtarget.isISA3_0()) {
      // mffscrn only reads the two low bits, so the bare mode value is enough.
      NewFPSCR = DAG.getAnyExtOrTrunc(Op: DstFlag, DL: Dl, VT: MVT::i64);
    } else {
      // Set the last two bits (rounding mode) of bitcasted FPSCR.
      SDNode *InsertRN = DAG.getMachineNode(
          Opcode: PPC::RLDIMI, dl: Dl, VT: MVT::i64,
          Ops: {DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::i64, Operand: MFFS),
               DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: Dl, VT: MVT::i64, Operand: DstFlag),
               DAG.getTargetConstant(Val: 0, DL: Dl, VT: MVT::i32),
               DAG.getTargetConstant(Val: 62, DL: Dl, VT: MVT::i32)});
      NewFPSCR = SDValue(InsertRN, 0);
    }
    NewFPSCR = DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::f64, Operand: NewFPSCR);
  } else {
    // In 32-bit mode, store f64, load and update the lower half.
    int SSFI = MF.getFrameInfo().CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
    SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
    // Addr points at the word holding the low 32 FPSCR bits: the slot base
    // on little-endian, base + 4 on big-endian.
    SDValue Addr = Subtarget.isLittleEndian()
                       ? StackSlot
                       : DAG.getNode(Opcode: ISD::ADD, DL: Dl, VT: PtrVT, N1: StackSlot,
                                     N2: DAG.getConstant(Val: 4, DL: Dl, VT: PtrVT));
    if (Subtarget.isISA3_0()) {
      Chain = DAG.getStore(Chain, dl: Dl, Val: DstFlag, Ptr: Addr, PtrInfo: MachinePointerInfo());
    } else {
      // Spill the current FPSCR image, patch RN (bits 30:31 of the low
      // word) in an integer register with rlwimi, and store it back.
      Chain = DAG.getStore(Chain, dl: Dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
      SDValue Tmp =
          DAG.getLoad(VT: MVT::i32, dl: Dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo());
      Chain = Tmp.getValue(R: 1);
      Tmp = SDValue(DAG.getMachineNode(
                        Opcode: PPC::RLWIMI, dl: Dl, VT: MVT::i32,
                        Ops: {Tmp, DstFlag, DAG.getTargetConstant(Val: 0, DL: Dl, VT: MVT::i32),
                             DAG.getTargetConstant(Val: 30, DL: Dl, VT: MVT::i32),
                             DAG.getTargetConstant(Val: 31, DL: Dl, VT: MVT::i32)}),
                    0);
      Chain = DAG.getStore(Chain, dl: Dl, Val: Tmp, Ptr: Addr, PtrInfo: MachinePointerInfo());
    }
    NewFPSCR =
        DAG.getLoad(VT: MVT::f64, dl: Dl, Chain, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
    Chain = NewFPSCR.getValue(R: 1);
  }
  if (Subtarget.isISA3_0())
    return SDValue(DAG.getMachineNode(Opcode: PPC::MFFSCRN, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
                                      Ops: {NewFPSCR, Chain}),
                   1);
  // Commit the new image to the FPSCR: mtfsf with all eight field-mask bits
  // (255) writes the whole register.
  SDValue Zero = DAG.getConstant(Val: 0, DL: Dl, VT: MVT::i32, isTarget: true);
  SDNode *MTFSF = DAG.getMachineNode(
      Opcode: PPC::MTFSF, dl: Dl, VT: MVT::Other,
      Ops: {DAG.getConstant(Val: 255, DL: Dl, VT: MVT::i32, isTarget: true), NewFPSCR, Zero, Zero, Chain});
  return SDValue(MTFSF, 0);
}
9080
/// Lower GET_ROUNDING (fegetround): read the FPSCR and translate the RN
/// field into the LLVM rounding-mode encoding.
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  GET_ROUNDING, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(i: 0);
  SDValue MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
  Chain = MFFS.getValue(R: 1);

  SDValue CWD;
  if (isTypeLegal(VT: MVT::i64)) {
    // FPSCR image fits in an i64 register: bitcast and keep the low word,
    // which holds the RN bits.
    CWD = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32,
                      Operand: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: MFFS));
  } else {
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo().CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
    SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
    Chain = DAG.getStore(Chain, dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo());

    // Load FP Control Word from low 32 bits of stack slot.
    assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
           "Stack slot adjustment is valid only on big endian subtargets!");
    SDValue Four = DAG.getConstant(Val: 4, DL: dl, VT: PtrVT);
    SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackSlot, N2: Four);
    CWD = DAG.getLoad(VT: MVT::i32, dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo());
    Chain = CWD.getValue(R: 1);
  }

  // Transform as necessary: CWD1 = FPSCR & 3, CWD2 = ((FPSCR ^ 3) & 3) >> 1
  // (equivalent to (~FPSCR & 3) >> 1 for the two low bits), result is the
  // XOR of the two — the formula documented above.
  SDValue CWD1 =
    DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                N1: CWD, N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32));
  SDValue CWD2 =
    DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
                N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                            N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32,
                                        N1: CWD, N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32)),
                            N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32)),
                N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));

  SDValue RetVal =
    DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: CWD1, N2: CWD2);

  // Adjust the i32 result to the requested result type.
  RetVal =
      DAG.getNode(Opcode: (VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
                  DL: dl, VT, Operand: RetVal);

  return DAG.getMergeValues(Ops: {RetVal, Chain}, dl);
}
9152
9153SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9154 EVT VT = Op.getValueType();
9155 uint64_t BitWidth = VT.getSizeInBits();
9156 SDLoc dl(Op);
9157 assert(Op.getNumOperands() == 3 &&
9158 VT == Op.getOperand(1).getValueType() &&
9159 "Unexpected SHL!");
9160
9161 // Expand into a bunch of logical ops. Note that these ops
9162 // depend on the PPC behavior for oversized shift amounts.
9163 SDValue Lo = Op.getOperand(i: 0);
9164 SDValue Hi = Op.getOperand(i: 1);
9165 SDValue Amt = Op.getOperand(i: 2);
9166 EVT AmtVT = Amt.getValueType();
9167
9168 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9169 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9170 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Amt);
9171 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Tmp1);
9172 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR , DL: dl, VT, N1: Tmp2, N2: Tmp3);
9173 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9174 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9175 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Tmp5);
9176 SDValue OutHi = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9177 SDValue OutLo = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Amt);
9178 SDValue OutOps[] = { OutLo, OutHi };
9179 return DAG.getMergeValues(Ops: OutOps, dl);
9180}
9181
9182SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9183 EVT VT = Op.getValueType();
9184 SDLoc dl(Op);
9185 uint64_t BitWidth = VT.getSizeInBits();
9186 assert(Op.getNumOperands() == 3 &&
9187 VT == Op.getOperand(1).getValueType() &&
9188 "Unexpected SRL!");
9189
9190 // Expand into a bunch of logical ops. Note that these ops
9191 // depend on the PPC behavior for oversized shift amounts.
9192 SDValue Lo = Op.getOperand(i: 0);
9193 SDValue Hi = Op.getOperand(i: 1);
9194 SDValue Amt = Op.getOperand(i: 2);
9195 EVT AmtVT = Amt.getValueType();
9196
9197 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9198 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9199 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9200 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9201 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9202 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9203 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9204 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Tmp5);
9205 SDValue OutLo = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9206 SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Amt);
9207 SDValue OutOps[] = { OutLo, OutHi };
9208 return DAG.getMergeValues(Ops: OutOps, dl);
9209}
9210
9211SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9212 SDLoc dl(Op);
9213 EVT VT = Op.getValueType();
9214 uint64_t BitWidth = VT.getSizeInBits();
9215 assert(Op.getNumOperands() == 3 &&
9216 VT == Op.getOperand(1).getValueType() &&
9217 "Unexpected SRA!");
9218
9219 // Expand into a bunch of logical ops, followed by a select_cc.
9220 SDValue Lo = Op.getOperand(i: 0);
9221 SDValue Hi = Op.getOperand(i: 1);
9222 SDValue Amt = Op.getOperand(i: 2);
9223 EVT AmtVT = Amt.getValueType();
9224
9225 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9226 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9227 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9228 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9229 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9230 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9231 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9232 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Tmp5);
9233 SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Amt);
9234 SDValue OutLo = DAG.getSelectCC(DL: dl, LHS: Tmp5, RHS: DAG.getConstant(Val: 0, DL: dl, VT: AmtVT),
9235 True: Tmp4, False: Tmp6, Cond: ISD::SETLE);
9236 SDValue OutOps[] = { OutLo, OutHi };
9237 return DAG.getMergeValues(Ops: OutOps, dl);
9238}
9239
9240SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9241 SelectionDAG &DAG) const {
9242 SDLoc dl(Op);
9243 EVT VT = Op.getValueType();
9244 unsigned BitWidth = VT.getSizeInBits();
9245
9246 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9247 SDValue X = Op.getOperand(i: 0);
9248 SDValue Y = Op.getOperand(i: 1);
9249 SDValue Z = Op.getOperand(i: 2);
9250 EVT AmtVT = Z.getValueType();
9251
9252 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9253 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9254 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9255 // on PowerPC shift by BW being well defined.
9256 Z = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Z,
9257 N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT: AmtVT));
9258 SDValue SubZ =
9259 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT, N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Z);
9260 X = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: X, N2: IsFSHL ? Z : SubZ);
9261 Y = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Y, N2: IsFSHL ? SubZ : Z);
9262 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: X, N2: Y);
9263}
9264
9265//===----------------------------------------------------------------------===//
9266// Vector related lowering.
9267//
9268
9269/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9270/// element size of SplatSize. Cast the result to VT.
9271static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9272 SelectionDAG &DAG, const SDLoc &dl) {
9273 static const MVT VTys[] = { // canonical VT to use for each size.
9274 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9275 };
9276
9277 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9278
9279 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9280 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9281 SplatSize = 1;
9282 Val = 0xFF;
9283 }
9284
9285 EVT CanonicalVT = VTys[SplatSize-1];
9286
9287 // Build a canonical splat for this value.
9288 // Explicitly truncate APInt here, as this API is used with a mix of
9289 // signed and unsigned values.
9290 return DAG.getBitcast(
9291 VT: ReqVT,
9292 V: DAG.getConstant(Val: APInt(64, Val).trunc(width: SplatSize * 8), DL: dl, VT: CanonicalVT));
9293}
9294
9295/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9296/// specified intrinsic ID.
9297static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9298 const SDLoc &dl, EVT DestVT = MVT::Other) {
9299 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9300 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9301 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op);
9302}
9303
9304/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9305/// specified intrinsic ID.
9306static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9307 SelectionDAG &DAG, const SDLoc &dl,
9308 EVT DestVT = MVT::Other) {
9309 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9310 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9311 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: LHS, N3: RHS);
9312}
9313
9314/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9315/// specified intrinsic ID.
9316static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9317 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9318 EVT DestVT = MVT::Other) {
9319 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9320 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9321 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op0, N3: Op1, N4: Op2);
9322}
9323
9324/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9325/// amount. The result has the specified value type.
9326static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9327 SelectionDAG &DAG, const SDLoc &dl) {
9328 // Force LHS/RHS to be the right type.
9329 LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: LHS);
9330 RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: RHS);
9331
9332 int Ops[16];
9333 for (unsigned i = 0; i != 16; ++i)
9334 Ops[i] = i + Amt;
9335 SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: LHS, N2: RHS, Mask: Ops);
9336 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
9337}
9338
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
/// \param HasP8Vector - does this subtarget have the P8 vector instructions
///        (required for the v4f32 case below)?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  EVT VecVT = V->getValueType(ResNo: 0);
  // Only these element types have efficient BUILD_VECTOR patterns: f64
  // always, f32 with P8 vector, and i32/i64 only when GPR->VSR direct moves
  // are available.
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(Num: 0);

  // This function is called in a block that confirms the node is not a constant
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
  // different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(Num: i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(Num: i).getOpcode() == ISD::LOAD ||
        (V->getOperand(Num: i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(Num: i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(Num: i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(Num: i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(N: V->getOperand(Num: i).getNode())))
      IsSplat = false;
  }
  // Keep the BUILD_VECTOR (return true) unless this is a load-and-splat,
  // which is better served by the expansion path.
  return !(IsSplat && IsLoad);
}
9391
9392// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9393SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9394
9395 SDLoc dl(Op);
9396 SDValue Op0 = Op->getOperand(Num: 0);
9397
9398 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9399 (Op.getValueType() != MVT::f128))
9400 return SDValue();
9401
9402 SDValue Lo = Op0.getOperand(i: 0);
9403 SDValue Hi = Op0.getOperand(i: 1);
9404 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9405 return SDValue();
9406
9407 if (!Subtarget.isLittleEndian())
9408 std::swap(a&: Lo, b&: Hi);
9409
9410 return DAG.getNode(Opcode: PPCISD::BUILD_FP128, DL: dl, VT: MVT::f128, N1: Lo, N2: Hi);
9411}
9412
9413static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9414 const SDValue *InputLoad = &Op;
9415 while (InputLoad->getOpcode() == ISD::BITCAST)
9416 InputLoad = &InputLoad->getOperand(i: 0);
9417 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9418 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9419 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9420 InputLoad = &InputLoad->getOperand(i: 0);
9421 }
9422 if (InputLoad->getOpcode() != ISD::LOAD)
9423 return nullptr;
9424 LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
9425 return ISD::isNormalLoad(N: LD) ? InputLoad : nullptr;
9426}
9427
9428// Convert the argument APFloat to a single precision APFloat if there is no
9429// loss in information during the conversion to single precision APFloat and the
9430// resulting number is not a denormal number. Return true if successful.
9431bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9432 APFloat APFloatToConvert = ArgAPFloat;
9433 bool LosesInfo = true;
9434 APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9435 losesInfo: &LosesInfo);
9436 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9437 if (Success)
9438 ArgAPFloat = APFloatToConvert;
9439 return Success;
9440}
9441
9442// Bitcast the argument APInt to a double and convert it to a single precision
9443// APFloat, bitcast the APFloat to an APInt and assign it to the original
9444// argument if there is no loss in information during the conversion from
9445// double to single precision APFloat and the resulting number is not a denormal
9446// number. Return true if successful.
9447bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9448 double DpValue = ArgAPInt.bitsToDouble();
9449 APFloat APFloatDp(DpValue);
9450 bool Success = convertToNonDenormSingle(ArgAPFloat&: APFloatDp);
9451 if (Success)
9452 ArgAPInt = APFloatDp.bitcastToAPInt();
9453 return Success;
9454}
9455
9456// Nondestructive check for convertTonNonDenormSingle.
9457bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9458 // Only convert if it loses info, since XXSPLTIDP should
9459 // handle the other case.
9460 APFloat APFloatToConvert = ArgAPFloat;
9461 bool LosesInfo = true;
9462 APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9463 losesInfo: &LosesInfo);
9464
9465 return (!LosesInfo && !APFloatToConvert.isDenormal());
9466}
9467
// Check whether the splat BUILD_VECTOR \p Op is fed (via operand 0) by an
// unindexed load that the VSX load-and-splat instructions can consume. For
// v2i64 built from an i32 sign/zero-extending load, \p Opcode is updated to
// the corresponding extending load-splat opcode; on every other successful
// path Opcode is left untouched. Returns false when VSX is unavailable or
// the load/type combination is unsupported.
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
                             unsigned &Opcode) {
  LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Val: Op.getOperand(i: 0));
  if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(N: InputNode))
    return false;

  EVT Ty = Op->getValueType(ResNo: 0);
  // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
  // as we cannot handle extending loads for these types.
  if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
      ISD::isNON_EXTLoad(N: InputNode))
    return true;

  EVT MemVT = InputNode->getMemoryVT();
  // For v8i16 and v16i8 types, extending loads can be handled as long as the
  // memory VT is the same vector element VT type.
  // The loads feeding into the v8i16 and v16i8 types will be extending because
  // scalar i8/i16 are not legal types.
  if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(N: InputNode) &&
      (MemVT == Ty.getVectorElementType()))
    return true;

  if (Ty == MVT::v2i64) {
    // Check the extend type, when the input type is i32, and the output vector
    // type is v2i64.
    if (MemVT == MVT::i32) {
      if (ISD::isZEXTLoad(N: InputNode))
        Opcode = PPCISD::ZEXT_LD_SPLAT;
      if (ISD::isSEXTLoad(N: InputNode))
        Opcode = PPCISD::SEXT_LD_SPLAT;
    }
    // NOTE(review): an any-extending i32 load falls through here with Opcode
    // unchanged (plain LD_SPLAT) — presumably intentional since the caller
    // re-derives element sizes; confirm.
    return true;
  }
  return false;
}
9503
// Check whether the constant BUILD_VECTOR \p BVN can be materialized with the
// MTVSRBMI instruction, i.e. every byte of the vector value is either 0x00 or
// 0xFF. On success, \p BitMask has bit J set iff byte J of the vector value
// is 0xFF. Returns false if any element is non-constant or any byte has a
// mixed bit pattern.
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
                     bool IsLittleEndian) {
  assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");

  BitMask.clearAllBits();
  EVT VT = BVN.getValueType(ResNo: 0);
  unsigned VTSize = VT.getSizeInBits();
  // Accumulate the full constant value of the vector, element by element.
  APInt ConstValue(VTSize, 0);

  unsigned EltWidth = VT.getScalarSizeInBits();

  unsigned BitPos = 0;
  for (auto OpVal : BVN.op_values()) {
    auto *CN = dyn_cast<ConstantSDNode>(Val&: OpVal);

    // Any non-constant element disqualifies the whole vector.
    if (!CN)
      return false;
    // The elements in a vector register are ordered in reverse byte order
    // between little-endian and big-endian modes.
    ConstValue.insertBits(SubBits: CN->getAPIntValue().zextOrTrunc(width: EltWidth),
                          bitPosition: IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
    BitPos += EltWidth;
  }

  // Scan the 16 bytes of the vector value; each must be all-zeros or
  // all-ones. NOTE(review): this assumes VTSize == 128 (a full vector
  // register) — confirm all callers satisfy that.
  for (unsigned J = 0; J < 16; ++J) {
    APInt ExtractValue = ConstValue.extractBits(numBits: 8, bitPosition: J * 8);
    if (ExtractValue != 0x00 && ExtractValue != 0xFF)
      return false;
    if (ExtractValue == 0xFF)
      BitMask.setBit(J);
  }
  return true;
}
9537
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // On P10, first try materializing a per-byte 0x00/0xFF mask with MTVSRBMI,
  // or matching one of the special-value build-vector patterns.
  if (Subtarget.hasP10Vector()) {
    APInt BitMask(32, 0);
    // If the value of the vector is all zeros or all ones,
    // we do not convert it to MTVSRBMI.
    // The xxleqv instruction sets a vector with all ones.
    // The xxlxor instruction sets a vector with all zeros.
    if (isValidMtVsrBmi(BitMask, BVN&: *BVN, IsLittleEndian: Subtarget.isLittleEndian()) &&
        BitMask != 0 && BitMask != 0xffff) {
      SDValue SDConstant = DAG.getTargetConstant(Val: BitMask, DL: dl, VT: MVT::i32);
      MachineSDNode *MSDNode =
          DAG.getMachineNode(Opcode: PPC::MTVSRBMI, dl, VT: MVT::v16i8, Op1: SDConstant);
      SDValue SDV = SDValue(MSDNode, 0);
      EVT DVT = BVN->getValueType(ResNo: 0);
      EVT SVT = SDV.getValueType();
      // MTVSRBMI produces v16i8; bitcast to the requested vector type if
      // they differ.
      if (SVT != DVT) {
        SDV = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: DVT, Operand: SDV);
      }
      return SDV;
    }
    // Recognize build vector patterns to emit VSX vector instructions
    // instead of loading value from memory.
    if (SDValue VecPat = combineBVLoadsSpecialValue(Operand: Op, DAG))
      return VecPat;
  }
  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool BVNIsConstantSplat =
      BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
                           HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian());

  // If it is a splat of a double, check if we can shrink it to a 32 bit
  // non-denormal float which when converted back to double gives us the same
  // double. This is to exploit the XXSPLTIDP instruction.
  // If we lose precision, we use XXSPLTI32DX.
  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
      Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
    // Check the type first to short-circuit so we don't modify APSplatBits if
    // this block isn't executed.
    if ((Op->getValueType(ResNo: 0) == MVT::v2f64) &&
        convertToNonDenormSingle(ArgAPInt&: APSplatBits)) {
      SDValue SplatNode = DAG.getNode(
          Opcode: PPCISD::XXSPLTI_SP_TO_DP, DL: dl, VT: MVT::v2f64,
          Operand: DAG.getTargetConstant(Val: APSplatBits.getZExtValue(), DL: dl, VT: MVT::i32));
      return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
    } else {
      // We may lose precision, so we have to use XXSPLTI32DX.

      // Splat the two 32-bit halves independently, skipping a half that is
      // zero (the XXLXOR / undef base already provides zeros).
      uint32_t Hi = Hi_32(Value: APSplatBits.getZExtValue());
      uint32_t Lo = Lo_32(Value: APSplatBits.getZExtValue());
      SDValue SplatNode = DAG.getUNDEF(VT: MVT::v2i64);

      if (!Hi || !Lo)
        // If either load is 0, then we should generate XXLXOR to set to 0.
        SplatNode = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::v2i64);

      if (Hi)
        SplatNode = DAG.getNode(
            Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
            N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
            N3: DAG.getTargetConstant(Val: Hi, DL: dl, VT: MVT::i32));

      if (Lo)
        SplatNode =
            DAG.getNode(Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
                        N2: DAG.getTargetConstant(Val: 1, DL: dl, VT: MVT::i32),
                        N3: DAG.getTargetConstant(Val: Lo, DL: dl, VT: MVT::i32));

      return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
    }
  }

  // For 64-bit splats, record whether the value fits the signed immediate
  // range of a single splat instruction: [-128,127] with P9 (xxspltib),
  // [-16,15] otherwise (vspltis[bhw]).
  bool IsSplat64 = false;
  uint64_t SplatBits = 0;
  int32_t SextVal = 0;
  if (BVNIsConstantSplat && SplatBitSize <= 64) {
    SplatBits = APSplatBits.getZExtValue();
    if (SplatBitSize <= 32) {
      SextVal = SignExtend32(X: SplatBits, B: SplatBitSize);
    } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
      int64_t Splat64Val = static_cast<int64_t>(SplatBits);
      bool P9Vector = Subtarget.hasP9Vector();
      int32_t Hi = P9Vector ? 127 : 15;
      int32_t Lo = P9Vector ? -128 : -16;
      IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
      SextVal = static_cast<int32_t>(SplatBits);
    }
  }

  if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
    unsigned NewOpcode = PPCISD::LD_SPLAT;

    // Handle load-and-splat patterns as we have instructions that will do this
    // in one go.
    if (DAG.isSplatValue(V: Op, AllowUndefs: true) &&
        isValidSplatLoad(Subtarget, Op, Opcode&: NewOpcode)) {
      const SDValue *InputLoad = &Op.getOperand(i: 0);
      LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);

      // If the input load is an extending load, it will be an i32 -> i64
      // extending load and isValidSplatLoad() will update NewOpcode.
      unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
      unsigned ElementSize =
          MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);

      assert(((ElementSize == 2 * MemorySize)
                  ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
                     NewOpcode == PPCISD::SEXT_LD_SPLAT)
                  : (NewOpcode == PPCISD::LD_SPLAT)) &&
             "Unmatched element size and opcode!\n");

      // Checking for a single use of this load, we have to check for vector
      // width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value.
      unsigned NumUsesOfInputLD = 128 / ElementSize;
      for (SDValue BVInOp : Op->ops())
        if (BVInOp.isUndef())
          NumUsesOfInputLD--;

      // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
      // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
      // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
      // 15", but function IsValidSplatLoad() now will only return true when
      // the data at index 0 is not nullptr. So we will not get into trouble for
      // these cases.
      //
      // case 1 - lfiwzx/lfiwax
      // 1.1: load result is i32 and is sign/zero extend to i64;
      // 1.2: build a v2i64 vector type with above loaded value;
      // 1.3: the vector has only one value at index 0, others are all undef;
      // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
      if (NumUsesOfInputLD == 1 &&
          (Op->getValueType(ResNo: 0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
           !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
           Subtarget.hasLFIWAX()))
        return SDValue();

      // case 2 - lxvr[hb]x
      // 2.1: load result is at most i16;
      // 2.2: build a vector with above loaded value;
      // 2.3: the vector has only one value at index 0, others are all undef;
      // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
      if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
          Subtarget.isISA3_1() && ElementSize <= 16)
        return SDValue();

      assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
      if (InputLoad->getNode()->hasNUsesOfValue(NUses: NumUsesOfInputLD, Value: 0) &&
          Subtarget.hasVSX()) {
        SDValue Ops[] = {
          LD->getChain(),    // Chain
          LD->getBasePtr(),  // Ptr
          DAG.getValueType(Op.getValueType())  // VT
        };
        SDValue LdSplt = DAG.getMemIntrinsicNode(
            Opcode: NewOpcode, dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other), Ops,
            MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
        // Replace all uses of the output chain of the original load with the
        // output chain of the new load.
        DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: 1),
                                      To: LdSplt.getValue(R: 1));
        return LdSplt;
      }
    }

    // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
    // 32-bits can be lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
        haveEfficientBuildVectorPattern(V: BVN, HasDirectMove: Subtarget.hasDirectMove(),
                                        HasP8Vector: Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  // From here on, the node is a constant splat of at most 32 bits (or one of
  // the small 64-bit splats admitted above). SplatSize is the element size
  // in bytes.
  uint64_t SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(Val: 0, DL: dl, VT: MVT::v4i32);
      Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Z);
    }
    return Op;
  }

  // We have XXSPLTIW for constant splats four bytes wide.
  // Given vector length is a multiple of 4, 2-byte splats can be replaced
  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
  // turned into a 4-byte splat of 0xABABABAB.
  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
    return getCanonicalConstSplat(Val: SplatBits | (SplatBits << 16), SplatSize: SplatSize * 2,
                                  VT: Op.getValueType(), DAG, dl);

  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
    return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
                                  dl);

  // We have XXSPLTIB for constant splats one byte wide.
  if (Subtarget.hasP9Vector() && SplatSize == 1)
    return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
                                  dl);

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
  if (SextVal >= -16 && SextVal <= 15) {
    // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
    // generate a splat word with extend for size 8.
    unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
    SDValue Res =
        getCanonicalConstSplat(Val: SextVal, SplatSize: UseSize, VT: Op.getValueType(), DAG, dl);
    if (SplatSize != 8)
      return Res;
    // For the 64-bit case, sign-extend the splatted words into doublewords.
    return BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vupklsw, Op: Res, DAG, dl);
  }

  // Two instruction sequences.

  // On P9: splat the byte value with XXSPLTIB, then widen it to the target
  // element size with the appropriate vector sign-extension intrinsic.
  if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
    SDValue C = DAG.getConstant(Val: (unsigned char)SextVal, DL: dl, VT: MVT::i32);
    SmallVector<SDValue, 16> Ops(16, C);
    SDValue BV = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops);
    unsigned IID;
    EVT VT;
    switch (SplatSize) {
    default:
      llvm_unreachable("Unexpected type for vector constant.");
    case 2:
      IID = Intrinsic::ppc_altivec_vupklsb;
      VT = MVT::v8i16;
      break;
    case 4:
      IID = Intrinsic::ppc_altivec_vextsb2w;
      VT = MVT::v4i32;
      break;
    case 8:
      IID = Intrinsic::ppc_altivec_vextsb2d;
      VT = MVT::v2i64;
      break;
    }
    SDValue Extend = BuildIntrinsicOp(IID, Op: BV, DAG, dl, DestVT: VT);
    return DAG.getBitcast(VT: Op->getValueType(ResNo: 0), V: Extend);
  }
  // Every accepted 64-bit splat must have been handled by one of the paths
  // above; the sequences below only apply to 32-bit-or-narrower elements.
  assert(!IsSplat64 && "Unhandled 64-bit splat pattern");

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getSignedConstant(Val: SextVal, DL: dl, VT: MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(Val: SplatSize, DL: dl, VT: MVT::i32);
    SDValue RetVal = DAG.getNode(Opcode: PPCISD::VADD_SPLAT, DL: dl, VT, N1: Elt, N2: EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = getCanonicalConstSplat(Val: -1, SplatSize: 4, VT: MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: OnesV,
                                   RHS: OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::v4i32, N1: Res, N2: OnesV);
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
    }
  }

  // Nothing matched; fall back to the default expansion.
  return SDValue();
}
9909
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  // A perfect-shuffle table entry packs an opcode in bits [29:26] and two
  // 13-bit operand IDs in bits [25:13] and [12:0]; the IDs index back into
  // the table for the recursively generated operands.
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // The IDs are base-9 encodings of the element selection: <0,1,2,3> is
    // the LHS identity and <4,5,6,7> the RHS identity.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize the two operands before combining them.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Build the byte-level shuffle mask corresponding to the word-level
  // operation, then emit it as a v16i8 VECTOR_SHUFFLE.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 4, VT: OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 8, VT: OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 12, VT: OpLHS.getValueType(), DAG, dl);
  }
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpLHS);
  OpRHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpRHS);
  SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OpLHS, N2: OpRHS, Mask: ShufIdxs);
  return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
}
9986
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(Num: 0);
  SDValue V2 = N->getOperand(Num: 1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  // The byte the instruction inserts from: element 8 on LE, 7 on BE.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for the source
    // element (8 on LE, 7 on BE) in the Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(a&: V1, b&: V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source so the byte to insert lands in the slot VINSERTB
    // reads from, then insert it at the computed byte offset.
    SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
                              N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: Shl,
                       N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
  }
  return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: V2,
                     N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
}
10087
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
///
/// VINSERTH inserts one half-word from the second operand into a fixed
/// element of the first; an optional VECSHL brings the desired half-word
/// into that fixed source element first.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, Width: 2, StepLen: 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(Num: 0);
  SDValue V2 = N->getOperand(Num: 1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  // Indexed by the (low 3 bits of the) source mask element.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Elements 0..7 / 8..15 packed as one 4-bit nibble each, element 0 in the
  // most significant nibble (the leading 0 nibble of 0x01234567 is elided).
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(Idx: i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa. Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // All-ones except for the nibble at position i: used to compare the
    // remaining elements against the expected identity order in one step.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      // With an undef second operand, only a move of the element that already
      // sits in VINSERTH's source slot (3 for BE, 4 for LE) needs no shift.
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // LE numbers bytes from the other end of the vector.
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        // Element came from V1, so the operands must be swapped so that the
        // inserted value comes from the second operand of VINSERTH.
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(a&: V1, b&: V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
                              N3: DAG.getConstant(Val: 2 * ShiftElts, DL: dl, VT: MVT::i32));
    SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: Shl);
    SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
                              N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
  }
  SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V2);
  SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
                            N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
  return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
}
10199
/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
/// return the default SDValue.
///
/// XXSPLTI32DX splats an immediate into words 0 and 2 (or 1 and 3) of the
/// source vector, leaving the other two words unchanged.
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
                                              SelectionDAG &DAG) const {
  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
  // to v16i8. Peek through the bitcasts to get the actual operands.
  SDValue LHS = peekThroughBitcasts(V: SVN->getOperand(Num: 0));
  SDValue RHS = peekThroughBitcasts(V: SVN->getOperand(Num: 1));

  auto ShuffleMask = SVN->getMask();
  SDValue VecShuffle(SVN, 0);
  SDLoc DL(SVN);

  // Check that we have a four byte shuffle.
  if (!isNByteElemShuffleMask(N: SVN, Width: 4, StepLen: 1))
    return SDValue();

  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
  // If the constant vector was the LHS, commute the shuffle so the mask
  // checks below only have to handle the RHS-is-constant form.
  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
    std::swap(a&: LHS, b&: RHS);
    VecShuffle = peekThroughBitcasts(V: DAG.getCommutedVectorShuffle(SV: *SVN));
    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(Val&: VecShuffle);
    if (!CommutedSV)
      return SDValue();
    ShuffleMask = CommutedSV->getMask();
  }

  // Ensure that the RHS is a vector of constants.
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: RHS.getNode());
  if (!BVN)
    return SDValue();

  // Check if RHS is a splat of 4-bytes (or smaller).
  APInt APSplatValue, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(SplatValue&: APSplatValue, SplatUndef&: APSplatUndef, SplatBitSize,
                            HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
  // The instruction splats a constant C into two words of the source vector
  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
  // Thus we check that the shuffle mask is the equivalent of
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
  // within each word are consecutive, so we only need to check the first byte.
  SDValue Index;
  bool IsLE = Subtarget.isLittleEndian();
  // Mask indices > 15 select bytes from the (constant) RHS.
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
    // Words 1 and 3 get the constant; the IX operand is endian-dependent.
    Index = DAG.getTargetConstant(Val: IsLE ? 0 : 1, DL, VT: MVT::i32);
  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
           (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
            ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
    // Words 0 and 2 get the constant.
    Index = DAG.getTargetConstant(Val: IsLE ? 1 : 0, DL, VT: MVT::i32);
  else
    return SDValue();

  // If the splat is narrower than 32-bits, we need to get the 32-bit value
  // for XXSPLTI32DX. Replicate the narrow pattern until it fills a word
  // (e.g. an 8-bit splat 0xAB widens to 0xABABABAB).
  unsigned SplatVal = APSplatValue.getZExtValue();
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);

  SDValue SplatNode = DAG.getNode(
      Opcode: PPCISD::XXSPLTI32DX, DL, VT: MVT::v2i64, N1: DAG.getBitcast(VT: MVT::v2i64, V: LHS),
      N2: Index, N3: DAG.getTargetConstant(Val: SplatVal, DL, VT: MVT::i32));
  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: SplatNode);
}
10273
10274/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10275/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10276/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10277/// i.e (or (shl x, C1), (srl x, 128-C1)).
10278SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10279 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10280 assert(Op.getValueType() == MVT::v1i128 &&
10281 "Only set v1i128 as custom, other type shouldn't reach here!");
10282 SDLoc dl(Op);
10283 SDValue N0 = peekThroughBitcasts(V: Op.getOperand(i: 0));
10284 SDValue N1 = peekThroughBitcasts(V: Op.getOperand(i: 1));
10285 unsigned SHLAmt = N1.getConstantOperandVal(i: 0);
10286 if (SHLAmt % 8 == 0) {
10287 std::array<int, 16> Mask;
10288 std::iota(first: Mask.begin(), last: Mask.end(), value: 0);
10289 std::rotate(first: Mask.begin(), middle: Mask.begin() + SHLAmt / 8, last: Mask.end());
10290 if (SDValue Shuffle =
10291 DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: DAG.getBitcast(VT: MVT::v16i8, V: N0),
10292 N2: DAG.getUNDEF(VT: MVT::v16i8), Mask))
10293 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: Shuffle);
10294 }
10295 SDValue ArgVal = DAG.getBitcast(VT: MVT::i128, V: N0);
10296 SDValue SHLOp = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ArgVal,
10297 N2: DAG.getConstant(Val: SHLAmt, DL: dl, VT: MVT::i32));
10298 SDValue SRLOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: ArgVal,
10299 N2: DAG.getConstant(Val: 128 - SHLAmt, DL: dl, VT: MVT::i32));
10300 SDValue OROp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i128, N1: SHLOp, N2: SRLOp);
10301 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: OROp);
10302}
10303
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
///
/// The lowering tries a cascade of single-instruction patterns (load-and-splat,
/// xxinsertw, xxsplti32dx, vinserth/vinsertb, xxsldwi, xxpermdi, byte
/// reversals, splats/swaps), then immediate-form Altivec shuffles, then the
/// perfect-shuffle table (BE only), and finally falls back to VPERM.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(i: 0);
  SDValue V2 = Op.getOperand(i: 1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);

  // Any nodes that were combined in the target-independent combiner prior
  // to vector legalization will not be sent to the target combine. Try to
  // combine it here.
  if (SDValue NewShuffle = combineVectorShuffle(SVN: SVOp, DAG)) {
    // The combine may have produced a non-shuffle node; return it directly.
    if (!isa<ShuffleVectorSDNode>(Val: NewShuffle))
      return NewShuffle;
    // Otherwise keep lowering, but against the combined shuffle.
    Op = NewShuffle;
    SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
    V1 = Op.getOperand(i: 0);
    V2 = Op.getOperand(i: 1);
  }
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;

  // If this is a load-and-splat, we can do that with a single instruction
  // in some cases. However if the load has multiple uses, we don't want to
  // combine it because that will just produce multiple loads.
  bool IsPermutedLoad = false;
  const SDValue *InputLoad = getNormalLoadInput(Op: V1, IsPermuted&: IsPermutedLoad);
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
      (PPC::isSplatShuffleMask(N: SVOp, EltSize: 4) || PPC::isSplatShuffleMask(N: SVOp, EltSize: 8)) &&
      InputLoad->hasOneUse()) {
    bool IsFourByte = PPC::isSplatShuffleMask(N: SVOp, EltSize: 4);
    int SplatIdx =
        PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: IsFourByte ? 4 : 8, DAG);

    // The splat index for permuted loads will be in the left half of the vector
    // which is strictly wider than the loaded value by 8 bytes. So we need to
    // adjust the splat index to point to the correct address in memory.
    if (IsPermutedLoad) {
      assert((isLittleEndian || IsFourByte) &&
             "Unexpected size for permuted load on big endian target");
      SplatIdx += IsFourByte ? 2 : 1;
      assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
             "Splat of a value outside of the loaded memory");
    }

    LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
    // For 4-byte load-and-splat, we need Power9.
    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
      uint64_t Offset = 0;
      // Byte offset of the splatted element from the load's base address;
      // element order is reversed relative to memory on little endian.
      if (IsFourByte)
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
      else
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

      // If the width of the load is the same as the width of the splat,
      // loading with an offset would load the wrong memory.
      if (LD->getValueType(ResNo: 0).getSizeInBits() == (IsFourByte ? 32 : 64))
        Offset = 0;

      SDValue BasePtr = LD->getBasePtr();
      if (Offset != 0)
        BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
                              N1: BasePtr, N2: DAG.getIntPtrConstant(Val: Offset, DL: dl));
      SDValue Ops[] = {
          LD->getChain(),                     // Chain
          BasePtr,                            // BasePtr
          DAG.getValueType(Op.getValueType()) // VT
      };
      SDVTList VTL =
          DAG.getVTList(VT1: IsFourByte ? MVT::v4i32 : MVT::v2i64, VT2: MVT::Other);
      SDValue LdSplt =
          DAG.getMemIntrinsicNode(Opcode: PPCISD::LD_SPLAT, dl, VTList: VTL,
                                  Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
      // The splatting load supersedes the original one; forward its chain so
      // dependent memory operations stay ordered.
      DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: 1), To: LdSplt.getValue(R: 1));
      if (LdSplt.getValueType() != SVOp->getValueType(ResNo: 0))
        LdSplt = DAG.getBitcast(VT: SVOp->getValueType(ResNo: 0), V: LdSplt);
      return LdSplt;
    }
  }

  // All v2i64 and v2f64 shuffles are legal
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return Op;

  // Try a Power9 word insert (xxinsertw), optionally rotating the inserted
  // word into position first (VECSHL).
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(N: SVOp, ShiftElts, InsertAtByte, Swap,
                           IsLE: isLittleEndian)) {
    if (V2.isUndef())
      V2 = V1;
    else if (Swap)
      std::swap(a&: V1, b&: V2);
    SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
    SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv2, N2: Conv2,
                                N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
      SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Shl,
                                N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
    }
    SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
                              N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
  }

  // Try the ISA 3.1 immediate word splat (xxsplti32dx).
  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
    SDValue SplatInsertNode;
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVN: SVOp, DAG)))
      return SplatInsertNode;
  }

  // Try the ISA 3.0 element inserts (vinserth, then vinsertb).
  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(N: SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(N: SVOp, DAG)))
      return NewISDNode;
  }

  // Try a VSX shift-left-double by words (xxsldwi).
  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(N: SVOp, ShiftElts, Swap, IsLE: isLittleEndian)) {
    if (Swap)
      std::swap(a&: V1, b&: V2);
    SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
    SDValue Conv2 =
        DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
                              N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Shl);
  }

  // Try a VSX doubleword permute (xxpermdi); ShiftElts doubles as the DM
  // immediate here.
  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(N: SVOp, DM&: ShiftElts, Swap, IsLE: isLittleEndian)) {
    if (Swap)
      std::swap(a&: V1, b&: V2);
    SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
    SDValue Conv2 =
        DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(Opcode: PPCISD::XXPERMDI, DL: dl, VT: MVT::v2i64, N1: Conv1, N2: Conv2,
                                 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: PermDI);
  }

  // Power9 byte reversals within elements: xxbrh/xxbrw/xxbrd/xxbrq,
  // expressed as BSWAP on the suitably-typed vector.
  if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(N: SVOp)) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
      SDValue ReveHWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v8i16, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(N: SVOp)) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
      SDValue ReveWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v4i32, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveWord);
    } else if (PPC::isXXBRDShuffleMask(N: SVOp)) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
      SDValue ReveDWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(N: SVOp)) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: V1);
      SDValue ReveQWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v1i128, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    // Word splat (xxspltw).
    if (V2.isUndef() && PPC::isSplatShuffleMask(N: SVOp, EltSize: 4)) {
      int SplatIdx = PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: 4, DAG);

      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
      SDValue Splat = DAG.getNode(Opcode: PPCISD::XXSPLT, DL: dl, VT: MVT::v4i32, N1: Conv,
                                  N2: DAG.getConstant(Val: SplatIdx, DL: dl, VT: MVT::i32));
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: V1);
      SDValue Swap = DAG.getNode(Opcode: PPCISD::SWAP_NO_CHAIN, DL: dl, VT: MVT::v2f64, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Swap);
    }
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(N: SVOp, EltSize: 1) ||
        PPC::isSplatShuffleMask(N: SVOp, EltSize: 2) ||
        PPC::isSplatShuffleMask(N: SVOp, EltSize: 4) ||
        PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
        PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind: 1, DAG) ||
        PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind: 1, DAG) ||
        PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind: 1, DAG) ||
        PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind: 1, DAG) ||
        PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind: 1, DAG) ||
        PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind: 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
         PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind: 1, DAG) ||
         PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind: 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation. If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  if (!DisablePerfectShuffle && !isLittleEndian) {
    // PFIndexes[i] holds the source word for result word i, or 8 for undef.
    unsigned PFIndexes[4];
    bool isFourElementShuffle = true;
    for (unsigned i = 0; i != 4 && isFourElementShuffle;
         ++i) { // Element number
      unsigned EltNo = 8; // Start out undef.
      for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
        if (PermMask[i * 4 + j] < 0)
          continue; // Undef, ignore it.

        unsigned ByteSource = PermMask[i * 4 + j];
        // The bytes of each result word must come, in order, from a single
        // source word; otherwise this is not a word-granularity shuffle.
        if ((ByteSource & 3) != j) {
          isFourElementShuffle = false;
          break;
        }

        if (EltNo == 8) {
          EltNo = ByteSource / 4;
        } else if (EltNo != ByteSource / 4) {
          isFourElementShuffle = false;
          break;
        }
      }
      PFIndexes[i] = EltNo;
    }

    // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
    // perfect shuffle vector to determine if it is cost effective to do this as
    // discrete instructions, or whether we should use a vperm.
    // For now, we skip this for little endian until such time as we have a
    // little-endian perfect shuffle table.
    if (isFourElementShuffle) {
      // Compute the index in the perfect shuffle table.
      unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
                              PFIndexes[2] * 9 + PFIndexes[3];

      unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
      // The top two bits of the table entry encode the operation count.
      unsigned Cost = (PFEntry >> 30);

      // Determining when to avoid vperm is tricky. Many things affect the cost
      // of vperm, particularly how many times the perm mask needs to be
      // computed. For example, if the perm mask can be hoisted out of a loop or
      // is already used (perhaps because there are multiple permutes with the
      // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
      // permute mask out of the loop requires an extra register.
      //
      // As a compromise, we only emit discrete instructions if the shuffle can
      // be generated in 3 or fewer operations. When we have loop information
      // available, if this block is within a loop, we should avoid using vperm
      // for 3-operation perms and use a constant pool load instead.
      if (Cost < 3)
        return GeneratePerfectShuffle(PFEntry, LHS: V1, RHS: V2, DAG, dl);
    }
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
}
10602
/// LowerVPERM - Emit a VPERM (or, on Power9 when one input is dead, an
/// XXPERM) for the given shuffle, building the byte-granular permute control
/// vector from PermMask. Handles little-endian mask complementation and
/// elides XXSWAPD nodes feeding the inputs by folding the swap into the mask.
SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
                                      ArrayRef<int> PermMask, EVT VT,
                                      SDValue V1, SDValue V2) const {
  unsigned Opcode = PPCISD::VPERM;
  EVT ValType = V1.getValueType();
  SDLoc dl(Op);
  bool NeedSwap = false;
  bool isLittleEndian = Subtarget.isLittleEndian();
  bool isPPC64 = Subtarget.isPPC64();

  if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
      (V1->hasOneUse() || V2->hasOneUse())) {
    LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
                         "XXPERM instead\n");
    Opcode = PPCISD::XXPERM;

    // The second input to XXPERM is also an output so if the second input has
    // multiple uses then copying is necessary, as a result we want the
    // single-use operand to be used as the second input to prevent copying.
    if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
        (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
      std::swap(a&: V1, b&: V2);
      // Record the swap so the mask indices are remapped below.
      NeedSwap = !NeedSwap;
    }
  }

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes. Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31. This is
  // necessary to produce proper semantics with the big-endian-based vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits() / 8;

  // NOTE(review): this assumes V1/V2 each have at least one operand —
  // presumably guaranteed by the shuffle inputs reaching here; confirm.
  bool V1HasXXSWAPD = V1->getOperand(Num: 0)->getOpcode() == PPCISD::XXSWAPD;
  bool V2HasXXSWAPD = V2->getOperand(Num: 0)->getOpcode() == PPCISD::XXSWAPD;

  /*
  Vectors will be appended like so: [ V1 | v2 ]
  XXSWAPD on V1:
  [   A   |   B   |   C   |   D   ] -> [   C   |   D   |   A   |   B   ]
     0-3     4-7    8-11   12-15         0-3     4-7    8-11   12-15
  i.e. index of A, B += 8, and index of C, D -= 8.
  XXSWAPD on V2:
  [   E   |   F   |   G   |   H   ] -> [   G   |   H   |   E   |   F   ]
    16-19   20-23  24-27   28-31        16-19   20-23  24-27   28-31
  i.e. index of E, F += 8, index of G, H -= 8
  Swap V1 and V2:
  [   V1   |   V2   ] -> [   V2   |   V1   ]
     0-15     16-31         0-15     16-31
  i.e. index of V1 += 16, index of V2 -= 16
  */

  // Build one control-vector byte per result byte, applying the XXSWAPD and
  // operand-swap remappings described above.
  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    // Undef mask elements are arbitrary; pick source byte 0.
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    if (V1HasXXSWAPD) {
      if (SrcElt < 8)
        SrcElt += 8;
      else if (SrcElt < 16)
        SrcElt -= 8;
    }
    if (V2HasXXSWAPD) {
      if (SrcElt > 23)
        SrcElt -= 8;
      else if (SrcElt > 15)
        SrcElt += 8;
    }
    if (NeedSwap) {
      if (SrcElt < 16)
        SrcElt += 16;
      else
        SrcElt -= 16;
    }
    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        // Complement with respect to 31 for the big-endian-based vperm.
        ResultMask.push_back(
            Elt: DAG.getConstant(Val: 31 - (SrcElt * BytesPerElement + j), DL: dl, VT: MVT::i32));
      else
        ResultMask.push_back(
            Elt: DAG.getConstant(Val: SrcElt * BytesPerElement + j, DL: dl, VT: MVT::i32));
  }

  // The swaps are now folded into the mask, so feed the pre-swap values
  // (the XXSWAPD's input) directly into the permute.
  if (V1HasXXSWAPD) {
    dl = SDLoc(V1->getOperand(Num: 0));
    V1 = V1->getOperand(Num: 0)->getOperand(Num: 1);
  }
  if (V2HasXXSWAPD) {
    dl = SDLoc(V2->getOperand(Num: 0));
    V2 = V2->getOperand(Num: 0)->getOperand(Num: 1);
  }

  if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
    if (ValType != MVT::v2f64)
      V1 = DAG.getBitcast(VT: MVT::v2f64, V: V1);
    if (V2.getValueType() != MVT::v2f64)
      V2 = DAG.getBitcast(VT: MVT::v2f64, V: V2);
  }

  // Statistic counter: shuffles that fell through to VPERM/XXPERM.
  ShufflesHandledWithVPERM++;
  SDValue VPermMask = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops: ResultMask);
  LLVM_DEBUG({
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
    if (Opcode == PPCISD::XXPERM) {
      dbgs() << "Emitting a XXPERM for the following shuffle:\n";
    } else {
      dbgs() << "Emitting a VPERM for the following shuffle:\n";
    }
    SVOp->dump();
    dbgs() << "With the following permute control vector:\n";
    VPermMask.dump();
  });

  if (Opcode == PPCISD::XXPERM)
    VPermMask = DAG.getBitcast(VT: MVT::v4i32, V: VPermMask);

  // Only need to place items backwards in LE,
  // the mask was properly calculated.
  if (isLittleEndian)
    std::swap(a&: V1, b&: V2);

  SDValue VPERMNode =
      DAG.getNode(Opcode, DL: dl, VT: V1.getValueType(), N1: V1, N2: V2, N3: VPermMask);

  VPERMNode = DAG.getBitcast(VT: ValType, V: VPERMNode);
  return VPERMNode;
}
10733
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison. If it is, return true and fill in CompareOpc/isDot with
/// information about the intrinsic: CompareOpc receives the extended-opcode
/// value of the matching vector compare instruction (consumed later by the
/// PPCISD::VCMP / VCMP_rec nodes built in LowerINTRINSIC_WO_CHAIN), and isDot
/// is set for the predicate ("record"/dot) forms that also update CR6.
/// Intrinsics whose instructions need a newer ISA level return false when the
/// subtarget lacks the feature, so they fall back to default lowering.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  // Operand 0 of an INTRINSIC_WO_CHAIN node holds the intrinsic ID.
  unsigned IntrinsicID = Intrin.getConstantOperandVal(i: 0);
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword (64-bit element) compares need VSX or P8 AltiVec.
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // The vcmpne / vcmpnez predicate forms were introduced with P9 AltiVec.
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // Quadword (128-bit element) compares require ISA 3.1. Note these are the
  // non-dot forms: isDot stays false.
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq:
      CompareOpc = 647;
      break;
    }
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      // NOTE(review): unlike the sibling inner switches this one has no
      // default case; the outer case labels restrict IntrinsicID to the six
      // values handled below, so CompareOpc is always assigned.
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons. (Non-predicate forms: isDot stays false, so the
  // caller emits a plain VCMP node rather than VCMP_rec.)
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  // Quadword predicate (dot) compares, also ISA 3.1 only.
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      CompareOpc = 647;
      break;
    }
    isDot = true;
    break;
  }
  return true;
}
11019
11020/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11021/// lower, do it, otherwise return null.
11022SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11023 SelectionDAG &DAG) const {
11024 unsigned IntrinsicID = Op.getConstantOperandVal(i: 0);
11025
11026 SDLoc dl(Op);
11027 // Note: BCD instructions expect the immediate operand in vector form (v4i32),
11028 // but the builtin provides it as a scalar. To satisfy the instruction
11029 // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
11030 auto MapNodeWithSplatVector =
11031 [&](unsigned Opcode,
11032 std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
11033 SDValue SplatVal =
11034 DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: MVT::v4i32, Operand: Op.getOperand(i: 2));
11035
11036 SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(i: 1)};
11037 Ops.append(in_start: ExtraOps.begin(), in_end: ExtraOps.end());
11038 return DAG.getNode(Opcode, DL: dl, VT: MVT::v16i8, Ops);
11039 };
11040
11041 switch (IntrinsicID) {
11042 case Intrinsic::thread_pointer:
11043 // Reads the thread pointer register, used for __builtin_thread_pointer.
11044 if (Subtarget.isPPC64())
11045 return DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
11046 return DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);
11047
11048 case Intrinsic::ppc_rldimi: {
11049 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11050 SDValue Src = Op.getOperand(i: 1);
11051 APInt Mask = Op.getConstantOperandAPInt(i: 4);
11052 if (Mask.isZero())
11053 return Op.getOperand(i: 2);
11054 if (Mask.isAllOnes())
11055 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src, N2: Op.getOperand(i: 3));
11056 uint64_t SH = Op.getConstantOperandVal(i: 3);
11057 unsigned MB = 0, ME = 0;
11058 if (!isRunOfOnes64(Val: Mask.getZExtValue(), MB, ME))
11059 report_fatal_error(reason: "invalid rldimi mask!");
11060 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11061 if (ME < 63 - SH) {
11062 Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
11063 N2: DAG.getConstant(Val: ME + SH + 1, DL: dl, VT: MVT::i32));
11064 } else if (ME > 63 - SH) {
11065 Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
11066 N2: DAG.getConstant(Val: ME + SH - 63, DL: dl, VT: MVT::i32));
11067 }
11068 return SDValue(
11069 DAG.getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64,
11070 Ops: {Op.getOperand(i: 2), Src,
11071 DAG.getTargetConstant(Val: 63 - ME, DL: dl, VT: MVT::i32),
11072 DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32)}),
11073 0);
11074 }
11075
11076 case Intrinsic::ppc_rlwimi: {
11077 APInt Mask = Op.getConstantOperandAPInt(i: 4);
11078 if (Mask.isZero())
11079 return Op.getOperand(i: 2);
11080 if (Mask.isAllOnes())
11081 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i32, N1: Op.getOperand(i: 1),
11082 N2: Op.getOperand(i: 3));
11083 unsigned MB = 0, ME = 0;
11084 if (!isRunOfOnes(Val: Mask.getZExtValue(), MB, ME))
11085 report_fatal_error(reason: "invalid rlwimi mask!");
11086 return SDValue(DAG.getMachineNode(
11087 Opcode: PPC::RLWIMI, dl, VT: MVT::i32,
11088 Ops: {Op.getOperand(i: 2), Op.getOperand(i: 1), Op.getOperand(i: 3),
11089 DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
11090 DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
11091 0);
11092 }
11093
11094 case Intrinsic::ppc_bcdshift:
11095 return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(i: 3)});
11096 case Intrinsic::ppc_bcdshiftround:
11097 return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(i: 3)});
11098 case Intrinsic::ppc_bcdtruncate:
11099 return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(i: 3)});
11100 case Intrinsic::ppc_bcdunsignedtruncate:
11101 return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
11102 case Intrinsic::ppc_bcdunsignedshift:
11103 return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);
11104
11105 case Intrinsic::ppc_rlwnm: {
11106 if (Op.getConstantOperandVal(i: 3) == 0)
11107 return DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32);
11108 unsigned MB = 0, ME = 0;
11109 if (!isRunOfOnes(Val: Op.getConstantOperandVal(i: 3), MB, ME))
11110 report_fatal_error(reason: "invalid rlwnm mask!");
11111 return SDValue(
11112 DAG.getMachineNode(Opcode: PPC::RLWNM, dl, VT: MVT::i32,
11113 Ops: {Op.getOperand(i: 1), Op.getOperand(i: 2),
11114 DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
11115 DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
11116 0);
11117 }
11118
11119 case Intrinsic::ppc_mma_disassemble_acc: {
11120 if (Subtarget.isISAFuture()) {
11121 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11122 SDValue WideVec =
11123 SDValue(DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes,
11124 Ops: Op.getOperand(i: 1)),
11125 0);
11126 SmallVector<SDValue, 4> RetOps;
11127 SDValue Value = SDValue(WideVec.getNode(), 0);
11128 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11129
11130 SDValue Extract;
11131 Extract = DAG.getNode(
11132 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11133 N1: Subtarget.isLittleEndian() ? Value2 : Value,
11134 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 1 : 0,
11135 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11136 RetOps.push_back(Elt: Extract);
11137 Extract = DAG.getNode(
11138 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11139 N1: Subtarget.isLittleEndian() ? Value2 : Value,
11140 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 0 : 1,
11141 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11142 RetOps.push_back(Elt: Extract);
11143 Extract = DAG.getNode(
11144 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11145 N1: Subtarget.isLittleEndian() ? Value : Value2,
11146 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 1 : 0,
11147 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11148 RetOps.push_back(Elt: Extract);
11149 Extract = DAG.getNode(
11150 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11151 N1: Subtarget.isLittleEndian() ? Value : Value2,
11152 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 0 : 1,
11153 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11154 RetOps.push_back(Elt: Extract);
11155 return DAG.getMergeValues(Ops: RetOps, dl);
11156 }
11157 [[fallthrough]];
11158 }
11159 case Intrinsic::ppc_vsx_disassemble_pair: {
11160 int NumVecs = 2;
11161 SDValue WideVec = Op.getOperand(i: 1);
11162 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11163 NumVecs = 4;
11164 WideVec = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: WideVec);
11165 }
11166 SmallVector<SDValue, 4> RetOps;
11167 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11168 SDValue Extract = DAG.getNode(
11169 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: WideVec,
11170 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11171 : VecNo,
11172 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11173 RetOps.push_back(Elt: Extract);
11174 }
11175 return DAG.getMergeValues(Ops: RetOps, dl);
11176 }
11177
11178 case Intrinsic::ppc_build_dmr: {
11179 SmallVector<SDValue, 8> Pairs;
11180 SmallVector<SDValue, 8> Chains;
11181 for (int i = 1; i < 9; i += 2) {
11182 SDValue Hi = Op.getOperand(i);
11183 SDValue Lo = Op.getOperand(i: i + 1);
11184 if (Hi->getOpcode() == ISD::LOAD)
11185 Chains.push_back(Elt: Hi.getValue(R: 1));
11186 if (Lo->getOpcode() == ISD::LOAD)
11187 Chains.push_back(Elt: Lo.getValue(R: 1));
11188 Pairs.push_back(
11189 Elt: DAG.getNode(Opcode: PPCISD::PAIR_BUILD, DL: dl, VT: MVT::v256i1, Ops: {Hi, Lo}));
11190 }
11191 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Chains);
11192 SDValue Value = DMFInsert1024(Pairs, dl: SDLoc(Op), DAG);
11193 return DAG.getMergeValues(Ops: {Value, TF}, dl);
11194 }
11195
11196 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11197 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11198 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11199 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11200 "Specify P of 0 or 1 for lower or upper 512 bytes");
11201 unsigned HiLo = Idx->getSExtValue();
11202 unsigned Opcode;
11203 unsigned Subx;
11204 if (HiLo == 0) {
11205 Opcode = PPC::DMXXEXTFDMR512;
11206 Subx = PPC::sub_wacc_lo;
11207 } else {
11208 Opcode = PPC::DMXXEXTFDMR512_HI;
11209 Subx = PPC::sub_wacc_hi;
11210 }
11211 SDValue Subreg(
11212 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
11213 Op1: Op.getOperand(i: 1),
11214 Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
11215 0);
11216 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11217 return SDValue(DAG.getMachineNode(Opcode, dl, ResultTys: ReturnTypes, Ops: Subreg), 0);
11218 }
11219
11220 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11221 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11222 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11223 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11224 "Specify a dmr row pair 0-3");
11225 unsigned IdxVal = Idx->getSExtValue();
11226 unsigned Subx;
11227 switch (IdxVal) {
11228 case 0:
11229 Subx = PPC::sub_dmrrowp0;
11230 break;
11231 case 1:
11232 Subx = PPC::sub_dmrrowp1;
11233 break;
11234 case 2:
11235 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11236 break;
11237 case 3:
11238 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11239 break;
11240 }
11241 SDValue Subreg(
11242 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v256i1,
11243 Op1: Op.getOperand(i: 1),
11244 Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
11245 0);
11246 SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
11247 return SDValue(
11248 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR256, dl, VT: MVT::v256i1, Ops: {Subreg, P}),
11249 0);
11250 }
11251
11252 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11253 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11254 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 4));
11255 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11256 "Specify P of 0 or 1 for lower or upper 512 bytes");
11257 unsigned HiLo = Idx->getSExtValue();
11258 unsigned Opcode;
11259 unsigned Subx;
11260 if (HiLo == 0) {
11261 Opcode = PPCISD::INST512;
11262 Subx = PPC::sub_wacc_lo;
11263 } else {
11264 Opcode = PPCISD::INST512HI;
11265 Subx = PPC::sub_wacc_hi;
11266 }
11267 SDValue Wacc = DAG.getNode(Opcode, DL: dl, VT: MVT::v512i1, N1: Op.getOperand(i: 2),
11268 N2: Op.getOperand(i: 3));
11269 SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
11270 return SDValue(DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
11271 Op1: Op.getOperand(i: 1), Op2: Wacc, Op3: SubReg),
11272 0);
11273 }
11274
11275 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11276 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11277 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 3));
11278 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11279 "Specify a dmr row pair 0-3");
11280 unsigned IdxVal = Idx->getSExtValue();
11281 unsigned Subx;
11282 switch (IdxVal) {
11283 case 0:
11284 Subx = PPC::sub_dmrrowp0;
11285 break;
11286 case 1:
11287 Subx = PPC::sub_dmrrowp1;
11288 break;
11289 case 2:
11290 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11291 break;
11292 case 3:
11293 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11294 break;
11295 }
11296 SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
11297 SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
11298 SDValue DMRRowp =
11299 DAG.getNode(Opcode: PPCISD::INST256, DL: dl, VT: MVT::v256i1, N1: Op.getOperand(i: 2), N2: P);
11300 return SDValue(DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
11301 Op1: Op.getOperand(i: 1), Op2: DMRRowp, Op3: SubReg),
11302 0);
11303 }
11304
11305 case Intrinsic::ppc_mma_xxmfacc:
11306 case Intrinsic::ppc_mma_xxmtacc: {
11307 // Allow pre-isa-future subtargets to lower as normal.
11308 if (!Subtarget.isISAFuture())
11309 return SDValue();
11310 // The intrinsics for xxmtacc and xxmfacc take one argument of
11311 // type v512i1, for future cpu the corresponding wacc instruction
11312 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11313 // the need to produce the xxm[t|f]acc.
11314 SDValue WideVec = Op.getOperand(i: 1);
11315 DAG.ReplaceAllUsesWith(From: Op, To: WideVec);
11316 return SDValue();
11317 }
11318
11319 case Intrinsic::ppc_unpack_longdouble: {
11320 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11321 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11322 "Argument of long double unpack must be 0 or 1!");
11323 return DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: dl, VT: MVT::f64, N1: Op.getOperand(i: 1),
11324 N2: DAG.getConstant(Val: !!(Idx->getSExtValue()), DL: dl,
11325 VT: Idx->getValueType(ResNo: 0)));
11326 }
11327
11328 case Intrinsic::ppc_compare_exp_lt:
11329 case Intrinsic::ppc_compare_exp_gt:
11330 case Intrinsic::ppc_compare_exp_eq:
11331 case Intrinsic::ppc_compare_exp_uo: {
11332 unsigned Pred;
11333 switch (IntrinsicID) {
11334 case Intrinsic::ppc_compare_exp_lt:
11335 Pred = PPC::PRED_LT;
11336 break;
11337 case Intrinsic::ppc_compare_exp_gt:
11338 Pred = PPC::PRED_GT;
11339 break;
11340 case Intrinsic::ppc_compare_exp_eq:
11341 Pred = PPC::PRED_EQ;
11342 break;
11343 case Intrinsic::ppc_compare_exp_uo:
11344 Pred = PPC::PRED_UN;
11345 break;
11346 }
11347 return SDValue(
11348 DAG.getMachineNode(
11349 Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
11350 Ops: {SDValue(DAG.getMachineNode(Opcode: PPC::XSCMPEXPDP, dl, VT: MVT::i32,
11351 Op1: Op.getOperand(i: 1), Op2: Op.getOperand(i: 2)),
11352 0),
11353 DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32), DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
11354 DAG.getTargetConstant(Val: Pred, DL: dl, VT: MVT::i32)}),
11355 0);
11356 }
11357 case Intrinsic::ppc_test_data_class: {
11358 EVT OpVT = Op.getOperand(i: 1).getValueType();
11359 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11360 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11361 : PPC::XSTSTDCSP);
11362 // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
11363 // The XSTSTDC* instructions test if a floating-point value matches any of
11364 // the data classes specified in the mask, setting CR field bits
11365 // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
11366 // convert it to an integer result (1 if match, 0 if no match).
11367 //
11368 // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
11369 // intrinsic provides (value, mask) as Op.getOperand(1) and
11370 // Op.getOperand(2).
11371 SDValue TestDataClass =
11372 SDValue(DAG.getMachineNode(Opcode: CmprOpc, dl, VT: MVT::i32,
11373 Ops: {Op.getOperand(i: 2), Op.getOperand(i: 1)}),
11374 0);
11375 if (Subtarget.isISA3_1()) {
11376 // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
11377 // This is more efficient than the SELECT_CC approach used in earlier
11378 // ISAs.
11379 SDValue SubRegIdx = DAG.getTargetConstant(Val: PPC::sub_eq, DL: dl, VT: MVT::i32);
11380 SDValue CRBit =
11381 SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
11382 Op1: TestDataClass, Op2: SubRegIdx),
11383 0);
11384
11385 return DAG.getNode(Opcode: PPCISD::SETBC, DL: dl, VT: MVT::i32, Operand: CRBit);
11386 }
11387
11388 // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
11389 return SDValue(
11390 DAG.getMachineNode(Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
11391 Ops: {TestDataClass, DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32),
11392 DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
11393 DAG.getTargetConstant(Val: PPC::PRED_EQ, DL: dl, VT: MVT::i32)}),
11394 0);
11395 }
11396 case Intrinsic::ppc_fnmsub: {
11397 EVT VT = Op.getOperand(i: 1).getValueType();
11398 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11399 return DAG.getNode(
11400 Opcode: ISD::FNEG, DL: dl, VT,
11401 Operand: DAG.getNode(Opcode: ISD::FMA, DL: dl, VT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
11402 N3: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op.getOperand(i: 3))));
11403 return DAG.getNode(Opcode: PPCISD::FNMSUB, DL: dl, VT, N1: Op.getOperand(i: 1),
11404 N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
11405 }
11406 case Intrinsic::ppc_convert_f128_to_ppcf128:
11407 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11408 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11409 ? RTLIB::CONVERT_PPCF128_F128
11410 : RTLIB::CONVERT_F128_PPCF128;
11411 MakeLibCallOptions CallOptions;
11412 std::pair<SDValue, SDValue> Result =
11413 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op.getOperand(i: 1), CallOptions,
11414 dl, Chain: SDValue());
11415 return Result.first;
11416 }
11417 case Intrinsic::ppc_maxfe:
11418 case Intrinsic::ppc_maxfl:
11419 case Intrinsic::ppc_maxfs:
11420 case Intrinsic::ppc_minfe:
11421 case Intrinsic::ppc_minfl:
11422 case Intrinsic::ppc_minfs: {
11423 EVT VT = Op.getValueType();
11424 assert(
11425 all_of(Op->ops().drop_front(4),
11426 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11427 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11428 (void)VT;
11429 ISD::CondCode CC = ISD::SETGT;
11430 if (IntrinsicID == Intrinsic::ppc_minfe ||
11431 IntrinsicID == Intrinsic::ppc_minfl ||
11432 IntrinsicID == Intrinsic::ppc_minfs)
11433 CC = ISD::SETLT;
11434 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11435 SDValue Res = Op.getOperand(i: I);
11436 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11437 Res =
11438 DAG.getSelectCC(DL: dl, LHS: Res, RHS: Op.getOperand(i: I), True: Res, False: Op.getOperand(i: I), Cond: CC);
11439 }
11440 return Res;
11441 }
11442 }
11443
11444 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11445 // opcode number of the comparison.
11446 int CompareOpc;
11447 bool isDot;
11448 if (!getVectorCompareInfo(Intrin: Op, CompareOpc, isDot, Subtarget))
11449 return SDValue(); // Don't custom lower most intrinsics.
11450
11451 // If this is a non-dot comparison, make the VCMP node and we are done.
11452 if (!isDot) {
11453 SDValue Tmp = DAG.getNode(Opcode: PPCISD::VCMP, DL: dl, VT: Op.getOperand(i: 2).getValueType(),
11454 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
11455 N3: DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32));
11456 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Tmp);
11457 }
11458
11459 // Create the PPCISD altivec 'dot' comparison node.
11460 SDValue Ops[] = {
11461 Op.getOperand(i: 2), // LHS
11462 Op.getOperand(i: 3), // RHS
11463 DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
11464 };
11465 EVT VTs[] = { Op.getOperand(i: 2).getValueType(), MVT::Glue };
11466 SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);
11467
11468 // Unpack the result based on how the target uses it.
11469 unsigned BitNo; // Bit # of CR6.
11470 bool InvertBit; // Invert result?
11471 unsigned Bitx;
11472 unsigned SetOp;
11473 switch (Op.getConstantOperandVal(i: 1)) {
11474 default: // Can't happen, don't crash on invalid number though.
11475 case 0: // Return the value of the EQ bit of CR6.
11476 BitNo = 0;
11477 InvertBit = false;
11478 Bitx = PPC::sub_eq;
11479 SetOp = PPCISD::SETBC;
11480 break;
11481 case 1: // Return the inverted value of the EQ bit of CR6.
11482 BitNo = 0;
11483 InvertBit = true;
11484 Bitx = PPC::sub_eq;
11485 SetOp = PPCISD::SETBCR;
11486 break;
11487 case 2: // Return the value of the LT bit of CR6.
11488 BitNo = 2;
11489 InvertBit = false;
11490 Bitx = PPC::sub_lt;
11491 SetOp = PPCISD::SETBC;
11492 break;
11493 case 3: // Return the inverted value of the LT bit of CR6.
11494 BitNo = 2;
11495 InvertBit = true;
11496 Bitx = PPC::sub_lt;
11497 SetOp = PPCISD::SETBCR;
11498 break;
11499 }
11500
11501 SDValue GlueOp = CompNode.getValue(R: 1);
11502 if (Subtarget.isISA3_1()) {
11503 SDValue SubRegIdx = DAG.getTargetConstant(Val: Bitx, DL: dl, VT: MVT::i32);
11504 SDValue CR6Reg = DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32);
11505 SDValue CRBit =
11506 SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
11507 Op1: CR6Reg, Op2: SubRegIdx, Op3: GlueOp),
11508 0);
11509 return DAG.getNode(Opcode: SetOp, DL: dl, VT: MVT::i32, Operand: CRBit);
11510 }
11511
11512 // Now that we have the comparison, emit a copy from the CR to a GPR.
11513 // This is flagged to the above dot comparison.
11514 SDValue Flags = DAG.getNode(Opcode: PPCISD::MFOCRF, DL: dl, VT: MVT::i32,
11515 N1: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32), N2: GlueOp);
11516
11517 // Shift the bit into the low position.
11518 Flags = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32, N1: Flags,
11519 N2: DAG.getConstant(Val: 8 - (3 - BitNo), DL: dl, VT: MVT::i32));
11520 // Isolate the bit.
11521 Flags = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: Flags,
11522 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
11523
11524 // If we are supposed to, toggle the bit.
11525 if (InvertBit)
11526 Flags = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: Flags,
11527 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
11528 return Flags;
11529}
11530
11531SDValue PPCTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11532 SelectionDAG &DAG) const {
11533 unsigned IntrinsicID = Op.getConstantOperandVal(i: 1);
11534 SDLoc dl(Op);
11535 switch (IntrinsicID) {
11536 case Intrinsic::ppc_amo_lwat_csne:
11537 case Intrinsic::ppc_amo_ldat_csne:
11538 SDValue Chain = Op.getOperand(i: 0);
11539 SDValue Ptr = Op.getOperand(i: 2);
11540 SDValue CmpVal = Op.getOperand(i: 3);
11541 SDValue NewVal = Op.getOperand(i: 4);
11542
11543 EVT VT = IntrinsicID == Intrinsic::ppc_amo_ldat_csne ? MVT::i64 : MVT::i32;
11544 Type *Ty = VT.getTypeForEVT(Context&: *DAG.getContext());
11545 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(C&: *DAG.getContext());
11546
11547 TargetLowering::ArgListTy Args;
11548 Args.emplace_back(args: DAG.getUNDEF(VT: MVT::i64),
11549 args: Type::getInt64Ty(C&: *DAG.getContext()));
11550 Args.emplace_back(args&: CmpVal, args&: Ty);
11551 Args.emplace_back(args&: NewVal, args&: Ty);
11552 Args.emplace_back(args&: Ptr, args&: IntPtrTy);
11553
11554 // Lower to dummy call to use ABI for consecutive register allocation.
11555 // Places return value, compare value, and new value in X3/X4/X5 as required
11556 // by lwat/ldat FC=16, avoiding a new register class for 3 adjacent
11557 // registers.
11558 const char *SymName = IntrinsicID == Intrinsic::ppc_amo_ldat_csne
11559 ? "__ldat_csne_pseudo"
11560 : "__lwat_csne_pseudo";
11561 SDValue Callee =
11562 DAG.getExternalSymbol(Sym: SymName, VT: getPointerTy(DL: DAG.getDataLayout()));
11563
11564 TargetLowering::CallLoweringInfo CLI(DAG);
11565 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(CC: CallingConv::C, ResultType: Ty, Target: Callee,
11566 ArgsList: std::move(Args));
11567
11568 auto Result = LowerCallTo(CLI);
11569 return DAG.getMergeValues(Ops: {Result.first, Result.second}, dl);
11570 }
11571 return SDValue();
11572}
11573
11574SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11575 SelectionDAG &DAG) const {
11576 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11577 // the beginning of the argument list.
11578 int ArgStart = isa<ConstantSDNode>(Val: Op.getOperand(i: 0)) ? 0 : 1;
11579 SDLoc DL(Op);
11580 switch (Op.getConstantOperandVal(i: ArgStart)) {
11581 case Intrinsic::ppc_cfence: {
11582 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11583 SDValue Val = Op.getOperand(i: ArgStart + 1);
11584 EVT Ty = Val.getValueType();
11585 if (Ty == MVT::i128) {
11586 // FIXME: Testing one of two paired registers is sufficient to guarantee
11587 // ordering?
11588 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i64, Operand: Val);
11589 }
11590 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11591 return SDValue(
11592 DAG.getMachineNode(
11593 Opcode, dl: DL, VT: MVT::Other,
11594 Op1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getScalarIntVT(), Operand: Val),
11595 Op2: Op.getOperand(i: 0)),
11596 0);
11597 }
11598 case Intrinsic::ppc_disassemble_dmr: {
11599 return DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: Op.getOperand(i: ArgStart + 2),
11600 Ptr: Op.getOperand(i: ArgStart + 1), PtrInfo: MachinePointerInfo());
11601 }
11602 case Intrinsic::ppc_amo_stwat:
11603 case Intrinsic::ppc_amo_stdat: {
11604 SDLoc dl(Op);
11605 SDValue Chain = Op.getOperand(i: 0);
11606 SDValue Ptr = Op.getOperand(i: ArgStart + 1);
11607 SDValue Val = Op.getOperand(i: ArgStart + 2);
11608 SDValue FC = Op.getOperand(i: ArgStart + 3);
11609
11610 return DAG.getNode(Opcode: PPCISD::STAT, DL: dl, VT: MVT::Other, N1: Chain, N2: Val, N3: Ptr, N4: FC);
11611 }
11612 default:
11613 break;
11614 }
11615 return SDValue();
11616}
11617
11618// Lower scalar BSWAP64 to xxbrd.
11619SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11620 SDLoc dl(Op);
11621 if (!Subtarget.isPPC64())
11622 return Op;
11623 // MTVSRDD
11624 Op = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, VT: MVT::v2i64, N1: Op.getOperand(i: 0),
11625 N2: Op.getOperand(i: 0));
11626 // XXBRD
11627 Op = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Op);
11628 // MFVSRD
11629 int VectorIndex = 0;
11630 if (Subtarget.isLittleEndian())
11631 VectorIndex = 1;
11632 Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Op,
11633 N2: DAG.getTargetConstant(Val: VectorIndex, DL: dl, VT: MVT::i32));
11634 return Op;
11635}
11636
11637// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11638// compared to a value that is atomically loaded (atomic loads zero-extend).
11639SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11640 SelectionDAG &DAG) const {
11641 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11642 "Expecting an atomic compare-and-swap here.");
11643 SDLoc dl(Op);
11644 auto *AtomicNode = cast<AtomicSDNode>(Val: Op.getNode());
11645 EVT MemVT = AtomicNode->getMemoryVT();
11646 if (MemVT.getSizeInBits() >= 32)
11647 return Op;
11648
11649 SDValue CmpOp = Op.getOperand(i: 2);
11650 // If this is already correctly zero-extended, leave it alone.
11651 auto HighBits = APInt::getHighBitsSet(numBits: 32, hiBitsSet: 32 - MemVT.getSizeInBits());
11652 if (DAG.MaskedValueIsZero(Op: CmpOp, Mask: HighBits))
11653 return Op;
11654
11655 // Clear the high bits of the compare operand.
11656 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11657 SDValue NewCmpOp =
11658 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: CmpOp,
11659 N2: DAG.getConstant(Val: MaskVal, DL: dl, VT: MVT::i32));
11660
11661 // Replace the existing compare operand with the properly zero-extended one.
11662 SmallVector<SDValue, 4> Ops;
11663 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11664 Ops.push_back(Elt: AtomicNode->getOperand(Num: i));
11665 Ops[2] = NewCmpOp;
11666 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11667 SDVTList Tys = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other);
11668 auto NodeTy =
11669 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11670 return DAG.getMemIntrinsicNode(Opcode: NodeTy, dl, VTList: Tys, Ops, MemVT, MMO);
11671}
11672
// Lower quadword (i128) atomic load/store by rewriting them as target
// intrinsics that operate on a pair of i64 halves; instruction selection
// matches these intrinsics later.
SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  AtomicSDNode *N = cast<AtomicSDNode>(Val: Op.getNode());
  EVT MemVT = N->getMemoryVT();
  assert(MemVT.getSimpleVT() == MVT::i128 &&
         "Expect quadword atomic operations");
  SDLoc dl(N);
  unsigned Opc = N->getOpcode();
  switch (Opc) {
  case ISD::ATOMIC_LOAD: {
    // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
    // lowered to ppc instructions by pattern matching instruction selector.
    // The intrinsic produces two i64 results (lo, hi) plus a chain.
    SDVTList Tys = DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::Other);
    SmallVector<SDValue, 4> Ops{
        N->getOperand(Num: 0),
        DAG.getConstant(Val: Intrinsic::ppc_atomic_load_i128, DL: dl, VT: MVT::i32)};
    for (int I = 1, E = N->getNumOperands(); I < E; ++I)
      Ops.push_back(Elt: N->getOperand(Num: I));
    SDValue LoadedVal = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl, VTList: Tys,
                                                Ops, MemVT, MMO: N->getMemOperand());
    // Recombine the halves into a single i128:
    //   (zext lo) | ((zext hi) << 64).
    SDValue ValLo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal);
    SDValue ValHi =
        DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal.getValue(R: 1));
    ValHi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ValHi,
                        N2: DAG.getConstant(Val: 64, DL: dl, VT: MVT::i32));
    SDValue Val =
        DAG.getNode(Opcode: ISD::OR, DL: dl, ResultTys: {MVT::i128, MVT::Other}, Ops: {ValLo, ValHi});
    // Return the merged value together with the chain from the intrinsic.
    return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, ResultTys: {MVT::i128, MVT::Other},
                       Ops: {Val, LoadedVal.getValue(R: 2)});
  }
  case ISD::ATOMIC_STORE: {
    // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
    // lowered to ppc instructions by pattern matching instruction selector.
    // The i128 value is split into lo/hi i64 halves passed as separate
    // operands (trunc for lo, lshr-64 + trunc for hi).
    SDVTList Tys = DAG.getVTList(VT: MVT::Other);
    SmallVector<SDValue, 4> Ops{
        N->getOperand(Num: 0),
        DAG.getConstant(Val: Intrinsic::ppc_atomic_store_i128, DL: dl, VT: MVT::i32)};
    SDValue Val = N->getOperand(Num: 1);
    SDValue ValLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: Val);
    SDValue ValHi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: Val,
                                N2: DAG.getConstant(Val: 64, DL: dl, VT: MVT::i32));
    ValHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: ValHi);
    Ops.push_back(Elt: ValLo);
    Ops.push_back(Elt: ValHi);
    Ops.push_back(Elt: N->getOperand(Num: 2));
    return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops, MemVT,
                                   MMO: N->getMemOperand());
  }
  default:
    llvm_unreachable("Unexpected atomic opcode");
  }
}
11725
// Emit an i1 test of whether Op belongs to the floating-point class set in
// Mask, using the XSTSTDC{QP,DP,SP} test-data-class instructions. Classes
// the instruction cannot express directly (normal, qNaN-only, sNaN-only)
// are handled by recursing on the complementary/remaining class sets.
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
                                SelectionDAG &DAG,
                                const PPCSubtarget &Subtarget) {
  assert(Mask <= fcAllFlags && "Invalid fp_class flags!");

  // Bit layout of the data-class immediate consumed by the XSTSTDC*
  // instructions.
  enum DataClassMask {
    DC_NAN = 1 << 6,
    DC_NEG_INF = 1 << 4,
    DC_POS_INF = 1 << 5,
    DC_NEG_ZERO = 1 << 2,
    DC_POS_ZERO = 1 << 3,
    DC_NEG_SUBNORM = 1,
    DC_POS_SUBNORM = 1 << 1,
  };

  EVT VT = Op.getValueType();

  // Pick the test instruction matching the operand width.
  unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
                    : VT == MVT::f64 ? PPC::XSTSTDCDP
                                     : PPC::XSTSTDCSP;

  // Trivial masks fold to boolean constants.
  if (Mask == fcAllFlags)
    return DAG.getBoolConstant(V: true, DL: Dl, VT: MVT::i1, OpVT: VT);
  if (Mask == 0)
    return DAG.getBoolConstant(V: false, DL: Dl, VT: MVT::i1, OpVT: VT);

  // When it's cheaper or necessary to test reverse flags.
  if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
    SDValue Rev = getDataClassTest(Op, Mask: ~Mask, Dl, DAG, Subtarget);
    return DAG.getNOT(DL: Dl, Val: Rev, VT: MVT::i1);
  }

  // Power doesn't support testing whether a value is 'normal'. Test the rest
  // first, and test if it's 'not not-normal' with expected sign.
  if (Mask & fcNormal) {
    // Test for every class except 'normal'; a clear result means the value
    // is normal.
    SDValue Rev(DAG.getMachineNode(
                    Opcode: TestOp, dl: Dl, VT: MVT::i32,
                    Op1: DAG.getTargetConstant(Val: DC_NAN | DC_NEG_INF | DC_POS_INF |
                                                 DC_NEG_ZERO | DC_POS_ZERO |
                                                 DC_NEG_SUBNORM | DC_POS_SUBNORM,
                                             DL: Dl, VT: MVT::i32),
                    Op2: Op),
                0);
    // Sign are stored in CR bit 0, result are in CR bit 2.
    SDValue Sign(
        DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
                           Op2: DAG.getTargetConstant(Val: PPC::sub_lt, DL: Dl, VT: MVT::i32)),
        0);
    SDValue Normal(DAG.getNOT(
        DL: Dl,
        Val: SDValue(DAG.getMachineNode(
                        Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
                        Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
                    0),
        VT: MVT::i1));
    // Invert the sign-bit test when the positive-normal class is requested.
    if (Mask & fcPosNormal)
      Sign = DAG.getNOT(DL: Dl, Val: Sign, VT: MVT::i1);
    SDValue Result = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: Sign, N2: Normal);
    if (Mask == fcPosNormal || Mask == fcNegNormal)
      return Result;

    // OR in the test for the remaining (non-normal) classes.
    return DAG.getNode(
        Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
        N1: getDataClassTest(Op, Mask: Mask & ~fcNormal, Dl, DAG, Subtarget), N2: Result);
  }

  // The instruction doesn't differentiate between signaling or quiet NaN. Test
  // the rest first, and test if it 'is NaN and is signaling/quiet'.
  if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
    bool IsQuiet = Mask & fcQNan;
    SDValue NanCheck = getDataClassTest(Op, Mask: fcNan, Dl, DAG, Subtarget);

    // Quietness is determined by the first bit in fraction field.
    uint64_t QuietMask = 0;
    SDValue HighWord;
    if (VT == MVT::f128) {
      HighWord = DAG.getNode(
          Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: DAG.getBitcast(VT: MVT::v4i32, V: Op),
          N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? 3 : 0, DL: Dl));
      QuietMask = 0x8000;
    } else if (VT == MVT::f64) {
      if (Subtarget.isPPC64()) {
        HighWord = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::i32,
                               N1: DAG.getBitcast(VT: MVT::i64, V: Op),
                               N2: DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32));
      } else {
        // 32-bit target: go through a vector to get at the high word.
        SDValue Vec = DAG.getBitcast(
            VT: MVT::v4i32, V: DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: Dl, VT: MVT::v2f64, Operand: Op));
        HighWord = DAG.getNode(
            Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: Vec,
            N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? 1 : 0, DL: Dl));
      }
      QuietMask = 0x80000;
    } else if (VT == MVT::f32) {
      HighWord = DAG.getBitcast(VT: MVT::i32, V: Op);
      QuietMask = 0x400000;
    }
    // NaN result is ANDed with "quiet bit set" (quiet) or "clear" (signaling).
    SDValue NanRes = DAG.getSetCC(
        DL: Dl, VT: MVT::i1,
        LHS: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: HighWord,
                    N2: DAG.getConstant(Val: QuietMask, DL: Dl, VT: MVT::i32)),
        RHS: DAG.getConstant(Val: 0, DL: Dl, VT: MVT::i32), Cond: IsQuiet ? ISD::SETNE : ISD::SETEQ);
    NanRes = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: NanCheck, N2: NanRes);
    if (Mask == fcQNan || Mask == fcSNan)
      return NanRes;

    // OR in the test for the remaining (non-NaN) classes.
    return DAG.getNode(Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
                       N1: getDataClassTest(Op, Mask: Mask & ~fcNan, Dl, DAG, Subtarget),
                       N2: NanRes);
  }

  // All remaining classes map directly onto the instruction's immediate.
  unsigned NativeMask = 0;
  if ((Mask & fcNan) == fcNan)
    NativeMask |= DC_NAN;
  if (Mask & fcNegInf)
    NativeMask |= DC_NEG_INF;
  if (Mask & fcPosInf)
    NativeMask |= DC_POS_INF;
  if (Mask & fcNegZero)
    NativeMask |= DC_NEG_ZERO;
  if (Mask & fcPosZero)
    NativeMask |= DC_POS_ZERO;
  if (Mask & fcNegSubnormal)
    NativeMask |= DC_NEG_SUBNORM;
  if (Mask & fcPosSubnormal)
    NativeMask |= DC_POS_SUBNORM;
  // Extract the match result from CR bit 2 (eq) as an i1.
  return SDValue(
      DAG.getMachineNode(
          Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1,
          Op1: SDValue(DAG.getMachineNode(
                          Opcode: TestOp, dl: Dl, VT: MVT::i32,
                          Op1: DAG.getTargetConstant(Val: NativeMask, DL: Dl, VT: MVT::i32), Op2: Op),
                      0),
          Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
      0);
}
11862
11863SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11864 SelectionDAG &DAG) const {
11865 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11866 SDValue LHS = Op.getOperand(i: 0);
11867 uint64_t RHSC = Op.getConstantOperandVal(i: 1);
11868 SDLoc Dl(Op);
11869 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11870 if (LHS.getValueType() == MVT::ppcf128) {
11871 // The higher part determines the value class.
11872 LHS = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::f64, N1: LHS,
11873 N2: DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32));
11874 }
11875
11876 return getDataClassTest(Op: LHS, Mask: Category, Dl, DAG, Subtarget);
11877}
11878
11879// Adjust the length value for a load/store with length to account for the
11880// instructions requiring a left justified length, and for non-byte element
11881// types requiring scaling by element size.
11882static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11883 SelectionDAG &DAG) {
11884 SDLoc dl(Val);
11885 EVT VT = Val->getValueType(ResNo: 0);
11886 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11887 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Val: Bits / 8);
11888 SDValue SHLAmt = DAG.getConstant(Val: LeftAdj + TypeAdj, DL: dl, VT);
11889 return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Val, N2: SHLAmt);
11890}
11891
11892SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11893 auto VPLD = cast<VPLoadSDNode>(Val&: Op);
11894 bool Future = Subtarget.isISAFuture();
11895 SDLoc dl(Op);
11896 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11897 "Mask predication not supported");
11898 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11899 SDValue Len = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: PtrVT, Operand: VPLD->getOperand(Num: 4));
11900 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11901 unsigned EltBits = Op->getValueType(ResNo: 0).getScalarType().getSizeInBits();
11902 Len = AdjustLength(Val: Len, Bits: EltBits, Left: !Future, DAG);
11903 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32),
11904 VPLD->getOperand(Num: 1), Len};
11905 SDVTList Tys = DAG.getVTList(VT1: Op->getValueType(ResNo: 0), VT2: MVT::Other);
11906 SDValue VPL =
11907 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl, VTList: Tys, Ops,
11908 MemVT: VPLD->getMemoryVT(), MMO: VPLD->getMemOperand());
11909 return VPL;
11910}
11911
11912SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11913 auto VPST = cast<VPStoreSDNode>(Val&: Op);
11914 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11915 "Mask predication not supported");
11916 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11917 SDLoc dl(Op);
11918 SDValue Len = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: PtrVT, Operand: VPST->getOperand(Num: 5));
11919 unsigned EltBits =
11920 Op->getOperand(Num: 1).getValueType().getScalarType().getSizeInBits();
11921 bool Future = Subtarget.isISAFuture();
11922 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11923 Len = AdjustLength(Val: Len, Bits: EltBits, Left: !Future, DAG);
11924 SDValue Ops[] = {
11925 VPST->getChain(), DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32),
11926 DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: VPST->getOperand(Num: 1)),
11927 VPST->getOperand(Num: 2), Len};
11928 SDVTList Tys = DAG.getVTList(VT: MVT::Other);
11929 SDValue VPS =
11930 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops,
11931 MemVT: VPST->getMemoryVT(), MMO: VPST->getMemOperand());
11932 return VPS;
11933}
11934
// Lower SCALAR_TO_VECTOR: splat small constants directly, reuse a one-use
// i32 load as a load-and-splat when profitable, and otherwise round-trip
// the scalar through a 16-byte stack slot (store scalar, load vector).
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);

  MachineFunction &MF = DAG.getMachineFunction();
  SDValue Op0 = Op.getOperand(i: 0);
  EVT ValVT = Op0.getValueType();
  unsigned EltSize = Op.getValueType().getScalarSizeInBits();
  // Constants in the signed 5-bit range [-16, 15] can be materialized as a
  // canonical vector splat immediate.
  if (isa<ConstantSDNode>(Val: Op0) && EltSize <= 32) {
    int64_t IntVal = Op.getConstantOperandVal(i: 0);
    if (IntVal >= -16 && IntVal <= 15)
      return getCanonicalConstSplat(Val: IntVal, SplatSize: EltSize / 8, VT: Op.getValueType(), DAG,
                                    dl);
  }

  // A one-use i32 load feeding a v4i32 scalar_to_vector can become a
  // load-and-splat reusing the original load's address.
  ReuseLoadInfo RLI;
  if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
      Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
      Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
      canReuseLoadAddress(Op: Op0, MemVT: MVT::i32, RLI, DAG, ET: ISD::NON_EXTLOAD)) {

    MachineMemOperand *MMO =
        MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
    SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
    SDValue Bits = DAG.getMemIntrinsicNode(
        Opcode: PPCISD::LD_SPLAT, dl, VTList: DAG.getVTList(VT1: MVT::v4i32, VT2: MVT::Other), Ops,
        MemVT: MVT::i32, MMO);
    if (RLI.ResChain)
      DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
    return Bits.getValue(R: 0);
  }

  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(Size: 16, Alignment: Align(16), isSpillSlot: false);
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);

  SDValue Val = Op0;
  // P10 hardware store forwarding requires that a single store contains all
  // the data for the load. P10 is able to merge a pair of adjacent stores. Try
  // to avoid load hit store on P10 when running binaries compiled for older
  // processors by generating two mergeable scalar stores to forward with the
  // vector load.
  if (!DisableP10StoreForward && Subtarget.isPPC64() &&
      !Subtarget.isLittleEndian() && ValVT.isInteger() &&
      ValVT.getSizeInBits() <= 64) {
    // Shift the value into the high bits of an i64 so two 8-byte stores of
    // the same register cover the whole 16-byte slot.
    Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: Val);
    EVT ShiftAmountTy = getShiftAmountTy(LHSTy: MVT::i64, DL: DAG.getDataLayout());
    SDValue ShiftBy = DAG.getConstant(
        Val: 64 - Op.getValueType().getScalarSizeInBits(), DL: dl, VT: ShiftAmountTy);
    Val = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: Val, N2: ShiftBy);
    SDValue Plus8 =
        DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FIdx, N2: DAG.getConstant(Val: 8, DL: dl, VT: PtrVT));
    SDValue Store2 =
        DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: Plus8, PtrInfo: MachinePointerInfo());
    SDValue Store = DAG.getStore(Chain: Store2, dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo());
    return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx,
                       PtrInfo: MachinePointerInfo());
  }

  // Store the input value into Value#0 of the stack slot.
  SDValue Store =
      DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx, PtrInfo: MachinePointerInfo());
}
12003
// Lower INSERT_VECTOR_ELT. Depending on subtarget and element type the node
// is kept legal as-is, rewritten as an integer insert, lowered through
// MTVSRZ + VECINSERT, or returned as SDValue() to be expanded.
SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  // Non-null when the insertion index is a compile-time constant.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(i: 0);
  SDValue V2 = Op.getOperand(i: 1);

  // Constant-indexed v2f64 inserts are left unchanged.
  if (VT == MVT::v2f64 && C)
    return Op;

  if (Subtarget.hasP9Vector()) {
    // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
    // because on P10, it allows this specific insert_vector_elt load pattern to
    // utilize the refactored load and store infrastructure in order to exploit
    // prefixed loads.
    // On targets with inexpensive direct moves (Power9 and up), a
    // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
    // load since a single precision load will involve conversion to double
    // precision on the load followed by another conversion to single precision.
    if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
        (isa<LoadSDNode>(Val: V2))) {
      SDValue BitcastVector = DAG.getBitcast(VT: MVT::v4i32, V: V1);
      SDValue BitcastLoad = DAG.getBitcast(VT: MVT::i32, V: V2);
      SDValue InsVecElt =
          DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl, VT: MVT::v4i32, N1: BitcastVector,
                      N2: BitcastLoad, N3: Op.getOperand(i: 2));
      return DAG.getBitcast(VT: MVT::v4f32, V: InsVecElt);
    }
  }

  if (Subtarget.isISA3_1()) {
    // 64-bit element inserts need 64-bit registers; expand on 32-bit targets.
    if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
      return SDValue();
    // On P10, we have legal lowering for constant and variable indices for
    // all vectors.
    if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
        VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
      return Op;
  }

  // Before P10, we have legal lowering for constant indices but not for
  // variable ones.
  if (!C)
    return SDValue();

  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue Mtvsrz = DAG.getNode(Opcode: PPCISD::MTVSRZ, DL: dl, VT, Operand: V2);
    unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
    unsigned InsertAtElement = C->getZExtValue();
    unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
    // VECINSERT takes a byte offset counted from the big-endian side.
    if (Subtarget.isLittleEndian()) {
      InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
    }
    return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT, N1: V1, N2: Mtvsrz,
                       N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
  }
  return Op;
}
12068
// Lower a load of a Dense Math register value (v1024i1 dmr or v2048i1 dmr
// pair): emit one lxvp intrinsic load per 32-byte chunk, then assemble the
// chunks into dmr register sequences.
SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();
  EVT VT = Op.getValueType();
  bool IsV1024i1 = VT == MVT::v1024i1;
  bool IsV2048i1 = VT == MVT::v2048i1;

  // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
  // Dense Math dmr pair registers, respectively.
  assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
  (void)IsV2048i1;
  assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
         "Dense Math support required.");
  assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");

  SmallVector<SDValue, 8> Loads;
  SmallVector<SDValue, 8> LoadChains;

  // Load the value one 32-byte vector pair at a time (4 pairs per dmr).
  SDValue IntrinID = DAG.getConstant(Val: Intrinsic::ppc_vsx_lxvp, DL: dl, VT: MVT::i32);
  SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
  MachineMemOperand *MMO = LN->getMemOperand();
  unsigned NumVecs = VT.getSizeInBits() / 256;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Each chunk gets its own memory operand at the proper offset.
    MachineMemOperand *NewMMO =
        DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * 32, Size: 32);
    if (Idx > 0) {
      BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
                            N2: DAG.getConstant(Val: 32, DL: dl, VT: BasePtr.getValueType()));
      LoadOps[2] = BasePtr;
    }
    SDValue Ld = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
                                         VTList: DAG.getVTList(VT1: MVT::v256i1, VT2: MVT::Other),
                                         Ops: LoadOps, MemVT: MVT::v256i1, MMO: NewMMO);
    LoadChains.push_back(Elt: Ld.getValue(R: 1));
    Loads.push_back(Elt: Ld);
  }

  // On little-endian targets the pair order is reversed.
  if (Subtarget.isLittleEndian()) {
    std::reverse(first: Loads.begin(), last: Loads.end());
    std::reverse(first: LoadChains.begin(), last: LoadChains.end());
  }

  SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
  // Assemble the first four pairs into a single dmr value.
  SDValue Value = DMFInsert1024(Pairs: Loads, dl, DAG);

  if (IsV1024i1) {
    return DAG.getMergeValues(Ops: {Value, TF}, dl);
  }

  // Handle Loads for V2048i1 which represents a dmr pair.
  SmallVector<SDValue, 4> MoreLoads{Loads[4], Loads[5], Loads[6], Loads[7]};
  SDValue Dmr1Value = DMFInsert1024(Pairs: MoreLoads, dl, DAG);

  SDValue Dmr0Sub = DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32);
  SDValue Dmr1Sub = DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32);

  // Combine the two dmr values into a dmr-pair register sequence.
  SDValue DmrPRC = DAG.getTargetConstant(Val: PPC::DMRpRCRegClassID, DL: dl, VT: MVT::i32);
  const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};

  SDValue DmrPValue = SDValue(
      DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v2048i1, Ops: DmrPOps), 0);

  return DAG.getMergeValues(Ops: {DmrPValue, TF}, dl);
}
12136
12137SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12138 const SDLoc &dl,
12139 SelectionDAG &DAG) const {
12140 SDValue Lo =
12141 DAG.getNode(Opcode: PPCISD::INST512, DL: dl, VT: MVT::v512i1, N1: Pairs[0], N2: Pairs[1]);
12142 SDValue LoSub = DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32);
12143 SDValue Hi =
12144 DAG.getNode(Opcode: PPCISD::INST512HI, DL: dl, VT: MVT::v512i1, N1: Pairs[2], N2: Pairs[3]);
12145 SDValue HiSub = DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32);
12146 SDValue RC = DAG.getTargetConstant(Val: PPC::DMRRCRegClassID, DL: dl, VT: MVT::i32);
12147
12148 return SDValue(DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v1024i1,
12149 Ops: {RC, Lo, LoSub, Hi, HiSub}),
12150 0);
12151}
12152
// Lower loads of MMA-related vector types: v1024i1/v2048i1 (dmr values) are
// delegated to LowerDMFVectorLoad; v256i1 (vector pairs) and v512i1
// (accumulators) are split into v16i8 loads combined with a build node.
// All other vector loads pass through unchanged.
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();
  EVT VT = Op.getValueType();

  if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
    return LowerDMFVectorLoad(Op, DAG);

  if (VT != MVT::v256i1 && VT != MVT::v512i1)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  // For v256i1 on ISA Future, let the load go through to instruction selection
  // where it will be matched to lxvp/plxvp by the instruction patterns.
  if (VT == MVT::v256i1 && Subtarget.isISAFuture())
    return Op;

  // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
  // value in 2 or 4 vsx registers.
  Align Alignment = LN->getAlign();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> LoadChains;
  unsigned NumVecs = VT.getSizeInBits() / 128;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    SDValue Load =
        DAG.getLoad(VT: MVT::v16i8, dl, Chain: LoadChain, Ptr: BasePtr,
                    PtrInfo: LN->getPointerInfo().getWithOffset(O: Idx * 16),
                    Alignment: commonAlignment(A: Alignment, Offset: Idx * 16),
                    MMOFlags: LN->getMemOperand()->getFlags(), AAInfo: LN->getAAInfo());
    BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
                          N2: DAG.getConstant(Val: 16, DL: dl, VT: BasePtr.getValueType()));
    Loads.push_back(Elt: Load);
    LoadChains.push_back(Elt: Load.getValue(R: 1));
  }
  // On little-endian targets the register order is reversed relative to
  // memory order.
  if (Subtarget.isLittleEndian()) {
    std::reverse(first: Loads.begin(), last: Loads.end());
    std::reverse(first: LoadChains.begin(), last: LoadChains.end());
  }
  SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
  SDValue Value =
      DAG.getNode(Opcode: VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
                  DL: dl, VT, Ops: Loads);
  SDValue RetOps[] = {Value, TF};
  return DAG.getMergeValues(Ops: RetOps, dl);
}
12206
// Lower a store of a Dense Math register value (v1024i1 dmr or v2048i1 dmr
// pair): extract the constituent v256i1 vector pairs with
// DMXXEXTFDMR512[_HI] and emit one stxvp intrinsic store per 32-byte chunk.
SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
                                               SelectionDAG &DAG) const {

  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SmallVector<SDValue, 8> Values;
  SmallVector<SDValue, 8> Stores;
  EVT VT = SN->getValue().getValueType();
  bool IsV1024i1 = VT == MVT::v1024i1;
  bool IsV2048i1 = VT == MVT::v2048i1;

  // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
  // Dense Math dmr pair registers, respectively.
  assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
  (void)IsV2048i1;
  assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
         "Dense Math support required.");
  assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");

  // Each DMXXEXTFDMR512* extraction yields two v256i1 vector pairs.
  EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
  if (IsV1024i1) {
    // Split the dmr value into its low and high 512-bit halves, then
    // extract two vector pairs from each half.
    SDValue Lo(DAG.getMachineNode(
                   Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
                   Op1: Op.getOperand(i: 1),
                   Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
               0);
    SDValue Hi(DAG.getMachineNode(
                   Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
                   Op1: Op.getOperand(i: 1),
                   Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
               0);
    MachineSDNode *ExtNode =
        DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Lo);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
    ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Hi);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
  } else {
    // This corresponds to v2048i1 which represents a dmr pair.
    // First split the pair into its two dmr registers...
    SDValue Dmr0(
        DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
                           Op1: Op.getOperand(i: 1),
                           Op2: DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32)),
        0);

    SDValue Dmr1(
        DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
                           Op1: Op.getOperand(i: 1),
                           Op2: DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32)),
        0);

    // ...then split each dmr into its low and high 512-bit halves.
    SDValue Dmr0Lo(DAG.getMachineNode(
                       Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
                       Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
                   0);

    SDValue Dmr0Hi(DAG.getMachineNode(
                       Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
                       Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
                   0);

    SDValue Dmr1Lo(DAG.getMachineNode(
                       Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
                       Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
                   0);

    SDValue Dmr1Hi(DAG.getMachineNode(
                       Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
                       Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
                   0);

    // Extract the four pairs of each dmr (eight v256i1 values total).
    MachineSDNode *ExtNode =
        DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr0Lo);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
    ExtNode =
        DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr0Hi);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
    ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr1Lo);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
    ExtNode =
        DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr1Hi);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
  }

  // On little-endian targets the pair order is reversed.
  if (Subtarget.isLittleEndian())
    std::reverse(first: Values.begin(), last: Values.end());

  // Store the value one 32-byte vector pair (stxvp) at a time.
  SDVTList Tys = DAG.getVTList(VT: MVT::Other);
  SmallVector<SDValue, 4> Ops{
      StoreChain, DAG.getConstant(Val: Intrinsic::ppc_vsx_stxvp, DL: dl, VT: MVT::i32),
      Values[0], BasePtr};
  MachineMemOperand *MMO = SN->getMemOperand();
  unsigned NumVecs = VT.getSizeInBits() / 256;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Each chunk gets its own memory operand at the proper offset.
    MachineMemOperand *NewMMO =
        DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * 32, Size: 32);
    if (Idx > 0) {
      BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
                            N2: DAG.getConstant(Val: 32, DL: dl, VT: BasePtr.getValueType()));
      Ops[3] = BasePtr;
    }
    Ops[2] = Values[Idx];
    SDValue St = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops,
                                         MemVT: MVT::v256i1, MMO: NewMMO);
    Stores.push_back(Elt: St);
  }

  SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
  return TF;
}
12324
// Lower a STORE of one of the wide PPC vector types: v256i1 (paired-vector
// value), v512i1 (MMA accumulator), or v1024i1/v2048i1 (DMR values, which are
// delegated to LowerDMFVectorStore).
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();
  // Second half of an accumulator; only meaningful on the ISA-Future path
  // below, where it is overwritten with the high DMXXEXTFDMR512 result.
  SDValue Value2 = SN->getValue();
  EVT StoreVT = Value.getValueType();

  // DMR and DMR-pair stores have their own lowering.
  if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
    return LowerDMFVectorStore(Op, DAG);

  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  // For v256i1 on ISA Future, let the store go through to instruction selection
  // where it will be matched to stxvp/pstxvp by the instruction patterns.
  if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
      !DisableAutoPairedVecSt)
    return Op;

  // For other cases, create 2 or 4 v16i8 stores to store the pair or
  // accumulator underlying registers individually.
  Align Alignment = SN->getAlign();
  SmallVector<SDValue, 4> Stores;
  unsigned NumVecs = 2;
  if (StoreVT == MVT::v512i1) {
    if (Subtarget.isISAFuture()) {
      // ISA Future: split the accumulator into two v256i1 halves first.
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      MachineSDNode *ExtNode = DAG.getMachineNode(
          Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Op.getOperand(i: 1));

      Value = SDValue(ExtNode, 0);
      Value2 = SDValue(ExtNode, 1);
    } else
      // Pre-Future: copy the accumulator back to its VSX registers.
      Value = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: Value);
    NumVecs = 4;
  }
  // Emit one 16-byte store per underlying register; the register order is
  // reversed on little-endian targets.
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
    SDValue Elt;
    if (Subtarget.isISAFuture()) {
      // Each extracted v256i1 half holds two registers; index within the
      // half, picking Value2 for the upper half of an accumulator.
      VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
      Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
                        N1: Idx > 1 ? Value2 : Value,
                        N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
    } else
      Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: Value,
                        N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));

    SDValue Store =
        DAG.getStore(Chain: StoreChain, dl, Val: Elt, Ptr: BasePtr,
                     PtrInfo: SN->getPointerInfo().getWithOffset(O: Idx * 16),
                     Alignment: commonAlignment(A: Alignment, Offset: Idx * 16),
                     MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());
    BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
                          N2: DAG.getConstant(Val: 16, DL: dl, VT: BasePtr.getValueType()));
    Stores.push_back(Elt: Store);
  }
  // Merge the component store chains into a single token factor.
  SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
  return TF;
}
12394
// Custom lowering for integer vector multiply. v4i32 is assembled from
// AltiVec halfword even/odd multiplies plus a multiply-sum; v16i8 is
// assembled from byte even/odd multiplies merged back together.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);

    SDValue Zero = getCanonicalConstSplat(Val: 0, SplatSize: 1, VT: MVT::v4i32, DAG, dl);
    // +16 as shift amt.
    SDValue Neg16 = getCanonicalConstSplat(Val: -16, SplatSize: 4, VT: MVT::v4i32, DAG, dl);
    SDValue RHSSwap = // = vrlw RHS, 16
        BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vrlw, LHS: RHS, RHS: Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: LHS);
    RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHS);
    RHSSwap = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, DestVT: MVT::v4i32);

    // Multiply-sum against the rotated RHS picks up the cross terms.
    SDValue HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmsumuhm,
                                      Op0: LHS, Op1: RHSSwap, Op2: Zero, DAG, dl, DestVT: MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: HiProd,
                              RHS: Neg16, DAG, dl);
    return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::v4i32, N1: LoProd, N2: HiProd);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
    EvenParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
    OddParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OddParts, N2: EvenParts, Mask: Ops);
    else
      return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: EvenParts, N2: OddParts, Mask: Ops);
  } else {
    // Only v4i32 and v16i8 multiplies are custom-lowered here.
    llvm_unreachable("Unknown mul to lower!");
  }
}
12458
12459SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12460 bool IsStrict = Op->isStrictFPOpcode();
12461 if (Op.getOperand(i: IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12462 !Subtarget.hasP9Vector())
12463 return SDValue();
12464
12465 return Op;
12466}
12467
12468// Custom lowering for fpext vf32 to v2f64
12469SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12470
12471 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12472 "Should only be called for ISD::FP_EXTEND");
12473
12474 // FIXME: handle extends from half precision float vectors on P9.
12475 // We only want to custom lower an extend from v2f32 to v2f64.
12476 if (Op.getValueType() != MVT::v2f64 ||
12477 Op.getOperand(i: 0).getValueType() != MVT::v2f32)
12478 return SDValue();
12479
12480 SDLoc dl(Op);
12481 SDValue Op0 = Op.getOperand(i: 0);
12482
12483 switch (Op0.getOpcode()) {
12484 default:
12485 return SDValue();
12486 case ISD::EXTRACT_SUBVECTOR: {
12487 assert(Op0.getNumOperands() == 2 &&
12488 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12489 "Node should have 2 operands with second one being a constant!");
12490
12491 if (Op0.getOperand(i: 0).getValueType() != MVT::v4f32)
12492 return SDValue();
12493
12494 // Custom lower is only done for high or low doubleword.
12495 int Idx = Op0.getConstantOperandVal(i: 1);
12496 if (Idx % 2 != 0)
12497 return SDValue();
12498
12499 // Since input is v4f32, at this point Idx is either 0 or 2.
12500 // Shift to get the doubleword position we want.
12501 int DWord = Idx >> 1;
12502
12503 // High and low word positions are different on little endian.
12504 if (Subtarget.isLittleEndian())
12505 DWord ^= 0x1;
12506
12507 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64,
12508 N1: Op0.getOperand(i: 0), N2: DAG.getConstant(Val: DWord, DL: dl, VT: MVT::i32));
12509 }
12510 case ISD::FADD:
12511 case ISD::FMUL:
12512 case ISD::FSUB: {
12513 SDValue NewLoad[2];
12514 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12515 // Ensure both input are loads.
12516 SDValue LdOp = Op0.getOperand(i);
12517 if (LdOp.getOpcode() != ISD::LOAD)
12518 return SDValue();
12519 // Generate new load node.
12520 LoadSDNode *LD = cast<LoadSDNode>(Val&: LdOp);
12521 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12522 NewLoad[i] = DAG.getMemIntrinsicNode(
12523 Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12524 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12525 }
12526 SDValue NewOp =
12527 DAG.getNode(Opcode: Op0.getOpcode(), DL: SDLoc(Op0), VT: MVT::v4f32, N1: NewLoad[0],
12528 N2: NewLoad[1], Flags: Op0.getNode()->getFlags());
12529 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewOp,
12530 N2: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32));
12531 }
12532 case ISD::LOAD: {
12533 LoadSDNode *LD = cast<LoadSDNode>(Val&: Op0);
12534 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12535 SDValue NewLd = DAG.getMemIntrinsicNode(
12536 Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12537 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12538 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewLd,
12539 N2: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32));
12540 }
12541 }
12542 llvm_unreachable("ERROR:Should return for all cases within swtich.");
12543}
12544
12545static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12546 SelectionDAG &DAG,
12547 const PPCSubtarget &STI) {
12548 SDLoc DL(Value);
12549 if (STI.useCRBits())
12550 Value = DAG.getNode(Opcode: ISD::SELECT, DL, VT: SumType, N1: Value,
12551 N2: DAG.getConstant(Val: 1, DL, VT: SumType),
12552 N3: DAG.getConstant(Val: 0, DL, VT: SumType));
12553 else
12554 Value = DAG.getZExtOrTrunc(Op: Value, DL, VT: SumType);
12555 SDValue Sum = DAG.getNode(Opcode: PPCISD::ADDC, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32),
12556 N1: Value, N2: DAG.getAllOnesConstant(DL, VT: SumType));
12557 return Sum.getValue(R: 1);
12558}
12559
12560static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12561 EVT CarryType, SelectionDAG &DAG,
12562 const PPCSubtarget &STI) {
12563 SDLoc DL(Flag);
12564 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SumType);
12565 SDValue Carry = DAG.getNode(
12566 Opcode: PPCISD::ADDE, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32), N1: Zero, N2: Zero, N3: Flag);
12567 if (STI.useCRBits())
12568 return DAG.getSetCC(DL, VT: CarryType, LHS: Carry, RHS: Zero, Cond: ISD::SETNE);
12569 return DAG.getZExtOrTrunc(Op: Carry, DL, VT: CarryType);
12570}
12571
12572SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12573
12574 SDLoc DL(Op);
12575 SDNode *N = Op.getNode();
12576 EVT VT = N->getValueType(ResNo: 0);
12577 EVT CarryType = N->getValueType(ResNo: 1);
12578 unsigned Opc = N->getOpcode();
12579 bool IsAdd = Opc == ISD::UADDO;
12580 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12581 SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
12582 N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 1));
12583 SDValue Carry = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: 1), CarryType,
12584 DAG, STI: Subtarget);
12585 if (!IsAdd)
12586 Carry = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Carry,
12587 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryType));
12588 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: Carry);
12589}
12590
// Lower UADDO_CARRY/USUBO_CARRY to PPC's ADDE/SUBE, converting between the
// generic boolean carry values and the hardware CA flag on both sides.
SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDNode *N = Op.getNode();
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(ResNo: 0);
  EVT CarryType = N->getValueType(ResNo: 1);
  SDValue CarryOp = N->getOperand(Num: 2);
  bool IsAdd = Opc == ISD::UADDO_CARRY;
  Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
  // For subtraction, the incoming generic borrow bit has the opposite sense
  // of CA, so invert it before converting it to a flag...
  if (!IsAdd)
    CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
                          N2: DAG.getConstant(Val: 1UL, DL, VT: CarryOp.getValueType()));
  CarryOp = ConvertCarryValueToCarryFlag(SumType: VT, Value: CarryOp, DAG, STI: Subtarget);
  SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
                            N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1), N3: CarryOp);
  CarryOp = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: 1), CarryType, DAG,
                                         STI: Subtarget);
  // ...and invert the recovered CA again to produce the outgoing borrow bit.
  if (!IsAdd)
    CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
                          N2: DAG.getConstant(Val: 1UL, DL, VT: CarryOp.getValueType()));
  return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: CarryOp);
}
12614
12615SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12616
12617 SDLoc dl(Op);
12618 SDValue LHS = Op.getOperand(i: 0);
12619 SDValue RHS = Op.getOperand(i: 1);
12620 EVT VT = Op.getNode()->getValueType(ResNo: 0);
12621
12622 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
12623
12624 SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: RHS, N2: LHS);
12625 SDValue Xor2 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Sub, N2: LHS);
12626
12627 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Xor1, N2: Xor2);
12628
12629 SDValue Overflow =
12630 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: And,
12631 N2: DAG.getConstant(Val: VT.getSizeInBits() - 1, DL: dl, VT: MVT::i32));
12632
12633 SDValue OverflowTrunc =
12634 DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: Op.getNode()->getValueType(ResNo: 1), Operand: Overflow);
12635
12636 return DAG.getMergeValues(Ops: {Sub, OverflowTrunc}, dl);
12637}
12638
12639/// Implements signed add with overflow detection using the rule:
12640/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12641SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12642
12643 SDLoc dl(Op);
12644 SDValue LHS = Op.getOperand(i: 0);
12645 SDValue RHS = Op.getOperand(i: 1);
12646 EVT VT = Op.getNode()->getValueType(ResNo: 0);
12647
12648 SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
12649
12650 // Compute ~(x xor y)
12651 SDValue XorXY = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
12652 SDValue EqvXY = DAG.getNOT(DL: dl, Val: XorXY, VT);
12653 // Compute (s xor x)
12654 SDValue SumXorX = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Sum, N2: LHS);
12655
12656 // overflow = (x eqv y) & (s xor x)
12657 SDValue OverflowInSign = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: EqvXY, N2: SumXorX);
12658
12659 // Shift sign bit down to LSB
12660 SDValue Overflow =
12661 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: OverflowInSign,
12662 N2: DAG.getConstant(Val: VT.getSizeInBits() - 1, DL: dl, VT: MVT::i32));
12663 // Truncate to the overflow type (i1)
12664 SDValue OverflowTrunc =
12665 DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: Op.getNode()->getValueType(ResNo: 1), Operand: Overflow);
12666
12667 return DAG.getMergeValues(Ops: {Sum, OverflowTrunc}, dl);
12668}
12669
// Lower unsigned 3-way compare producing -1/0/1.
SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Freeze both operands: each is used several times below, so every use
  // must observe the same value even for poison/undef inputs.
  SDValue A = DAG.getFreeze(V: Op.getOperand(i: 0));
  SDValue B = DAG.getFreeze(V: Op.getOperand(i: 1));
  EVT OpVT = A.getValueType();   // operand type
  EVT ResVT = Op.getValueType(); // result type

  // On PPC64, i32 carries are affected by the upper 32 bits of the registers.
  // We must zero-extend to i64 to ensure the carry reflects the 32-bit unsigned
  // comparison.
  if (Subtarget.isPPC64() && OpVT == MVT::i32) {
    A = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: A);
    B = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: B);
    OpVT = MVT::i64;
  }

  // First compute diff = A - B.
  SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: OpVT, N1: A, N2: B);

  // Generate B - A using SUBC to capture carry.
  SDVTList VTs = DAG.getVTList(VT1: OpVT, VT2: MVT::i32);
  SDValue SubC = DAG.getNode(Opcode: PPCISD::SUBC, DL, VTList: VTs, N1: B, N2: A);
  SDValue CA0 = SubC.getValue(R: 1);

  // t2 = A - B + CA0 using SUBE.
  SDValue SubE1 = DAG.getNode(Opcode: PPCISD::SUBE, DL, VTList: VTs, N1: A, N2: B, N3: CA0);
  SDValue CA1 = SubE1.getValue(R: 1);

  // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
  SDValue ResPair = DAG.getNode(Opcode: PPCISD::SUBE, DL, VTList: VTs, N1: Diff, N2: SubE1, N3: CA1);

  // Extract the first result and truncate to result type if needed.
  return DAG.getSExtOrTrunc(Op: ResPair.getValue(R: 0), DL, VT: ResVT);
}
12705
/// LowerOperation - Provide custom lowering hooks for some operations.
/// Returning the incoming Op unchanged means "this node is fine as-is";
/// returning SDValue() falls back to the default legalization.
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Wasn't expecting to be able to lower this!");
  // Floating-point math operations.
  case ISD::FPOW: return lowerPow(Op, DAG);
  case ISD::FSIN: return lowerSin(Op, DAG);
  case ISD::FCOS: return lowerCos(Op, DAG);
  case ISD::FLOG: return lowerLog(Op, DAG);
  case ISD::FLOG10: return lowerLog10(Op, DAG);
  case ISD::FEXP: return lowerExp(Op, DAG);
  // Address lowering.
  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS:
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
  // Signed overflow-detecting arithmetic.
  case ISD::SSUBO:
    return LowerSSUBO(Op, DAG);
  case ISD::SADDO:
    return LowerSADDO(Op, DAG);

  case ISD::INLINEASM:
  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
  // Variable argument lowering.
  case ISD::VASTART: return LowerVASTART(Op, DAG);
  case ISD::VAARG: return LowerVAARG(Op, DAG);
  case ISD::VACOPY: return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  // FP <-> integer conversions (strict and non-strict share the hooks).
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, dl: SDLoc(Op));
  case ISD::STRICT_UINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
  case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return LowerSET_ROUNDING(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);

  case ISD::FSHL: return LowerFunnelShift(Op, DAG);
  case ISD::FSHR: return LowerFunnelShift(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL: return LowerMUL(Op, DAG);
  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
  case ISD::STRICT_FP_ROUND:
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);

  case ISD::BITCAST: return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::ATOMIC_STORE:
    return LowerATOMIC_LOAD_STORE(Op, DAG);
  case ISD::IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  // Unsigned overflow-detecting arithmetic.
  case ISD::UADDO:
  case ISD::USUBO:
    return LowerADDSUBO(Op, DAG);
  case ISD::UADDO_CARRY:
  case ISD::USUBO_CARRY:
    return LowerADDSUBO_CARRY(Op, DAG);
  case ISD::UCMP:
    return LowerUCMP(Op, DAG);
  // These strict nodes are legal as-is only when FP exceptions may be
  // ignored; otherwise fall back to the default expansion.
  case ISD::STRICT_LRINT:
  case ISD::STRICT_LLRINT:
  case ISD::STRICT_LROUND:
  case ISD::STRICT_LLROUND:
  case ISD::STRICT_FNEARBYINT:
    if (Op->getFlags().hasNoFPExcept())
      return Op;
    return SDValue();
  case ISD::VP_LOAD:
    return LowerVP_LOAD(Op, DAG);
  case ISD::VP_STORE:
    return LowerVP_STORE(Op, DAG);
  }
}
12829
// Custom type legalization: replace the results of nodes whose result types
// are illegal with equivalent values built from custom code. Pushes the new
// values onto Results in result order.
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::ATOMIC_LOAD: {
    // Custom-lowered atomic load yields both a value and a chain.
    SDValue Res = LowerATOMIC_LOAD_STORE(Op: SDValue(N, 0), DAG);
    Results.push_back(Elt: Res);
    Results.push_back(Elt: Res.getValue(R: 1));
    break;
  }
  case ISD::READCYCLECOUNTER: {
    // Read the time base as two i32 halves and pair them into an i64.
    SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other);
    SDValue RTB = DAG.getNode(Opcode: PPCISD::READ_TIME_BASE, DL: dl, VTList: VTs, N: N->getOperand(Num: 0));

    Results.push_back(
        Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: RTB, N2: RTB.getValue(R: 1)));
    Results.push_back(Elt: RTB.getValue(R: 2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Only the loop_decrement intrinsic needs custom legalization here.
    if (N->getConstantOperandVal(Num: 1) != Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(),
                                 VT: N->getValueType(ResNo: 0));
    SDVTList VTs = DAG.getVTList(VT1: SVT, VT2: MVT::Other);
    SDValue NewInt = DAG.getNode(Opcode: N->getOpcode(), DL: dl, VTList: VTs, N1: N->getOperand(Num: 0),
                                 N2: N->getOperand(Num: 1));

    // Narrow the legalized setcc result back to the expected i1.
    Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewInt));
    Results.push_back(Elt: NewInt.getValue(R: 1));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (N->getConstantOperandVal(Num: 0)) {
    case Intrinsic::ppc_pack_longdouble:
      // ppcf128 is a pair of doubles; note the operand order is swapped.
      Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::ppcf128,
                                    N1: N->getOperand(Num: 2), N2: N->getOperand(Num: 1)));
      break;
    case Intrinsic::ppc_maxfe:
    case Intrinsic::ppc_minfe:
    case Intrinsic::ppc_fnmsub:
    case Intrinsic::ppc_convert_f128_to_ppcf128:
      Results.push_back(Elt: LowerINTRINSIC_WO_CHAIN(Op: SDValue(N, 0), DAG));
      break;
    }
    break;
  }
  case ISD::VAARG: {
    // Custom i64 va_arg handling only applies to 32-bit SVR4.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(ResNo: 0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(Op: SDValue(N, 1), DAG);

      Results.push_back(Elt: NewNode);
      Results.push_back(Elt: NewNode.getValue(R: 1));
    }
    return;
  }
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(Num: N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128)
      return;
    SDValue LoweredValue = LowerFP_TO_INT(Op: SDValue(N, 0), DAG, dl);
    Results.push_back(Elt: LoweredValue);
    if (N->isStrictFPOpcode())
      Results.push_back(Elt: LoweredValue.getValue(R: 1));
    return;
  }
  case ISD::TRUNCATE: {
    // Only vector truncates are custom-legalized here.
    if (!N->getValueType(ResNo: 0).isVector())
      return;
    SDValue Lowered = LowerTRUNCATEVector(Op: SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Elt: Lowered);
    return;
  }
  case ISD::SCALAR_TO_VECTOR: {
    SDValue Lowered = LowerSCALAR_TO_VECTOR(Op: SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Elt: Lowered);
    return;
  }
  case ISD::FSHL:
  case ISD::FSHR:
    // Don't handle funnel shifts here.
    return;
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    SDValue Lowered = LowerFP_EXTEND(Op: SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Elt: Lowered);
    return;
  }
}
12939
12940//===----------------------------------------------------------------------===//
12941// Other Lowering Code
12942//===----------------------------------------------------------------------===//
12943
12944static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12945 return Builder.CreateIntrinsic(ID: Id, Args: {});
12946}
12947
12948Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12949 Value *Addr,
12950 AtomicOrdering Ord) const {
12951 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12952
12953 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12954 "Only 8/16/32/64-bit atomic loads supported");
12955 Intrinsic::ID IntID;
12956 switch (SZ) {
12957 default:
12958 llvm_unreachable("Unexpected PrimitiveSize");
12959 case 8:
12960 IntID = Intrinsic::ppc_lbarx;
12961 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12962 break;
12963 case 16:
12964 IntID = Intrinsic::ppc_lharx;
12965 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12966 break;
12967 case 32:
12968 IntID = Intrinsic::ppc_lwarx;
12969 break;
12970 case 64:
12971 IntID = Intrinsic::ppc_ldarx;
12972 break;
12973 }
12974 Value *Call =
12975 Builder.CreateIntrinsic(ID: IntID, Args: Addr, /*FMFSource=*/nullptr, Name: "larx");
12976
12977 return Builder.CreateTruncOrBitCast(V: Call, DestTy: ValueTy);
12978}
12979
12980// Perform a store-conditional operation to Addr. Return the status of the
12981// store. This should be 0 if the store succeeded, non-zero otherwise.
12982Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12983 Value *Val, Value *Addr,
12984 AtomicOrdering Ord) const {
12985 Type *Ty = Val->getType();
12986 unsigned SZ = Ty->getPrimitiveSizeInBits();
12987
12988 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12989 "Only 8/16/32/64-bit atomic loads supported");
12990 Intrinsic::ID IntID;
12991 switch (SZ) {
12992 default:
12993 llvm_unreachable("Unexpected PrimitiveSize");
12994 case 8:
12995 IntID = Intrinsic::ppc_stbcx;
12996 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12997 break;
12998 case 16:
12999 IntID = Intrinsic::ppc_sthcx;
13000 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13001 break;
13002 case 32:
13003 IntID = Intrinsic::ppc_stwcx;
13004 break;
13005 case 64:
13006 IntID = Intrinsic::ppc_stdcx;
13007 break;
13008 }
13009
13010 if (SZ == 8 || SZ == 16)
13011 Val = Builder.CreateZExt(V: Val, DestTy: Builder.getInt32Ty());
13012
13013 Value *Call = Builder.CreateIntrinsic(ID: IntID, Args: {Addr, Val},
13014 /*FMFSource=*/nullptr, Name: "stcx");
13015 return Builder.CreateXor(LHS: Call, RHS: Builder.getInt32(C: 1));
13016}
13017
13018// The mappings for emitLeading/TrailingFence is taken from
13019// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
13020Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
13021 Instruction *Inst,
13022 AtomicOrdering Ord) const {
13023 if (Ord == AtomicOrdering::SequentiallyConsistent)
13024 return callIntrinsic(Builder, Id: Intrinsic::ppc_sync);
13025 if (isReleaseOrStronger(AO: Ord))
13026 return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
13027 return nullptr;
13028}
13029
// Emit the barrier required after an atomic operation: only needed for
// acquire-or-stronger orderings on instructions that perform an atomic load.
Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(AO: Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    // Plain loads get the lighter-weight cfence intrinsic.
    if (isa<LoadInst>(Val: Inst))
      return Builder.CreateIntrinsic(ID: Intrinsic::ppc_cfence, Types: {Inst->getType()},
                                     Args: {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
  }
  return nullptr;
}
13045
13046MachineBasicBlock *
13047PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
13048 unsigned AtomicSize,
13049 unsigned BinOpcode,
13050 unsigned CmpOpcode,
13051 unsigned CmpPred) const {
13052 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13053 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13054
13055 auto LoadMnemonic = PPC::LDARX;
13056 auto StoreMnemonic = PPC::STDCX;
13057 switch (AtomicSize) {
13058 default:
13059 llvm_unreachable("Unexpected size of atomic entity");
13060 case 1:
13061 LoadMnemonic = PPC::LBARX;
13062 StoreMnemonic = PPC::STBCX;
13063 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13064 break;
13065 case 2:
13066 LoadMnemonic = PPC::LHARX;
13067 StoreMnemonic = PPC::STHCX;
13068 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13069 break;
13070 case 4:
13071 LoadMnemonic = PPC::LWARX;
13072 StoreMnemonic = PPC::STWCX;
13073 break;
13074 case 8:
13075 LoadMnemonic = PPC::LDARX;
13076 StoreMnemonic = PPC::STDCX;
13077 break;
13078 }
13079
13080 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13081 MachineFunction *F = BB->getParent();
13082 MachineFunction::iterator It = ++BB->getIterator();
13083
13084 Register dest = MI.getOperand(i: 0).getReg();
13085 Register ptrA = MI.getOperand(i: 1).getReg();
13086 Register ptrB = MI.getOperand(i: 2).getReg();
13087 Register incr = MI.getOperand(i: 3).getReg();
13088 DebugLoc dl = MI.getDebugLoc();
13089
13090 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13091 MachineBasicBlock *loop2MBB =
13092 CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
13093 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13094 F->insert(MBBI: It, MBB: loopMBB);
13095 if (CmpOpcode)
13096 F->insert(MBBI: It, MBB: loop2MBB);
13097 F->insert(MBBI: It, MBB: exitMBB);
13098 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
13099 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13100 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13101
13102 MachineRegisterInfo &RegInfo = F->getRegInfo();
13103 Register TmpReg = (!BinOpcode) ? incr :
13104 RegInfo.createVirtualRegister( RegClass: AtomicSize == 8 ? &PPC::G8RCRegClass
13105 : &PPC::GPRCRegClass);
13106
13107 // thisMBB:
13108 // ...
13109 // fallthrough --> loopMBB
13110 BB->addSuccessor(Succ: loopMBB);
13111
13112 // loopMBB:
13113 // l[wd]arx dest, ptr
13114 // add r0, dest, incr
13115 // st[wd]cx. r0, ptr
13116 // bne- loopMBB
13117 // fallthrough --> exitMBB
13118
13119 // For max/min...
13120 // loopMBB:
13121 // l[wd]arx dest, ptr
13122 // cmpl?[wd] dest, incr
13123 // bgt exitMBB
13124 // loop2MBB:
13125 // st[wd]cx. dest, ptr
13126 // bne- loopMBB
13127 // fallthrough --> exitMBB
13128
13129 BB = loopMBB;
13130 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest)
13131 .addReg(RegNo: ptrA).addReg(RegNo: ptrB);
13132 if (BinOpcode)
13133 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg).addReg(RegNo: incr).addReg(RegNo: dest);
13134 if (CmpOpcode) {
13135 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13136 // Signed comparisons of byte or halfword values must be sign-extended.
13137 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13138 Register ExtReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
13139 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13140 DestReg: ExtReg).addReg(RegNo: dest);
13141 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ExtReg).addReg(RegNo: incr);
13142 } else
13143 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: dest).addReg(RegNo: incr);
13144
13145 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13146 .addImm(Val: CmpPred)
13147 .addReg(RegNo: CrReg)
13148 .addMBB(MBB: exitMBB);
13149 BB->addSuccessor(Succ: loop2MBB);
13150 BB->addSuccessor(Succ: exitMBB);
13151 BB = loop2MBB;
13152 }
13153 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
13154 .addReg(RegNo: TmpReg).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
13155 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13156 .addImm(Val: PPC::PRED_NE_MINUS)
13157 .addReg(RegNo: PPC::CR0)
13158 .addMBB(MBB: loopMBB);
13159 BB->addSuccessor(Succ: loopMBB);
13160 BB->addSuccessor(Succ: exitMBB);
13161
13162 // exitMBB:
13163 // ...
13164 BB = exitMBB;
13165 return BB;
13166}
13167
13168static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13169 switch(MI.getOpcode()) {
13170 default:
13171 return false;
13172 case PPC::COPY:
13173 return TII->isSignExtended(Reg: MI.getOperand(i: 1).getReg(),
13174 MRI: &MI.getMF()->getRegInfo());
13175 case PPC::LHA:
13176 case PPC::LHA8:
13177 case PPC::LHAU:
13178 case PPC::LHAU8:
13179 case PPC::LHAUX:
13180 case PPC::LHAUX8:
13181 case PPC::LHAX:
13182 case PPC::LHAX8:
13183 case PPC::LWA:
13184 case PPC::LWAUX:
13185 case PPC::LWAX:
13186 case PPC::LWAX_32:
13187 case PPC::LWA_32:
13188 case PPC::PLHA:
13189 case PPC::PLHA8:
13190 case PPC::PLHA8pc:
13191 case PPC::PLHApc:
13192 case PPC::PLWA:
13193 case PPC::PLWA8:
13194 case PPC::PLWA8pc:
13195 case PPC::PLWApc:
13196 case PPC::EXTSB:
13197 case PPC::EXTSB8:
13198 case PPC::EXTSB8_32_64:
13199 case PPC::EXTSB8_rec:
13200 case PPC::EXTSB_rec:
13201 case PPC::EXTSH:
13202 case PPC::EXTSH8:
13203 case PPC::EXTSH8_32_64:
13204 case PPC::EXTSH8_rec:
13205 case PPC::EXTSH_rec:
13206 case PPC::EXTSW:
13207 case PPC::EXTSWSLI:
13208 case PPC::EXTSWSLI_32_64:
13209 case PPC::EXTSWSLI_32_64_rec:
13210 case PPC::EXTSWSLI_rec:
13211 case PPC::EXTSW_32:
13212 case PPC::EXTSW_32_64:
13213 case PPC::EXTSW_32_64_rec:
13214 case PPC::EXTSW_rec:
13215 case PPC::SRAW:
13216 case PPC::SRAWI:
13217 case PPC::SRAWI_rec:
13218 case PPC::SRAW_rec:
13219 return true;
13220 }
13221 return false;
13222}
13223
// Expand an 8- or 16-bit atomic read-modify-write pseudo. If the subtarget
// has native partword lbarx/lharx/stbcx./sthcx., this defers to
// EmitAtomicBinary; otherwise the partword operation is emulated with an
// aligned 32-bit lwarx/stwcx. retry loop, shifting and masking the
// byte/halfword lane within its containing word. BinOpcode == 0 indicates
// ATOMIC_SWAP; a nonzero CmpOpcode selects the min/max form with branch
// predicate CmpPred.
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const PPCInstrInfo *TII = Subtarget.getInstrInfo();

  // If this is a signed comparison and the value being compared is not known
  // to be sign extended, sign extend it here.
  DebugLoc dl = MI.getDebugLoc();
  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &RegInfo = F->getRegInfo();
  Register incr = MI.getOperand(i: 3).getReg();
  bool IsSignExtended =
      incr.isVirtual() && isSignExtended(MI&: *RegInfo.getVRegDef(Reg: incr), TII);

  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
    // Insert the extend before MI and rewrite MI's operand so the rest of
    // the expansion (either path below) sees the sign-extended value.
    Register ValueReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
    BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueReg)
        .addReg(RegNo: MI.getOperand(i: 3).getReg());
    MI.getOperand(i: 3).setReg(ValueReg);
    incr = ValueReg;
  }
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, AtomicSize: is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(i: 0).getReg();
  Register ptrA = MI.getOperand(i: 1).getReg();
  Register ptrB = MI.getOperand(i: 2).getReg();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  F->insert(MBBI: It, MBB: loopMBB);
  if (CmpOpcode)
    F->insert(MBBI: It, MBB: loop2MBB);
  F->insert(MBBI: It, MBB: exitMBB);
  // Move everything after MI into the exit block, which inherits BB's
  // successors.
  exitMBB->splice(Where: exitMBB->begin(), Other: BB,
                  From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);

  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  // On little-endian no xori correction is needed, so the shift amount is
  // Shift1Reg itself.
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register SrwDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Ptr1Reg;
  // For swap (no binop) the shifted increment is stored directly.
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RegClass: GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(Succ: loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw SrwDest, tmpDest, shift
  //   rlwinm SrwDest, SrwDest, 0, 24 [16], 31
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
        .addReg(RegNo: ptrA)
        .addReg(RegNo: ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  // We need to use a 32-bit subregister here to avoid a register-class
  // mismatch in 64-bit mode.
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
      .addReg(RegNo: Ptr1Reg, Flags: {}, SubReg: is64bit ? PPC::sub_32 : 0)
      .addImm(Val: 3)
      .addImm(Val: 27)
      .addImm(Val: is8bit ? 28 : 27);
  // Big-endian lanes sit at the opposite end of the word, so flip the bit
  // offset with an xori.
  if (!isLittleEndian)
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
        .addReg(RegNo: Shift1Reg)
        .addImm(Val: is8bit ? 24 : 16);
  // Clear the low two address bits to get the aligned word address.
  if (is64bit)
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
        .addReg(RegNo: Ptr1Reg)
        .addImm(Val: 0)
        .addImm(Val: 61);
  else
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
        .addReg(RegNo: Ptr1Reg)
        .addImm(Val: 0)
        .addImm(Val: 0)
        .addImm(Val: 29);
  // Shift the increment and the partword mask into the target lane.
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: Incr2Reg).addReg(RegNo: incr).addReg(RegNo: ShiftReg);
  if (is8bit)
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: 255);
  else {
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: 0);
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
        .addReg(RegNo: Mask3Reg)
        .addImm(Val: 65535);
  }
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
      .addReg(RegNo: Mask2Reg)
      .addReg(RegNo: ShiftReg);

  BB = loopMBB;
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
      .addReg(RegNo: ZeroReg)
      .addReg(RegNo: PtrReg);
  if (BinOpcode)
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg)
        .addReg(RegNo: Incr2Reg)
        .addReg(RegNo: TmpDestReg);
  // Merge the updated lane back into the untouched bytes of the word.
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
      .addReg(RegNo: TmpDestReg)
      .addReg(RegNo: MaskReg);
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: Tmp3Reg).addReg(RegNo: TmpReg).addReg(RegNo: MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register SReg = RegInfo.createVirtualRegister(RegClass: GPRC);
    Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: SReg)
        .addReg(RegNo: TmpDestReg)
        .addReg(RegNo: MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      // Signed compare: shift the lane down and sign-extend it, then compare
      // against the original (already sign-extended) increment.
      ValueReg = RegInfo.createVirtualRegister(RegClass: GPRC);
      BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: ValueReg)
          .addReg(RegNo: SReg)
          .addReg(RegNo: ShiftReg);
      Register ValueSReg = RegInfo.createVirtualRegister(RegClass: GPRC);
      BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueSReg)
          .addReg(RegNo: ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ValueReg).addReg(RegNo: CmpReg);
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
        .addImm(Val: CmpPred)
        .addReg(RegNo: CrReg)
        .addMBB(MBB: exitMBB);
    BB->addSuccessor(Succ: loop2MBB);
    BB->addSuccessor(Succ: exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg).addReg(RegNo: Tmp3Reg).addReg(RegNo: Tmp2Reg);
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
      .addReg(RegNo: Tmp4Reg)
      .addReg(RegNo: ZeroReg)
      .addReg(RegNo: PtrReg);
  // Retry if the reservation was lost.
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
      .addImm(Val: PPC::PRED_NE_MINUS)
      .addReg(RegNo: PPC::CR0)
      .addMBB(MBB: loopMBB);
  BB->addSuccessor(Succ: loopMBB);
  BB->addSuccessor(Succ: exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // Since the shift amount is not a constant, we need to clear
  // the upper bits with a separate RLWINM.
  // Note the insertion order: both instructions are inserted at
  // exitMBB->begin(), so the SRW built second ends up FIRST in the block,
  // giving the final order: srw SrwDest, tmpDest, shift; rlwinm dest, ...
  BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: dest)
      .addReg(RegNo: SrwDestReg)
      .addImm(Val: 0)
      .addImm(Val: is8bit ? 24 : 16)
      .addImm(Val: 31);
  BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: SrwDestReg)
      .addReg(RegNo: TmpDestReg)
      .addReg(RegNo: ShiftReg);
  return BB;
}
13437
// Expand the EH_SjLj_SetJmp pseudo: split the block into thisMBB (fills the
// buffer and sets up the "longjmp returned" value), mainMBB (stores the
// resume address and produces the "direct call" value) and sinkMBB (a PHI
// merging the two results). Returns sinkMBB, where lowering continues.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  Register DstReg = MI.getOperand(i: 0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  Register mainDstReg = MRI.createVirtualRegister(RegClass: RC);
  Register restoreDstReg = MRI.createVirtualRegister(RegClass: RC);

  MVT PVT = getPointerTy(DL: MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(MBBI: I, MBB: mainMBB);
  MF->insert(MBBI: I, MBB: sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(Where: sinkMBB->begin(), Other: MBB,
                  From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  // Slot offsets within the buffer, in pointer-sized units (see layout
  // comment above).
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(VT: PVT);
  Register LabelReg = MRI.createVirtualRegister(RegClass: PtrRC);
  Register BufReg = MI.getOperand(i: 1).getReg();

  // On 64-bit ELF, save the TOC pointer (X2) into the fourth slot.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
              .addReg(RegNo: PPC::X2)
              .addImm(Val: TOCOffset)
              .addReg(RegNo: BufReg)
              .cloneMemRefs(OtherMI: MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Kind: Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL,
                MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(RegNo: BaseReg)
            .addImm(Val: BPOffset)
            .addReg(RegNo: BufReg)
            .cloneMemRefs(OtherMI: MI);

  // Setup
  // BCLalways branches to mainMBB; mainMBB's MFLR below then reads the
  // link-register value this branch establishes.
  MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::BCLalways)).addMBB(MBB: mainMBB);
  MIB.addRegMask(Mask: TRI->getNoPreservedMask());

  // A longjmp resume comes back here: the result value is 1.
  BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: restoreDstReg).addImm(Val: 1);

  MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::EH_SjLj_Setup))
            .addMBB(MBB: mainMBB);
  MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: sinkMBB);

  thisMBB->addSuccessor(Succ: mainMBB, Prob: BranchProbability::getZero());
  thisMBB->addSuccessor(Succ: sinkMBB, Prob: BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  MIB =
      BuildMI(BB: mainMBB, MIMD: DL,
              MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), DestReg: LabelReg);

  // Store IP (the resume address captured from LR) into the second slot.
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
              .addReg(RegNo: LabelReg)
              .addImm(Val: LabelOffset)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STW))
              .addReg(RegNo: LabelReg)
              .addImm(Val: LabelOffset)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // The direct (non-longjmp) path yields 0.
  BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: mainDstReg).addImm(Val: 0);
  mainMBB->addSuccessor(Succ: sinkMBB);

  // sinkMBB:
  // Merge the two result values (0 from mainMBB, 1 from thisMBB).
  BuildMI(BB&: *sinkMBB, I: sinkMBB->begin(), MIMD: DL,
          MCID: TII->get(Opcode: PPC::PHI), DestReg: DstReg)
      .addReg(RegNo: mainDstReg).addMBB(MBB: mainMBB)
      .addReg(RegNo: restoreDstReg).addMBB(MBB: thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
13579
// Expand the EH_SjLj_LongJmp pseudo: reload FP, the resume address, SP, BP
// (and on 64-bit SVR4 the TOC pointer) from the setjmp buffer, then jump to
// the reloaded address via CTR. The buffer layout matches emitEHSjLjSetJmp
// (slot 0: FP, 1: IP, 2: SP, 3: TOC, 4: BP), not the libc jmp_buf.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(DL: MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
      (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  Register Tmp = MRI.createVirtualRegister(RegClass: RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  // Base-pointer register choice mirrors the frame-lowering convention
  // (R29 for 32-bit PIC SVR4, R30 otherwise, X30 on 64-bit).
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
                                                              : PPC::R30);

  MachineInstrBuilder MIB;

  // Slot offsets within the buffer, in pointer-sized units.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  Register BufReg = MI.getOperand(i: 0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: FP)
              .addImm(Val: 0)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: FP)
              .addImm(Val: 0)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // Reload IP (the resume address stored by setjmp).
  if (PVT == MVT::i64) {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: Tmp)
              .addImm(Val: LabelOffset)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: Tmp)
              .addImm(Val: LabelOffset)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: SP)
              .addImm(Val: SPOffset)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: SP)
              .addImm(Val: SPOffset)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: BP)
              .addImm(Val: BPOffset)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: BP)
              .addImm(Val: BPOffset)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // Reload TOC (needed for cross-shared-library jumps on 64-bit SVR4).
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: PPC::X2)
              .addImm(Val: TOCOffset)
              .addReg(RegNo: BufReg)
              .cloneMemRefs(OtherMI: MI);
  }

  // Jump to the reloaded address through the count register.
  BuildMI(BB&: *MBB, I&: MI, MIMD: DL,
          MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(RegNo: Tmp);
  BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}
13681
13682bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13683 // If the function specifically requests inline stack probes, emit them.
13684 if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
13685 return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
13686 "inline-asm";
13687 return false;
13688}
13689
13690unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13691 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13692 unsigned StackAlign = TFI->getStackAlignment();
13693 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13694 "Unexpected stack alignment");
13695 // The default stack probe size is 4096 if the function has no
13696 // stack-probe-size attribute.
13697 const Function &Fn = MF.getFunction();
13698 unsigned StackProbeSize =
13699 Fn.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: 4096);
13700 // Round down to the stack alignment.
13701 StackProbeSize &= ~(StackAlign - 1);
13702 return StackProbeSize ? StackProbeSize : StackAlign;
13703}
13704
13705// Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
13706// into three phases. In the first phase, it uses pseudo instruction
13707// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
13708// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
13709// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
13710// MaxCallFrameSize so that it can calculate correct data area pointer.
13711MachineBasicBlock *
13712PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13713 MachineBasicBlock *MBB) const {
13714 const bool isPPC64 = Subtarget.isPPC64();
13715 MachineFunction *MF = MBB->getParent();
13716 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13717 DebugLoc DL = MI.getDebugLoc();
13718 const unsigned ProbeSize = getStackProbeSize(MF: *MF);
13719 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13720 MachineRegisterInfo &MRI = MF->getRegInfo();
13721 // The CFG of probing stack looks as
13722 // +-----+
13723 // | MBB |
13724 // +--+--+
13725 // |
13726 // +----v----+
13727 // +--->+ TestMBB +---+
13728 // | +----+----+ |
13729 // | | |
13730 // | +-----v----+ |
13731 // +---+ BlockMBB | |
13732 // +----------+ |
13733 // |
13734 // +---------+ |
13735 // | TailMBB +<--+
13736 // +---------+
13737 // In MBB, calculate previous frame pointer and final stack pointer.
13738 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13739 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13740 // TailMBB is spliced via \p MI.
13741 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13742 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13743 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13744
13745 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13746 MF->insert(MBBI: MBBIter, MBB: TestMBB);
13747 MF->insert(MBBI: MBBIter, MBB: BlockMBB);
13748 MF->insert(MBBI: MBBIter, MBB: TailMBB);
13749
13750 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13751 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13752
13753 Register DstReg = MI.getOperand(i: 0).getReg();
13754 Register NegSizeReg = MI.getOperand(i: 1).getReg();
13755 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13756 Register FinalStackPtr = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13757 Register FramePointer = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13758 Register ActualNegSizeReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13759
13760 // Since value of NegSizeReg might be realigned in prologepilog, insert a
13761 // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
13762 // NegSize.
13763 unsigned ProbeOpc;
13764 if (!MRI.hasOneNonDBGUse(RegNo: NegSizeReg))
13765 ProbeOpc =
13766 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13767 else
13768 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
13769 // and NegSizeReg will be allocated in the same phyreg to avoid
13770 // redundant copy when NegSizeReg has only one use which is current MI and
13771 // will be replaced by PREPARE_PROBED_ALLOCA then.
13772 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13773 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13774 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: ProbeOpc), DestReg: FramePointer)
13775 .addDef(RegNo: ActualNegSizeReg)
13776 .addReg(RegNo: NegSizeReg)
13777 .add(MO: MI.getOperand(i: 2))
13778 .add(MO: MI.getOperand(i: 3));
13779
13780 // Calculate final stack pointer, which equals to SP + ActualNegSize.
13781 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4),
13782 DestReg: FinalStackPtr)
13783 .addReg(RegNo: SPReg)
13784 .addReg(RegNo: ActualNegSizeReg);
13785
13786 // Materialize a scratch register for update.
13787 int64_t NegProbeSize = -(int64_t)ProbeSize;
13788 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13789 Register ScratchReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13790 if (!isInt<16>(x: NegProbeSize)) {
13791 Register TempReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13792 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LIS8 : PPC::LIS), DestReg: TempReg)
13793 .addImm(Val: NegProbeSize >> 16);
13794 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ORI8 : PPC::ORI),
13795 DestReg: ScratchReg)
13796 .addReg(RegNo: TempReg)
13797 .addImm(Val: NegProbeSize & 0xFFFF);
13798 } else
13799 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LI8 : PPC::LI), DestReg: ScratchReg)
13800 .addImm(Val: NegProbeSize);
13801
13802 {
13803 // Probing leading residual part.
13804 Register Div = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13805 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::DIVD : PPC::DIVW), DestReg: Div)
13806 .addReg(RegNo: ActualNegSizeReg)
13807 .addReg(RegNo: ScratchReg);
13808 Register Mul = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13809 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::MULLD : PPC::MULLW), DestReg: Mul)
13810 .addReg(RegNo: Div)
13811 .addReg(RegNo: ScratchReg);
13812 Register NegMod = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13813 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::SUBF8 : PPC::SUBF), DestReg: NegMod)
13814 .addReg(RegNo: Mul)
13815 .addReg(RegNo: ActualNegSizeReg);
13816 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13817 .addReg(RegNo: FramePointer)
13818 .addReg(RegNo: SPReg)
13819 .addReg(RegNo: NegMod);
13820 }
13821
13822 {
13823 // Remaining part should be multiple of ProbeSize.
13824 Register CmpResult = MRI.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13825 BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::CMPD : PPC::CMPW), DestReg: CmpResult)
13826 .addReg(RegNo: SPReg)
13827 .addReg(RegNo: FinalStackPtr);
13828 BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC))
13829 .addImm(Val: PPC::PRED_EQ)
13830 .addReg(RegNo: CmpResult)
13831 .addMBB(MBB: TailMBB);
13832 TestMBB->addSuccessor(Succ: BlockMBB);
13833 TestMBB->addSuccessor(Succ: TailMBB);
13834 }
13835
13836 {
13837 // Touch the block.
13838 // |P...|P...|P...
13839 BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13840 .addReg(RegNo: FramePointer)
13841 .addReg(RegNo: SPReg)
13842 .addReg(RegNo: ScratchReg);
13843 BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: TestMBB);
13844 BlockMBB->addSuccessor(Succ: TestMBB);
13845 }
13846
13847 // Calculation of MaxCallFrameSize is deferred to prologepilog, use
13848 // DYNAREAOFFSET pseudo instruction to get the future result.
13849 Register MaxCallFrameSizeReg =
13850 MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13851 BuildMI(BB: TailMBB, MIMD: DL,
13852 MCID: TII->get(Opcode: isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13853 DestReg: MaxCallFrameSizeReg)
13854 .add(MO: MI.getOperand(i: 2))
13855 .add(MO: MI.getOperand(i: 3));
13856 BuildMI(BB: TailMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4), DestReg: DstReg)
13857 .addReg(RegNo: SPReg)
13858 .addReg(RegNo: MaxCallFrameSizeReg);
13859
13860 // Splice instructions after MI to TailMBB.
13861 TailMBB->splice(Where: TailMBB->end(), Other: MBB,
13862 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
13863 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
13864 MBB->addSuccessor(Succ: TestMBB);
13865
13866 // Delete the pseudo instruction.
13867 MI.eraseFromParent();
13868
13869 ++NumDynamicAllocaProbed;
13870 return TailMBB;
13871}
13872
13873static bool IsSelectCC(MachineInstr &MI) {
13874 switch (MI.getOpcode()) {
13875 case PPC::SELECT_CC_I4:
13876 case PPC::SELECT_CC_I8:
13877 case PPC::SELECT_CC_F4:
13878 case PPC::SELECT_CC_F8:
13879 case PPC::SELECT_CC_F16:
13880 case PPC::SELECT_CC_VRRC:
13881 case PPC::SELECT_CC_VSFRC:
13882 case PPC::SELECT_CC_VSSRC:
13883 case PPC::SELECT_CC_VSRC:
13884 case PPC::SELECT_CC_SPE4:
13885 case PPC::SELECT_CC_SPE:
13886 return true;
13887 default:
13888 return false;
13889 }
13890}
13891
13892static bool IsSelect(MachineInstr &MI) {
13893 switch (MI.getOpcode()) {
13894 case PPC::SELECT_I4:
13895 case PPC::SELECT_I8:
13896 case PPC::SELECT_F4:
13897 case PPC::SELECT_F8:
13898 case PPC::SELECT_F16:
13899 case PPC::SELECT_SPE:
13900 case PPC::SELECT_SPE4:
13901 case PPC::SELECT_VRRC:
13902 case PPC::SELECT_VSFRC:
13903 case PPC::SELECT_VSSRC:
13904 case PPC::SELECT_VSRC:
13905 return true;
13906 default:
13907 return false;
13908 }
13909}
13910
13911MachineBasicBlock *
13912PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13913 MachineBasicBlock *BB) const {
13914 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13915 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13916 if (Subtarget.is64BitELFABI() &&
13917 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13918 !Subtarget.isUsingPCRelativeCalls()) {
13919 // Call lowering should have added an r2 operand to indicate a dependence
13920 // on the TOC base pointer value. It can't however, because there is no
13921 // way to mark the dependence as implicit there, and so the stackmap code
13922 // will confuse it with a regular operand. Instead, add the dependence
13923 // here.
13924 MI.addOperand(Op: MachineOperand::CreateReg(Reg: PPC::X2, isDef: false, isImp: true));
13925 }
13926
13927 return emitPatchPoint(MI, MBB: BB);
13928 }
13929
13930 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13931 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13932 return emitEHSjLjSetJmp(MI, MBB: BB);
13933 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13934 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13935 return emitEHSjLjLongJmp(MI, MBB: BB);
13936 }
13937
13938 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13939
13940 // To "insert" these instructions we actually have to insert their
13941 // control-flow patterns.
13942 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13943 MachineFunction::iterator It = ++BB->getIterator();
13944
13945 MachineFunction *F = BB->getParent();
13946 MachineRegisterInfo &MRI = F->getRegInfo();
13947
13948 if (Subtarget.hasISEL() &&
13949 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13950 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13951 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13952 SmallVector<MachineOperand, 2> Cond;
13953 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13954 MI.getOpcode() == PPC::SELECT_CC_I8)
13955 Cond.push_back(Elt: MI.getOperand(i: 4));
13956 else
13957 Cond.push_back(Elt: MachineOperand::CreateImm(Val: PPC::PRED_BIT_SET));
13958 Cond.push_back(Elt: MI.getOperand(i: 1));
13959
13960 DebugLoc dl = MI.getDebugLoc();
13961 TII->insertSelect(MBB&: *BB, I: MI, DL: dl, DstReg: MI.getOperand(i: 0).getReg(), Cond,
13962 TrueReg: MI.getOperand(i: 2).getReg(), FalseReg: MI.getOperand(i: 3).getReg());
13963 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13964 // The incoming instruction knows the destination vreg to set, the
13965 // condition code register to branch on, the true/false values to
13966 // select between, and a branch opcode to use.
13967
13968 // thisMBB:
13969 // ...
13970 // TrueVal = ...
13971 // cmpTY ccX, r1, r2
13972 // bCC sinkMBB
13973 // fallthrough --> copy0MBB
13974 MachineBasicBlock *thisMBB = BB;
13975 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13976 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13977 DebugLoc dl = MI.getDebugLoc();
13978 F->insert(MBBI: It, MBB: copy0MBB);
13979 F->insert(MBBI: It, MBB: sinkMBB);
13980
13981 if (isPhysRegUsedAfter(Reg: PPC::CARRY, MBI: MI.getIterator())) {
13982 copy0MBB->addLiveIn(PhysReg: PPC::CARRY);
13983 sinkMBB->addLiveIn(PhysReg: PPC::CARRY);
13984 }
13985
13986 // Set the call frame size on entry to the new basic blocks.
13987 // See https://reviews.llvm.org/D156113.
13988 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13989 copy0MBB->setCallFrameSize(CallFrameSize);
13990 sinkMBB->setCallFrameSize(CallFrameSize);
13991
13992 // Transfer the remainder of BB and its successor edges to sinkMBB.
13993 sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
13994 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13995 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13996
13997 // Next, add the true and fallthrough blocks as its successors.
13998 BB->addSuccessor(Succ: copy0MBB);
13999 BB->addSuccessor(Succ: sinkMBB);
14000
14001 if (IsSelect(MI)) {
14002 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BC))
14003 .addReg(RegNo: MI.getOperand(i: 1).getReg())
14004 .addMBB(MBB: sinkMBB);
14005 } else {
14006 unsigned SelectPred = MI.getOperand(i: 4).getImm();
14007 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14008 .addImm(Val: SelectPred)
14009 .addReg(RegNo: MI.getOperand(i: 1).getReg())
14010 .addMBB(MBB: sinkMBB);
14011 }
14012
14013 // copy0MBB:
14014 // %FalseValue = ...
14015 // # fallthrough to sinkMBB
14016 BB = copy0MBB;
14017
14018 // Update machine-CFG edges
14019 BB->addSuccessor(Succ: sinkMBB);
14020
14021 // sinkMBB:
14022 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
14023 // ...
14024 BB = sinkMBB;
14025 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::PHI), DestReg: MI.getOperand(i: 0).getReg())
14026 .addReg(RegNo: MI.getOperand(i: 3).getReg())
14027 .addMBB(MBB: copy0MBB)
14028 .addReg(RegNo: MI.getOperand(i: 2).getReg())
14029 .addMBB(MBB: thisMBB);
14030 } else if (MI.getOpcode() == PPC::ReadTB) {
14031 // To read the 64-bit time-base register on a 32-bit target, we read the
14032 // two halves. Should the counter have wrapped while it was being read, we
14033 // need to try again.
14034 // ...
14035 // readLoop:
14036 // mfspr Rx,TBU # load from TBU
14037 // mfspr Ry,TB # load from TB
14038 // mfspr Rz,TBU # load from TBU
14039 // cmpw crX,Rx,Rz # check if 'old'='new'
14040 // bne readLoop # branch if they're not equal
14041 // ...
14042
14043 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14044 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14045 DebugLoc dl = MI.getDebugLoc();
14046 F->insert(MBBI: It, MBB: readMBB);
14047 F->insert(MBBI: It, MBB: sinkMBB);
14048
14049 // Transfer the remainder of BB and its successor edges to sinkMBB.
14050 sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
14051 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
14052 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14053
14054 BB->addSuccessor(Succ: readMBB);
14055 BB = readMBB;
14056
14057 MachineRegisterInfo &RegInfo = F->getRegInfo();
14058 Register ReadAgainReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
14059 Register LoReg = MI.getOperand(i: 0).getReg();
14060 Register HiReg = MI.getOperand(i: 1).getReg();
14061
14062 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: HiReg).addImm(Val: 269);
14063 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: LoReg).addImm(Val: 268);
14064 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: ReadAgainReg).addImm(Val: 269);
14065
14066 Register CmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14067
14068 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CmpReg)
14069 .addReg(RegNo: HiReg)
14070 .addReg(RegNo: ReadAgainReg);
14071 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14072 .addImm(Val: PPC::PRED_NE)
14073 .addReg(RegNo: CmpReg)
14074 .addMBB(MBB: readMBB);
14075
14076 BB->addSuccessor(Succ: readMBB);
14077 BB->addSuccessor(Succ: sinkMBB);
14078 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
14079 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::ADD4);
14080 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
14081 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::ADD4);
14082 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
14083 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::ADD4);
14084 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
14085 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::ADD8);
14086
14087 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
14088 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::AND);
14089 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
14090 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::AND);
14091 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
14092 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::AND);
14093 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
14094 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::AND8);
14095
14096 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
14097 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::OR);
14098 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
14099 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::OR);
14100 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14101 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::OR);
14102 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14103 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::OR8);
14104
14105 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14106 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::XOR);
14107 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14108 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::XOR);
14109 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14110 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::XOR);
14111 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14112 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::XOR8);
14113
14114 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14115 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::NAND);
14116 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14117 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::NAND);
14118 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14119 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::NAND);
14120 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14121 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::NAND8);
14122
14123 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14124 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::SUBF);
14125 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14126 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::SUBF);
14127 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14128 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::SUBF);
14129 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14130 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::SUBF8);
14131
14132 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14133 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
14134 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14135 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
14136 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14137 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
14138 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14139 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_LT);
14140
14141 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14142 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
14143 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14144 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
14145 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14146 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
14147 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14148 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_GT);
14149
14150 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14151 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
14152 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14153 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
14154 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14155 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
14156 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14157 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_LT);
14158
14159 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14160 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
14161 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14162 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
14163 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14164 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
14165 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14166 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_GT);
14167
14168 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14169 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0);
14170 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14171 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0);
14172 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14173 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0);
14174 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14175 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0);
14176 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14177 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14178 (Subtarget.hasPartwordAtomics() &&
14179 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14180 (Subtarget.hasPartwordAtomics() &&
14181 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14182 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14183
14184 auto LoadMnemonic = PPC::LDARX;
14185 auto StoreMnemonic = PPC::STDCX;
14186 switch (MI.getOpcode()) {
14187 default:
14188 llvm_unreachable("Compare and swap of unknown size");
14189 case PPC::ATOMIC_CMP_SWAP_I8:
14190 LoadMnemonic = PPC::LBARX;
14191 StoreMnemonic = PPC::STBCX;
14192 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14193 break;
14194 case PPC::ATOMIC_CMP_SWAP_I16:
14195 LoadMnemonic = PPC::LHARX;
14196 StoreMnemonic = PPC::STHCX;
14197 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14198 break;
14199 case PPC::ATOMIC_CMP_SWAP_I32:
14200 LoadMnemonic = PPC::LWARX;
14201 StoreMnemonic = PPC::STWCX;
14202 break;
14203 case PPC::ATOMIC_CMP_SWAP_I64:
14204 LoadMnemonic = PPC::LDARX;
14205 StoreMnemonic = PPC::STDCX;
14206 break;
14207 }
14208 MachineRegisterInfo &RegInfo = F->getRegInfo();
14209 Register dest = MI.getOperand(i: 0).getReg();
14210 Register ptrA = MI.getOperand(i: 1).getReg();
14211 Register ptrB = MI.getOperand(i: 2).getReg();
14212 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14213 Register oldval = MI.getOperand(i: 3).getReg();
14214 Register newval = MI.getOperand(i: 4).getReg();
14215 DebugLoc dl = MI.getDebugLoc();
14216
14217 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14218 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14219 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14220 F->insert(MBBI: It, MBB: loop1MBB);
14221 F->insert(MBBI: It, MBB: loop2MBB);
14222 F->insert(MBBI: It, MBB: exitMBB);
14223 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
14224 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
14225 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14226
14227 // thisMBB:
14228 // ...
14229 // fallthrough --> loopMBB
14230 BB->addSuccessor(Succ: loop1MBB);
14231
14232 // loop1MBB:
14233 // l[bhwd]arx dest, ptr
14234 // cmp[wd] dest, oldval
14235 // bne- exitBB
14236 // loop2MBB:
14237 // st[bhwd]cx. newval, ptr
14238 // bne- loopMBB
14239 // b exitBB
14240 // exitBB:
14241 BB = loop1MBB;
14242 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
14243 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::CMPD : PPC::CMPW), DestReg: CrReg)
14244 .addReg(RegNo: dest)
14245 .addReg(RegNo: oldval);
14246 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14247 .addImm(Val: PPC::PRED_NE_MINUS)
14248 .addReg(RegNo: CrReg)
14249 .addMBB(MBB: exitMBB);
14250 BB->addSuccessor(Succ: loop2MBB);
14251 BB->addSuccessor(Succ: exitMBB);
14252
14253 BB = loop2MBB;
14254 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
14255 .addReg(RegNo: newval)
14256 .addReg(RegNo: ptrA)
14257 .addReg(RegNo: ptrB);
14258 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14259 .addImm(Val: PPC::PRED_NE_MINUS)
14260 .addReg(RegNo: PPC::CR0)
14261 .addMBB(MBB: loop1MBB);
14262 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14263 BB->addSuccessor(Succ: loop1MBB);
14264 BB->addSuccessor(Succ: exitMBB);
14265
14266 // exitMBB:
14267 // ...
14268 BB = exitMBB;
14269 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14270 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14271 // We must use 64-bit registers for addresses when targeting 64-bit,
14272 // since we're actually doing arithmetic on them. Other registers
14273 // can be 32-bit.
14274 bool is64bit = Subtarget.isPPC64();
14275 bool isLittleEndian = Subtarget.isLittleEndian();
14276 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14277
14278 Register dest = MI.getOperand(i: 0).getReg();
14279 Register ptrA = MI.getOperand(i: 1).getReg();
14280 Register ptrB = MI.getOperand(i: 2).getReg();
14281 Register oldval = MI.getOperand(i: 3).getReg();
14282 Register newval = MI.getOperand(i: 4).getReg();
14283 DebugLoc dl = MI.getDebugLoc();
14284
14285 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14286 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14287 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14288 F->insert(MBBI: It, MBB: loop1MBB);
14289 F->insert(MBBI: It, MBB: loop2MBB);
14290 F->insert(MBBI: It, MBB: exitMBB);
14291 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
14292 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
14293 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14294
14295 MachineRegisterInfo &RegInfo = F->getRegInfo();
14296 const TargetRegisterClass *RC =
14297 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14298 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14299
14300 Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
14301 Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14302 Register ShiftReg =
14303 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
14304 Register NewVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14305 Register NewVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14306 Register OldVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14307 Register OldVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14308 Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14309 Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14310 Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14311 Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14312 Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14313 Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14314 Register Ptr1Reg;
14315 Register TmpReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14316 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14317 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14318 // thisMBB:
14319 // ...
14320 // fallthrough --> loopMBB
14321 BB->addSuccessor(Succ: loop1MBB);
14322
14323 // The 4-byte load must be aligned, while a char or short may be
14324 // anywhere in the word. Hence all this nasty bookkeeping code.
14325 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14326 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14327 // xori shift, shift1, 24 [16]
14328 // rlwinm ptr, ptr1, 0, 0, 29
14329 // slw newval2, newval, shift
14330 // slw oldval2, oldval,shift
14331 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14332 // slw mask, mask2, shift
14333 // and newval3, newval2, mask
14334 // and oldval3, oldval2, mask
14335 // loop1MBB:
14336 // lwarx tmpDest, ptr
14337 // and tmp, tmpDest, mask
14338 // cmpw tmp, oldval3
14339 // bne- exitBB
14340 // loop2MBB:
14341 // andc tmp2, tmpDest, mask
14342 // or tmp4, tmp2, newval3
14343 // stwcx. tmp4, ptr
14344 // bne- loop1MBB
14345 // b exitBB
14346 // exitBB:
14347 // srw dest, tmpDest, shift
14348 if (ptrA != ZeroReg) {
14349 Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
14350 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
14351 .addReg(RegNo: ptrA)
14352 .addReg(RegNo: ptrB);
14353 } else {
14354 Ptr1Reg = ptrB;
14355 }
14356
14357 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
14358 // mode.
14359 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
14360 .addReg(RegNo: Ptr1Reg, Flags: {}, SubReg: is64bit ? PPC::sub_32 : 0)
14361 .addImm(Val: 3)
14362 .addImm(Val: 27)
14363 .addImm(Val: is8bit ? 28 : 27);
14364 if (!isLittleEndian)
14365 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
14366 .addReg(RegNo: Shift1Reg)
14367 .addImm(Val: is8bit ? 24 : 16);
14368 if (is64bit)
14369 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
14370 .addReg(RegNo: Ptr1Reg)
14371 .addImm(Val: 0)
14372 .addImm(Val: 61);
14373 else
14374 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
14375 .addReg(RegNo: Ptr1Reg)
14376 .addImm(Val: 0)
14377 .addImm(Val: 0)
14378 .addImm(Val: 29);
14379 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: NewVal2Reg)
14380 .addReg(RegNo: newval)
14381 .addReg(RegNo: ShiftReg);
14382 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: OldVal2Reg)
14383 .addReg(RegNo: oldval)
14384 .addReg(RegNo: ShiftReg);
14385 if (is8bit)
14386 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: 255);
14387 else {
14388 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: 0);
14389 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
14390 .addReg(RegNo: Mask3Reg)
14391 .addImm(Val: 65535);
14392 }
14393 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
14394 .addReg(RegNo: Mask2Reg)
14395 .addReg(RegNo: ShiftReg);
14396 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: NewVal3Reg)
14397 .addReg(RegNo: NewVal2Reg)
14398 .addReg(RegNo: MaskReg);
14399 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: OldVal3Reg)
14400 .addReg(RegNo: OldVal2Reg)
14401 .addReg(RegNo: MaskReg);
14402
14403 BB = loop1MBB;
14404 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
14405 .addReg(RegNo: ZeroReg)
14406 .addReg(RegNo: PtrReg);
14407 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: TmpReg)
14408 .addReg(RegNo: TmpDestReg)
14409 .addReg(RegNo: MaskReg);
14410 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CrReg)
14411 .addReg(RegNo: TmpReg)
14412 .addReg(RegNo: OldVal3Reg);
14413 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14414 .addImm(Val: PPC::PRED_NE)
14415 .addReg(RegNo: CrReg)
14416 .addMBB(MBB: exitMBB);
14417 BB->addSuccessor(Succ: loop2MBB);
14418 BB->addSuccessor(Succ: exitMBB);
14419
14420 BB = loop2MBB;
14421 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
14422 .addReg(RegNo: TmpDestReg)
14423 .addReg(RegNo: MaskReg);
14424 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg)
14425 .addReg(RegNo: Tmp2Reg)
14426 .addReg(RegNo: NewVal3Reg);
14427 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
14428 .addReg(RegNo: Tmp4Reg)
14429 .addReg(RegNo: ZeroReg)
14430 .addReg(RegNo: PtrReg);
14431 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14432 .addImm(Val: PPC::PRED_NE)
14433 .addReg(RegNo: PPC::CR0)
14434 .addMBB(MBB: loop1MBB);
14435 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14436 BB->addSuccessor(Succ: loop1MBB);
14437 BB->addSuccessor(Succ: exitMBB);
14438
14439 // exitMBB:
14440 // ...
14441 BB = exitMBB;
14442 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: dest)
14443 .addReg(RegNo: TmpReg)
14444 .addReg(RegNo: ShiftReg);
14445 } else if (MI.getOpcode() == PPC::FADDrtz) {
14446 // This pseudo performs an FADD with rounding mode temporarily forced
14447 // to round-to-zero. We emit this via custom inserter since the FPSCR
14448 // is not modeled at the SelectionDAG level.
14449 Register Dest = MI.getOperand(i: 0).getReg();
14450 Register Src1 = MI.getOperand(i: 1).getReg();
14451 Register Src2 = MI.getOperand(i: 2).getReg();
14452 DebugLoc dl = MI.getDebugLoc();
14453
14454 MachineRegisterInfo &RegInfo = F->getRegInfo();
14455 Register MFFSReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14456
14457 // Save FPSCR value.
14458 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: MFFSReg);
14459
14460 // Set rounding mode to round-to-zero.
14461 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB1))
14462 .addImm(Val: 31)
14463 .addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14464
14465 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB0))
14466 .addImm(Val: 30)
14467 .addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14468
14469 // Perform addition.
14470 auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::FADD), DestReg: Dest)
14471 .addReg(RegNo: Src1)
14472 .addReg(RegNo: Src2);
14473 if (MI.getFlag(Flag: MachineInstr::NoFPExcept))
14474 MIB.setMIFlag(MachineInstr::NoFPExcept);
14475
14476 // Restore FPSCR value.
14477 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSFb)).addImm(Val: 1).addReg(RegNo: MFFSReg);
14478 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14479 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14480 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14481 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14482 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14483 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14484 ? PPC::ANDI8_rec
14485 : PPC::ANDI_rec;
14486 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14487 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14488
14489 MachineRegisterInfo &RegInfo = F->getRegInfo();
14490 Register Dest = RegInfo.createVirtualRegister(
14491 RegClass: Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14492
14493 DebugLoc Dl = MI.getDebugLoc();
14494 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode), DestReg: Dest)
14495 .addReg(RegNo: MI.getOperand(i: 1).getReg())
14496 .addImm(Val: 1);
14497 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14498 DestReg: MI.getOperand(i: 0).getReg())
14499 .addReg(RegNo: IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14500 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14501 DebugLoc Dl = MI.getDebugLoc();
14502 MachineRegisterInfo &RegInfo = F->getRegInfo();
14503 Register CRReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14504 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TCHECK), DestReg: CRReg);
14505 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14506 DestReg: MI.getOperand(i: 0).getReg())
14507 .addReg(RegNo: CRReg);
14508 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14509 DebugLoc Dl = MI.getDebugLoc();
14510 unsigned Imm = MI.getOperand(i: 1).getImm();
14511 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TBEGIN)).addImm(Val: Imm);
14512 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14513 DestReg: MI.getOperand(i: 0).getReg())
14514 .addReg(RegNo: PPC::CR0EQ);
14515 } else if (MI.getOpcode() == PPC::SETRNDi) {
14516 DebugLoc dl = MI.getDebugLoc();
14517 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14518
14519 // Save FPSCR value.
14520 if (MRI.use_empty(RegNo: OldFPSCRReg))
14521 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14522 else
14523 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14524
14525 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
14526 // the following settings:
14527 // 00 Round to nearest
14528 // 01 Round to 0
14529 // 10 Round to +inf
14530 // 11 Round to -inf
14531
14532 // When the operand is immediate, using the two least significant bits of
14533 // the immediate to set the bits 62:63 of FPSCR.
14534 unsigned Mode = MI.getOperand(i: 1).getImm();
14535 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14536 .addImm(Val: 31)
14537 .addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14538
14539 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14540 .addImm(Val: 30)
14541 .addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14542 } else if (MI.getOpcode() == PPC::SETRND) {
14543 DebugLoc dl = MI.getDebugLoc();
14544
14545 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14546 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14547 // If the target doesn't have DirectMove, we should use stack to do the
14548 // conversion, because the target doesn't have the instructions like mtvsrd
14549 // or mfvsrd to do this conversion directly.
14550 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14551 if (Subtarget.hasDirectMove()) {
14552 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg)
14553 .addReg(RegNo: SrcReg);
14554 } else {
14555 // Use stack to do the register copy.
14556 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14557 MachineRegisterInfo &RegInfo = F->getRegInfo();
14558 const TargetRegisterClass *RC = RegInfo.getRegClass(Reg: SrcReg);
14559 if (RC == &PPC::F8RCRegClass) {
14560 // Copy register from F8RCRegClass to G8RCRegclass.
14561 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14562 "Unsupported RegClass.");
14563
14564 StoreOp = PPC::STFD;
14565 LoadOp = PPC::LD;
14566 } else {
14567 // Copy register from G8RCRegClass to F8RCRegclass.
14568 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14569 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14570 "Unsupported RegClass.");
14571 }
14572
14573 MachineFrameInfo &MFI = F->getFrameInfo();
14574 int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
14575
14576 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14577 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: 0),
14578 F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14579 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14580
14581 // Store the SrcReg into the stack.
14582 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: StoreOp))
14583 .addReg(RegNo: SrcReg)
14584 .addImm(Val: 0)
14585 .addFrameIndex(Idx: FrameIdx)
14586 .addMemOperand(MMO: MMOStore);
14587
14588 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14589 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: 0),
14590 F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14591 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14592
14593 // Load from the stack where SrcReg is stored, and save to DestReg,
14594 // so we have done the RegClass conversion from RegClass::SrcReg to
14595 // RegClass::DestReg.
14596 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: LoadOp), DestReg)
14597 .addImm(Val: 0)
14598 .addFrameIndex(Idx: FrameIdx)
14599 .addMemOperand(MMO: MMOLoad);
14600 }
14601 };
14602
14603 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14604
14605 // Save FPSCR value.
14606 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14607
14608 // When the operand is gprc register, use two least significant bits of the
14609 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
14610 //
14611 // copy OldFPSCRTmpReg, OldFPSCRReg
14612 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14613 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14614 // copy NewFPSCRReg, NewFPSCRTmpReg
14615 // mtfsf 255, NewFPSCRReg
14616 MachineOperand SrcOp = MI.getOperand(i: 1);
14617 MachineRegisterInfo &RegInfo = F->getRegInfo();
14618 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14619
14620 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14621
14622 Register ImDefReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14623 Register ExtSrcReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14624
14625 // The first operand of INSERT_SUBREG should be a register which has
14626 // subregisters, we only care about its RegClass, so we should use an
14627 // IMPLICIT_DEF register.
14628 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: ImDefReg);
14629 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::INSERT_SUBREG), DestReg: ExtSrcReg)
14630 .addReg(RegNo: ImDefReg)
14631 .add(MO: SrcOp)
14632 .addImm(Val: 1);
14633
14634 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14635 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDIMI), DestReg: NewFPSCRTmpReg)
14636 .addReg(RegNo: OldFPSCRTmpReg)
14637 .addReg(RegNo: ExtSrcReg)
14638 .addImm(Val: 0)
14639 .addImm(Val: 62);
14640
14641 Register NewFPSCRReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14642 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14643
14644 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
14645 // bits of FPSCR.
14646 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSF))
14647 .addImm(Val: 255)
14648 .addReg(RegNo: NewFPSCRReg)
14649 .addImm(Val: 0)
14650 .addImm(Val: 0);
14651 } else if (MI.getOpcode() == PPC::SETFLM) {
14652 DebugLoc Dl = MI.getDebugLoc();
14653
14654 // Result of setflm is previous FPSCR content, so we need to save it first.
14655 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14656 if (MRI.use_empty(RegNo: OldFPSCRReg))
14657 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14658 else
14659 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14660
14661 // Put bits in 32:63 to FPSCR.
14662 Register NewFPSCRReg = MI.getOperand(i: 1).getReg();
14663 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MTFSF))
14664 .addImm(Val: 255)
14665 .addReg(RegNo: NewFPSCRReg)
14666 .addImm(Val: 0)
14667 .addImm(Val: 0);
14668 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14669 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14670 return emitProbedAlloca(MI, MBB: BB);
14671 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14672 DebugLoc DL = MI.getDebugLoc();
14673 Register Src = MI.getOperand(i: 2).getReg();
14674 Register Lo = MI.getOperand(i: 0).getReg();
14675 Register Hi = MI.getOperand(i: 1).getReg();
14676 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14677 .addDef(RegNo: Lo)
14678 .addUse(RegNo: Src, Flags: {}, SubReg: PPC::sub_gp8_x1);
14679 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14680 .addDef(RegNo: Hi)
14681 .addUse(RegNo: Src, Flags: {}, SubReg: PPC::sub_gp8_x0);
14682 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14683 MI.getOpcode() == PPC::STQX_PSEUDO) {
14684 DebugLoc DL = MI.getDebugLoc();
14685 // Ptr is used as the ptr_rc_no_r0 part
14686 // of LQ/STQ's memory operand and adding result of RA and RB,
14687 // so it has to be g8rc_and_g8rc_nox0.
14688 Register Ptr =
14689 F->getRegInfo().createVirtualRegister(RegClass: &PPC::G8RC_and_G8RC_NOX0RegClass);
14690 Register Val = MI.getOperand(i: 0).getReg();
14691 Register RA = MI.getOperand(i: 1).getReg();
14692 Register RB = MI.getOperand(i: 2).getReg();
14693 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::ADD8), DestReg: Ptr).addReg(RegNo: RA).addReg(RegNo: RB);
14694 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
14695 MCID: MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(Opcode: PPC::LQ)
14696 : TII->get(Opcode: PPC::STQ))
14697 .addReg(RegNo: Val, Flags: getDefRegState(B: MI.getOpcode() == PPC::LQX_PSEUDO))
14698 .addImm(Val: 0)
14699 .addReg(RegNo: Ptr);
14700 } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
14701 MI.getOpcode() == PPC::LDAT_PSEUDO) {
14702 DebugLoc DL = MI.getDebugLoc();
14703 Register DstReg = MI.getOperand(i: 0).getReg();
14704 Register PtrReg = MI.getOperand(i: 1).getReg();
14705 Register ValReg = MI.getOperand(i: 2).getReg();
14706 unsigned FC = MI.getOperand(i: 3).getImm();
14707 bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14708 Register Val64 = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14709 if (IsLwat)
14710 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::SUBREG_TO_REG), DestReg: Val64)
14711 .addReg(RegNo: ValReg)
14712 .addImm(Val: PPC::sub_32);
14713 else
14714 Val64 = ValReg;
14715
14716 Register G8rPair = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14717 Register UndefG8r = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14718 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: UndefG8r);
14719 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::REG_SEQUENCE), DestReg: G8rPair)
14720 .addReg(RegNo: UndefG8r)
14721 .addImm(Val: PPC::sub_gp8_x0)
14722 .addReg(RegNo: Val64)
14723 .addImm(Val: PPC::sub_gp8_x1);
14724
14725 Register PairResult = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14726 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: IsLwat ? PPC::LWAT : PPC::LDAT), DestReg: PairResult)
14727 .addReg(RegNo: G8rPair)
14728 .addReg(RegNo: PtrReg)
14729 .addImm(Val: FC);
14730 Register Result64 = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14731 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: Result64)
14732 .addReg(RegNo: PairResult, Flags: {}, SubReg: PPC::sub_gp8_x0);
14733 if (IsLwat)
14734 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14735 .addReg(RegNo: Result64, Flags: {}, SubReg: PPC::sub_32);
14736 else
14737 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14738 .addReg(RegNo: Result64);
14739 } else if (MI.getOpcode() == PPC::LWAT_COND_PSEUDO ||
14740 MI.getOpcode() == PPC::LDAT_COND_PSEUDO) {
14741 DebugLoc DL = MI.getDebugLoc();
14742 Register DstReg = MI.getOperand(i: 0).getReg();
14743 Register PtrReg = MI.getOperand(i: 1).getReg();
14744 unsigned FC = MI.getOperand(i: 2).getImm();
14745 bool IsLwat_Cond = MI.getOpcode() == PPC::LWAT_COND_PSEUDO;
14746
14747 Register Pair = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14748 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: Pair);
14749
14750 Register PairResult = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14751 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: IsLwat_Cond ? PPC::LWAT : PPC::LDAT),
14752 DestReg: PairResult)
14753 .addReg(RegNo: Pair)
14754 .addReg(RegNo: PtrReg)
14755 .addImm(Val: FC);
14756 Register Result64 = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14757 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: Result64)
14758 .addReg(RegNo: PairResult, Flags: {}, SubReg: PPC::sub_gp8_x0);
14759 if (IsLwat_Cond)
14760 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14761 .addReg(RegNo: Result64, Flags: {}, SubReg: PPC::sub_32);
14762 else
14763 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14764 .addReg(RegNo: Result64);
14765 } else {
14766 llvm_unreachable("Unexpected instr type to insert");
14767 }
14768
14769 MI.eraseFromParent(); // The pseudo instruction is gone now.
14770 return BB;
14771}
14772
14773//===----------------------------------------------------------------------===//
14774// Target Optimization Hooks
14775//===----------------------------------------------------------------------===//
14776
14777static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14778 // For the estimates, convergence is quadratic, so we essentially double the
14779 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14780 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14781 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14782 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14783 if (VT.getScalarType() == MVT::f64)
14784 RefinementSteps++;
14785 return RefinementSteps;
14786}
14787
14788SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14789 const DenormalMode &Mode,
14790 SDNodeFlags Flags) const {
14791 // We only have VSX Vector Test for software Square Root.
14792 EVT VT = Op.getValueType();
14793 if (!isTypeLegal(VT: MVT::i1) ||
14794 (VT != MVT::f64 &&
14795 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14796 return TargetLowering::getSqrtInputTest(Operand: Op, DAG, Mode, Flags);
14797
14798 SDLoc DL(Op);
14799 // The output register of FTSQRT is CR field.
14800 SDValue FTSQRT = DAG.getNode(Opcode: PPCISD::FTSQRT, DL, VT: MVT::i32, Operand: Op, Flags);
14801 // ftsqrt BF,FRB
14802 // Let e_b be the unbiased exponent of the double-precision
14803 // floating-point operand in register FRB.
14804 // fe_flag is set to 1 if either of the following conditions occurs.
14805 // - The double-precision floating-point operand in register FRB is a zero,
14806 // a NaN, or an infinity, or a negative value.
14807 // - e_b is less than or equal to -970.
14808 // Otherwise fe_flag is set to 0.
14809 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14810 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14811 // exponent is less than -970)
14812 SDValue SRIdxVal = DAG.getTargetConstant(Val: PPC::sub_eq, DL, VT: MVT::i32);
14813 return SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i1,
14814 Op1: FTSQRT, Op2: SRIdxVal),
14815 0);
14816}
14817
14818SDValue
14819PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14820 SelectionDAG &DAG) const {
14821 // We only have VSX Vector Square Root.
14822 EVT VT = Op.getValueType();
14823 if (VT != MVT::f64 &&
14824 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14825 return TargetLowering::getSqrtResultForDenormInput(Operand: Op, DAG);
14826
14827 return DAG.getNode(Opcode: PPCISD::FSQRT, DL: SDLoc(Op), VT, Operand: Op);
14828}
14829
14830SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14831 int Enabled, int &RefinementSteps,
14832 bool &UseOneConstNR,
14833 bool Reciprocal) const {
14834 EVT VT = Operand.getValueType();
14835 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14836 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14837 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14838 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14839 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14840 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14841
14842 // The Newton-Raphson computation with a single constant does not provide
14843 // enough accuracy on some CPUs.
14844 UseOneConstNR = !Subtarget.needsTwoConstNR();
14845 return DAG.getNode(Opcode: PPCISD::FRSQRTE, DL: SDLoc(Operand), VT, Operand);
14846 }
14847 return SDValue();
14848}
14849
14850SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14851 int Enabled,
14852 int &RefinementSteps) const {
14853 EVT VT = Operand.getValueType();
14854 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14855 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14856 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14857 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14858 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14859 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14860 return DAG.getNode(Opcode: PPCISD::FRE, DL: SDLoc(Operand), VT, Operand);
14861 }
14862 return SDValue();
14863}
14864
14865unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14866 // Note: This functionality is used only when arcp is enabled, and
14867 // on cores with reciprocal estimates (which are used when arcp is
14868 // enabled for division), this functionality is redundant with the default
14869 // combiner logic (once the division -> reciprocal/multiply transformation
14870 // has taken place). As a result, this matters more for older cores than for
14871 // newer ones.
14872
14873 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14874 // reciprocal if there are two or more FDIVs (for embedded cores with only
14875 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
14876 switch (Subtarget.getCPUDirective()) {
14877 default:
14878 return 3;
14879 case PPC::DIR_440:
14880 case PPC::DIR_A2:
14881 case PPC::DIR_E500:
14882 case PPC::DIR_E500mc:
14883 case PPC::DIR_E5500:
14884 return 2;
14885 }
14886}
14887
14888// isConsecutiveLSLoc needs to work even if all adds have not yet been
14889// collapsed, and so we need to look through chains of them.
14890static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14891 int64_t& Offset, SelectionDAG &DAG) {
14892 if (DAG.isBaseWithConstantOffset(Op: Loc)) {
14893 Base = Loc.getOperand(i: 0);
14894 Offset += cast<ConstantSDNode>(Val: Loc.getOperand(i: 1))->getSExtValue();
14895
14896 // The base might itself be a base plus an offset, and if so, accumulate
14897 // that as well.
14898 getBaseWithConstantOffset(Loc: Loc.getOperand(i: 0), Base, Offset, DAG);
14899 }
14900}
14901
14902static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14903 unsigned Bytes, int Dist,
14904 SelectionDAG &DAG) {
14905 if (VT.getSizeInBits() / 8 != Bytes)
14906 return false;
14907
14908 SDValue BaseLoc = Base->getBasePtr();
14909 if (Loc.getOpcode() == ISD::FrameIndex) {
14910 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14911 return false;
14912 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14913 int FI = cast<FrameIndexSDNode>(Val&: Loc)->getIndex();
14914 int BFI = cast<FrameIndexSDNode>(Val&: BaseLoc)->getIndex();
14915 int FS = MFI.getObjectSize(ObjectIdx: FI);
14916 int BFS = MFI.getObjectSize(ObjectIdx: BFI);
14917 if (FS != BFS || FS != (int)Bytes) return false;
14918 return MFI.getObjectOffset(ObjectIdx: FI) == (MFI.getObjectOffset(ObjectIdx: BFI) + Dist*Bytes);
14919 }
14920
14921 SDValue Base1 = Loc, Base2 = BaseLoc;
14922 int64_t Offset1 = 0, Offset2 = 0;
14923 getBaseWithConstantOffset(Loc, Base&: Base1, Offset&: Offset1, DAG);
14924 getBaseWithConstantOffset(Loc: BaseLoc, Base&: Base2, Offset&: Offset2, DAG);
14925 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14926 return true;
14927
14928 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14929 const GlobalValue *GV1 = nullptr;
14930 const GlobalValue *GV2 = nullptr;
14931 Offset1 = 0;
14932 Offset2 = 0;
14933 bool isGA1 = TLI.isGAPlusOffset(N: Loc.getNode(), GA&: GV1, Offset&: Offset1);
14934 bool isGA2 = TLI.isGAPlusOffset(N: BaseLoc.getNode(), GA&: GV2, Offset&: Offset2);
14935 if (isGA1 && isGA2 && GV1 == GV2)
14936 return Offset1 == (Offset2 + Dist*Bytes);
14937 return false;
14938}
14939
14940// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14941// not enforce equality of the chain operands.
14942static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14943 unsigned Bytes, int Dist,
14944 SelectionDAG &DAG) {
14945 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Val: N)) {
14946 EVT VT = LS->getMemoryVT();
14947 SDValue Loc = LS->getBasePtr();
14948 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14949 }
14950
14951 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14952 EVT VT;
14953 switch (N->getConstantOperandVal(Num: 1)) {
14954 default: return false;
14955 case Intrinsic::ppc_altivec_lvx:
14956 case Intrinsic::ppc_altivec_lvxl:
14957 case Intrinsic::ppc_vsx_lxvw4x:
14958 case Intrinsic::ppc_vsx_lxvw4x_be:
14959 VT = MVT::v4i32;
14960 break;
14961 case Intrinsic::ppc_vsx_lxvd2x:
14962 case Intrinsic::ppc_vsx_lxvd2x_be:
14963 VT = MVT::v2f64;
14964 break;
14965 case Intrinsic::ppc_altivec_lvebx:
14966 VT = MVT::i8;
14967 break;
14968 case Intrinsic::ppc_altivec_lvehx:
14969 VT = MVT::i16;
14970 break;
14971 case Intrinsic::ppc_altivec_lvewx:
14972 VT = MVT::i32;
14973 break;
14974 }
14975
14976 return isConsecutiveLSLoc(Loc: N->getOperand(Num: 2), VT, Base, Bytes, Dist, DAG);
14977 }
14978
14979 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14980 EVT VT;
14981 switch (N->getConstantOperandVal(Num: 1)) {
14982 default: return false;
14983 case Intrinsic::ppc_altivec_stvx:
14984 case Intrinsic::ppc_altivec_stvxl:
14985 case Intrinsic::ppc_vsx_stxvw4x:
14986 VT = MVT::v4i32;
14987 break;
14988 case Intrinsic::ppc_vsx_stxvd2x:
14989 VT = MVT::v2f64;
14990 break;
14991 case Intrinsic::ppc_vsx_stxvw4x_be:
14992 VT = MVT::v4i32;
14993 break;
14994 case Intrinsic::ppc_vsx_stxvd2x_be:
14995 VT = MVT::v2f64;
14996 break;
14997 case Intrinsic::ppc_altivec_stvebx:
14998 VT = MVT::i8;
14999 break;
15000 case Intrinsic::ppc_altivec_stvehx:
15001 VT = MVT::i16;
15002 break;
15003 case Intrinsic::ppc_altivec_stvewx:
15004 VT = MVT::i32;
15005 break;
15006 }
15007
15008 return isConsecutiveLSLoc(Loc: N->getOperand(Num: 3), VT, Base, Bytes, Dist, DAG);
15009 }
15010
15011 return false;
15012}
15013
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
15019static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
15020 SDValue Chain = LD->getChain();
15021 EVT VT = LD->getMemoryVT();
15022
15023 SmallPtrSet<SDNode *, 16> LoadRoots;
15024 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
15025 SmallPtrSet<SDNode *, 16> Visited;
15026
15027 // First, search up the chain, branching to follow all token-factor operands.
15028 // If we find a consecutive load, then we're done, otherwise, record all
15029 // nodes just above the top-level loads and token factors.
15030 while (!Queue.empty()) {
15031 SDNode *ChainNext = Queue.pop_back_val();
15032 if (!Visited.insert(Ptr: ChainNext).second)
15033 continue;
15034
15035 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: ChainNext)) {
15036 if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: 1, DAG))
15037 return true;
15038
15039 if (!Visited.count(Ptr: ChainLD->getChain().getNode()))
15040 Queue.push_back(Elt: ChainLD->getChain().getNode());
15041 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
15042 for (const SDUse &O : ChainNext->ops())
15043 if (!Visited.count(Ptr: O.getNode()))
15044 Queue.push_back(Elt: O.getNode());
15045 } else
15046 LoadRoots.insert(Ptr: ChainNext);
15047 }
15048
15049 // Second, search down the chain, starting from the top-level nodes recorded
15050 // in the first phase. These top-level nodes are the nodes just above all
15051 // loads and token factors. Starting with their uses, recursively look though
15052 // all loads (just the chain uses) and token factors to find a consecutive
15053 // load.
15054 Visited.clear();
15055 Queue.clear();
15056
15057 for (SDNode *I : LoadRoots) {
15058 Queue.push_back(Elt: I);
15059
15060 while (!Queue.empty()) {
15061 SDNode *LoadRoot = Queue.pop_back_val();
15062 if (!Visited.insert(Ptr: LoadRoot).second)
15063 continue;
15064
15065 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: LoadRoot))
15066 if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: 1, DAG))
15067 return true;
15068
15069 for (SDNode *U : LoadRoot->users())
15070 if (((isa<MemSDNode>(Val: U) &&
15071 cast<MemSDNode>(Val: U)->getChain().getNode() == LoadRoot) ||
15072 U->getOpcode() == ISD::TokenFactor) &&
15073 !Visited.count(Ptr: U))
15074 Queue.push_back(Elt: U);
15075 }
15076 }
15077
15078 return false;
15079}
15080
15081/// This function is called when we have proved that a SETCC node can be replaced
15082/// by subtraction (and other supporting instructions) so that the result of
15083/// comparison is kept in a GPR instead of CR. This function is purely for
15084/// codegen purposes and has some flags to guide the codegen process.
15085static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15086 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15087 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15088
15089 // Zero extend the operands to the largest legal integer. Originally, they
15090 // must be of a strictly smaller size.
15091 auto Op0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: 0),
15092 N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));
15093 auto Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: 1),
15094 N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));
15095
15096 // Swap if needed. Depends on the condition code.
15097 if (Swap)
15098 std::swap(a&: Op0, b&: Op1);
15099
15100 // Subtract extended integers.
15101 auto SubNode = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: Op0, N2: Op1);
15102
15103 // Move the sign bit to the least significant position and zero out the rest.
15104 // Now the least significant bit carries the result of original comparison.
15105 auto Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SubNode,
15106 N2: DAG.getConstant(Val: Size - 1, DL, VT: MVT::i32));
15107 auto Final = Shifted;
15108
15109 // Complement the result if needed. Based on the condition code.
15110 if (Complement)
15111 Final = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, N1: Shifted,
15112 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i64));
15113
15114 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: Final);
15115}
15116
15117SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15118 DAGCombinerInfo &DCI) const {
15119 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15120
15121 SelectionDAG &DAG = DCI.DAG;
15122 SDLoc DL(N);
15123
15124 // Size of integers being compared has a critical role in the following
15125 // analysis, so we prefer to do this when all types are legal.
15126 if (!DCI.isAfterLegalizeDAG())
15127 return SDValue();
15128
15129 // If all users of SETCC extend its value to a legal integer type
15130 // then we replace SETCC with a subtraction
15131 for (const SDNode *U : N->users())
15132 if (U->getOpcode() != ISD::ZERO_EXTEND)
15133 return SDValue();
15134
15135 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15136 auto OpSize = N->getOperand(Num: 0).getValueSizeInBits();
15137
15138 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
15139
15140 if (OpSize < Size) {
15141 switch (CC) {
15142 default: break;
15143 case ISD::SETULT:
15144 return generateEquivalentSub(N, Size, Complement: false, Swap: false, DL, DAG);
15145 case ISD::SETULE:
15146 return generateEquivalentSub(N, Size, Complement: true, Swap: true, DL, DAG);
15147 case ISD::SETUGT:
15148 return generateEquivalentSub(N, Size, Complement: false, Swap: true, DL, DAG);
15149 case ISD::SETUGE:
15150 return generateEquivalentSub(N, Size, Complement: true, Swap: false, DL, DAG);
15151 }
15152 }
15153
15154 return SDValue();
15155}
15156
15157SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15158 DAGCombinerInfo &DCI) const {
15159 SelectionDAG &DAG = DCI.DAG;
15160 SDLoc dl(N);
15161
15162 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15163 // If we're tracking CR bits, we need to be careful that we don't have:
15164 // trunc(binary-ops(zext(x), zext(y)))
15165 // or
15166 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15167 // such that we're unnecessarily moving things into GPRs when it would be
15168 // better to keep them in CR bits.
15169
15170 // Note that trunc here can be an actual i1 trunc, or can be the effective
15171 // truncation that comes from a setcc or select_cc.
15172 if (N->getOpcode() == ISD::TRUNCATE &&
15173 N->getValueType(ResNo: 0) != MVT::i1)
15174 return SDValue();
15175
15176 if (N->getOperand(Num: 0).getValueType() != MVT::i32 &&
15177 N->getOperand(Num: 0).getValueType() != MVT::i64)
15178 return SDValue();
15179
15180 if (N->getOpcode() == ISD::SETCC ||
15181 N->getOpcode() == ISD::SELECT_CC) {
15182 // If we're looking at a comparison, then we need to make sure that the
15183 // high bits (all except for the first) don't matter the result.
15184 ISD::CondCode CC =
15185 cast<CondCodeSDNode>(Val: N->getOperand(
15186 Num: N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15187 unsigned OpBits = N->getOperand(Num: 0).getValueSizeInBits();
15188
15189 if (ISD::isSignedIntSetCC(Code: CC)) {
15190 if (DAG.ComputeNumSignBits(Op: N->getOperand(Num: 0)) != OpBits ||
15191 DAG.ComputeNumSignBits(Op: N->getOperand(Num: 1)) != OpBits)
15192 return SDValue();
15193 } else if (ISD::isUnsignedIntSetCC(Code: CC)) {
15194 if (!DAG.MaskedValueIsZero(Op: N->getOperand(Num: 0),
15195 Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-1)) ||
15196 !DAG.MaskedValueIsZero(Op: N->getOperand(Num: 1),
15197 Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-1)))
15198 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15199 : SDValue());
15200 } else {
15201 // This is neither a signed nor an unsigned comparison, just make sure
15202 // that the high bits are equal.
15203 KnownBits Op1Known = DAG.computeKnownBits(Op: N->getOperand(Num: 0));
15204 KnownBits Op2Known = DAG.computeKnownBits(Op: N->getOperand(Num: 1));
15205
15206 // We don't really care about what is known about the first bit (if
15207 // anything), so pretend that it is known zero for both to ensure they can
15208 // be compared as constants.
15209 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(BitPosition: 0);
15210 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(BitPosition: 0);
15211
15212 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15213 Op1Known.getConstant() != Op2Known.getConstant())
15214 return SDValue();
15215 }
15216 }
15217
15218 // We now know that the higher-order bits are irrelevant, we just need to
15219 // make sure that all of the intermediate operations are bit operations, and
15220 // all inputs are extensions.
15221 if (N->getOperand(Num: 0).getOpcode() != ISD::AND &&
15222 N->getOperand(Num: 0).getOpcode() != ISD::OR &&
15223 N->getOperand(Num: 0).getOpcode() != ISD::XOR &&
15224 N->getOperand(Num: 0).getOpcode() != ISD::SELECT &&
15225 N->getOperand(Num: 0).getOpcode() != ISD::SELECT_CC &&
15226 N->getOperand(Num: 0).getOpcode() != ISD::TRUNCATE &&
15227 N->getOperand(Num: 0).getOpcode() != ISD::SIGN_EXTEND &&
15228 N->getOperand(Num: 0).getOpcode() != ISD::ZERO_EXTEND &&
15229 N->getOperand(Num: 0).getOpcode() != ISD::ANY_EXTEND)
15230 return SDValue();
15231
15232 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15233 N->getOperand(Num: 1).getOpcode() != ISD::AND &&
15234 N->getOperand(Num: 1).getOpcode() != ISD::OR &&
15235 N->getOperand(Num: 1).getOpcode() != ISD::XOR &&
15236 N->getOperand(Num: 1).getOpcode() != ISD::SELECT &&
15237 N->getOperand(Num: 1).getOpcode() != ISD::SELECT_CC &&
15238 N->getOperand(Num: 1).getOpcode() != ISD::TRUNCATE &&
15239 N->getOperand(Num: 1).getOpcode() != ISD::SIGN_EXTEND &&
15240 N->getOperand(Num: 1).getOpcode() != ISD::ZERO_EXTEND &&
15241 N->getOperand(Num: 1).getOpcode() != ISD::ANY_EXTEND)
15242 return SDValue();
15243
15244 SmallVector<SDValue, 4> Inputs;
15245 SmallVector<SDValue, 8> BinOps, PromOps;
15246 SmallPtrSet<SDNode *, 16> Visited;
15247
15248 for (unsigned i = 0; i < 2; ++i) {
15249 if (((N->getOperand(Num: i).getOpcode() == ISD::SIGN_EXTEND ||
15250 N->getOperand(Num: i).getOpcode() == ISD::ZERO_EXTEND ||
15251 N->getOperand(Num: i).getOpcode() == ISD::ANY_EXTEND) &&
15252 N->getOperand(Num: i).getOperand(i: 0).getValueType() == MVT::i1) ||
15253 isa<ConstantSDNode>(Val: N->getOperand(Num: i)))
15254 Inputs.push_back(Elt: N->getOperand(Num: i));
15255 else
15256 BinOps.push_back(Elt: N->getOperand(Num: i));
15257
15258 if (N->getOpcode() == ISD::TRUNCATE)
15259 break;
15260 }
15261
15262 // Visit all inputs, collect all binary operations (and, or, xor and
15263 // select) that are all fed by extensions.
15264 while (!BinOps.empty()) {
15265 SDValue BinOp = BinOps.pop_back_val();
15266
15267 if (!Visited.insert(Ptr: BinOp.getNode()).second)
15268 continue;
15269
15270 PromOps.push_back(Elt: BinOp);
15271
15272 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15273 // The condition of the select is not promoted.
15274 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15275 continue;
15276 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15277 continue;
15278
15279 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15280 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15281 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15282 BinOp.getOperand(i).getOperand(i: 0).getValueType() == MVT::i1) ||
15283 isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
15284 Inputs.push_back(Elt: BinOp.getOperand(i));
15285 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15286 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15287 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15288 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15289 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15290 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15291 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15292 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15293 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15294 BinOps.push_back(Elt: BinOp.getOperand(i));
15295 } else {
15296 // We have an input that is not an extension or another binary
15297 // operation; we'll abort this transformation.
15298 return SDValue();
15299 }
15300 }
15301 }
15302
15303 // Make sure that this is a self-contained cluster of operations (which
15304 // is not quite the same thing as saying that everything has only one
15305 // use).
15306 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15307 if (isa<ConstantSDNode>(Val: Inputs[i]))
15308 continue;
15309
15310 for (const SDNode *User : Inputs[i].getNode()->users()) {
15311 if (User != N && !Visited.count(Ptr: User))
15312 return SDValue();
15313
15314 // Make sure that we're not going to promote the non-output-value
15315 // operand(s) or SELECT or SELECT_CC.
15316 // FIXME: Although we could sometimes handle this, and it does occur in
15317 // practice that one of the condition inputs to the select is also one of
15318 // the outputs, we currently can't deal with this.
15319 if (User->getOpcode() == ISD::SELECT) {
15320 if (User->getOperand(Num: 0) == Inputs[i])
15321 return SDValue();
15322 } else if (User->getOpcode() == ISD::SELECT_CC) {
15323 if (User->getOperand(Num: 0) == Inputs[i] ||
15324 User->getOperand(Num: 1) == Inputs[i])
15325 return SDValue();
15326 }
15327 }
15328 }
15329
15330 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15331 for (const SDNode *User : PromOps[i].getNode()->users()) {
15332 if (User != N && !Visited.count(Ptr: User))
15333 return SDValue();
15334
15335 // Make sure that we're not going to promote the non-output-value
15336 // operand(s) or SELECT or SELECT_CC.
15337 // FIXME: Although we could sometimes handle this, and it does occur in
15338 // practice that one of the condition inputs to the select is also one of
15339 // the outputs, we currently can't deal with this.
15340 if (User->getOpcode() == ISD::SELECT) {
15341 if (User->getOperand(Num: 0) == PromOps[i])
15342 return SDValue();
15343 } else if (User->getOpcode() == ISD::SELECT_CC) {
15344 if (User->getOperand(Num: 0) == PromOps[i] ||
15345 User->getOperand(Num: 1) == PromOps[i])
15346 return SDValue();
15347 }
15348 }
15349 }
15350
15351 // Replace all inputs with the extension operand.
15352 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15353 // Constants may have users outside the cluster of to-be-promoted nodes,
15354 // and so we need to replace those as we do the promotions.
15355 if (isa<ConstantSDNode>(Val: Inputs[i]))
15356 continue;
15357 else
15358 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i], To: Inputs[i].getOperand(i: 0));
15359 }
15360
15361 std::list<HandleSDNode> PromOpHandles;
15362 for (auto &PromOp : PromOps)
15363 PromOpHandles.emplace_back(args&: PromOp);
15364
15365 // Replace all operations (these are all the same, but have a different
15366 // (i1) return type). DAG.getNode will validate that the types of
15367 // a binary operator match, so go through the list in reverse so that
15368 // we've likely promoted both operands first. Any intermediate truncations or
15369 // extensions disappear.
15370 while (!PromOpHandles.empty()) {
15371 SDValue PromOp = PromOpHandles.back().getValue();
15372 PromOpHandles.pop_back();
15373
15374 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15375 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15376 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15377 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15378 if (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: 0)) &&
15379 PromOp.getOperand(i: 0).getValueType() != MVT::i1) {
15380 // The operand is not yet ready (see comment below).
15381 PromOpHandles.emplace_front(args&: PromOp);
15382 continue;
15383 }
15384
15385 SDValue RepValue = PromOp.getOperand(i: 0);
15386 if (isa<ConstantSDNode>(Val: RepValue))
15387 RepValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: RepValue);
15388
15389 DAG.ReplaceAllUsesOfValueWith(From: PromOp, To: RepValue);
15390 continue;
15391 }
15392
15393 unsigned C;
15394 switch (PromOp.getOpcode()) {
15395 default: C = 0; break;
15396 case ISD::SELECT: C = 1; break;
15397 case ISD::SELECT_CC: C = 2; break;
15398 }
15399
15400 if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
15401 PromOp.getOperand(i: C).getValueType() != MVT::i1) ||
15402 (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+1)) &&
15403 PromOp.getOperand(i: C+1).getValueType() != MVT::i1)) {
15404 // The to-be-promoted operands of this node have not yet been
15405 // promoted (this should be rare because we're going through the
15406 // list backward, but if one of the operands has several users in
15407 // this cluster of to-be-promoted nodes, it is possible).
15408 PromOpHandles.emplace_front(args&: PromOp);
15409 continue;
15410 }
15411
15412 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15413
15414 // If there are any constant inputs, make sure they're replaced now.
15415 for (unsigned i = 0; i < 2; ++i)
15416 if (isa<ConstantSDNode>(Val: Ops[C+i]))
15417 Ops[C+i] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: Ops[C+i]);
15418
15419 DAG.ReplaceAllUsesOfValueWith(From: PromOp,
15420 To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: MVT::i1, Ops));
15421 }
15422
15423 // Now we're left with the initial truncation itself.
15424 if (N->getOpcode() == ISD::TRUNCATE)
15425 return N->getOperand(Num: 0);
15426
15427 // Otherwise, this is a comparison. The operands to be compared have just
15428 // changed type (to i1), but everything else is the same.
15429 return SDValue(N, 0);
15430}
15431
// Combine sext/zext/aext of a cluster of logical operations whose inputs are
// truncations (from i1 when CR bits are in use, or from i32 on PPC64) by
// promoting the whole cluster to the extended type, so the intermediate
// truncations and the final extension disappear.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  // Only extensions producing i32 or i64 are of interest.
  if (N->getValueType(ResNo: 0) != MVT::i32 &&
      N->getValueType(ResNo: 0) != MVT::i64)
    return SDValue();

  // The extension source must be i1 (with CR-bit tracking) or i32 (on PPC64).
  if (!((N->getOperand(Num: 0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(Num: 0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  // The cluster must be rooted at a logical or select operation.
  if (N->getOperand(Num: 0).getOpcode() != ISD::AND &&
      N->getOperand(Num: 0).getOpcode() != ISD::OR &&
      N->getOperand(Num: 0).getOpcode() != ISD::XOR &&
      N->getOperand(Num: 0).getOpcode() != ISD::SELECT &&
      N->getOperand(Num: 0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  // Inputs: leaves of the cluster (truncations or constants).
  // BinOps: worklist of operations still to be visited.
  // PromOps: all operations that will be promoted to the extended type.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(Num: 0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(Ptr: BinOp.getNode()).second)
      continue;

    PromOps.push_back(Elt: BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
        Inputs.push_back(Elt: BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(Elt: BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Val: Inputs[i]))
      continue;

    for (SDNode *User : Inputs[i].getNode()->users()) {
      // Any user outside the visited cluster makes the rewrite unsafe.
      if (User != N && !Visited.count(Ptr: User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(Num: 0) == Inputs[i])
          SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
                                              y: User->getOperand(Num: 0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(Num: 0) == Inputs[i])
          SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
                                              y: User->getOperand(Num: 0).getValueType()));
        if (User->getOperand(Num: 1) == Inputs[i])
          SelectTruncOp[1].insert(KV: std::make_pair(x&: User,
                                              y: User->getOperand(Num: 1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode *User : PromOps[i].getNode()->users()) {
      if (User != N && !Visited.count(Ptr: User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(Num: 0) == PromOps[i])
          SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
                                              y: User->getOperand(Num: 0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(Num: 0) == PromOps[i])
          SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
                                              y: User->getOperand(Num: 0).getValueType()));
        if (User->getOperand(Num: 1) == PromOps[i])
          SelectTruncOp[1].insert(KV: std::make_pair(x&: User,
                                              y: User->getOperand(Num: 1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(Num: 0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Val: Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(i: 0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Op: Inputs[i].getOperand(i: 0),
                                  Mask: APInt::getHighBitsSet(numBits: OpBits,
                                                        hiBitsSet: OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Op: Inputs[i].getOperand(i: 0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Convert PromOps to handles before doing any RAUW operations, as these
  // may CSE with existing nodes, deleting the originals.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(args&: PromOp);

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Val: Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(i: 0);
    if (Inputs[i].getValueType() == N->getValueType(ResNo: 0))
      DAG.ReplaceAllUsesOfValueWith(From: Inputs[i], To: InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
        To: DAG.getSExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
        To: DAG.getZExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
    else
      DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
        To: DAG.getAnyExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
  }

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first to-be-promoted value operand (the select
    // condition, and the SELECT_CC comparison operands, are skipped).
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
         PromOp.getOperand(i: C).getValueType() != N->getValueType(ResNo: 0)) ||
        (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+1)) &&
         PromOp.getOperand(i: C+1).getValueType() != N->getValueType(ResNo: 0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(args&: PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(Val: PromOp.getNode()) &&
           PromOp.getOperand(i: 0).getValueType() != N->getValueType(ResNo: 0)) ||
          (SelectTruncOp[1].count(Val: PromOp.getNode()) &&
           PromOp.getOperand(i: 1).getValueType() != N->getValueType(ResNo: 0))) {
        PromOpHandles.emplace_front(args&: PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Val: Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(ResNo: 0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(Val: PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI0->second, Operand: Ops[0]);
      auto SI1 = SelectTruncOp[1].find(Val: PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI1->second, Operand: Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(From: PromOp,
      To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: N->getValueType(ResNo: 0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(Num: 0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 0),
                       N2: DAG.getConstant(Val: APInt::getLowBitsSet(
                                           numBits: N->getValueSizeInBits(ResNo: 0), loBitsSet: PromBits),
                                         DL: dl, VT: N->getValueType(ResNo: 0)));

  // To sign extend, emit a shift-left/shift-right-algebraic pair that
  // replicates the (promoted) sign bit into the high bits.
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(LHSTy: N->getValueType(ResNo: 0), DL: DAG.getDataLayout());
  SDValue ShiftCst =
    DAG.getConstant(Val: N->getValueSizeInBits(ResNo: 0) - PromBits, DL: dl, VT: ShiftAmountTy);
  return DAG.getNode(
      Opcode: ISD::SRA, DL: dl, VT: N->getValueType(ResNo: 0),
      N1: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 0), N2: ShiftCst),
      N2: ShiftCst);
}
15708
// Check whether both operands of an i128 compare can be converted to v16i8
// loads (or constants) for use with vcmpequb.
15710static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) {
15711
15712 auto isValidForConvert = [](SDValue &Operand) {
15713 if (!Operand.hasOneUse())
15714 return false;
15715
15716 if (Operand.getValueType() != MVT::i128)
15717 return false;
15718
15719 if (Operand.getOpcode() == ISD::Constant)
15720 return true;
15721
15722 auto *LoadNode = dyn_cast<LoadSDNode>(Val&: Operand);
15723 if (!LoadNode)
15724 return false;
15725
15726 // If memory operation is volatile, do not perform any
15727 // optimization or transformation. Volatile operations must be preserved
15728 // as written to ensure correct program behavior, so we return an empty
15729 // SDValue to indicate no action.
15730
15731 if (LoadNode->isVolatile())
15732 return false;
15733
15734 // Only combine loads if both use the unindexed addressing mode.
15735 // PowerPC AltiVec/VMX does not support vector loads or stores with
15736 // pre/post-increment addressing. Indexed modes may imply implicit
15737 // pointer updates, which are not compatible with AltiVec vector
15738 // instructions.
15739 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15740 return false;
15741
15742 // Only combine loads if both are non-extending loads
15743 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15744 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15745 // loaded value's semantics and are not compatible with vector loads.
15746 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15747 return false;
15748
15749 return true;
15750 };
15751
15752 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15753}
15754
15755SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
15756 const SDLoc &DL) {
15757
15758 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15759
15760 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15761 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15762 "CC mus be ISD::SETNE or ISD::SETEQ");
15763
15764 auto getV16i8Load = [&](const SDValue &Operand) {
15765 if (Operand.getOpcode() == ISD::Constant)
15766 return DAG.getBitcast(VT: MVT::v16i8, V: Operand);
15767
15768 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15769
15770 auto *LoadNode = cast<LoadSDNode>(Val: Operand);
15771 return DAG.getLoad(VT: MVT::v16i8, dl: DL, Chain: LoadNode->getChain(),
15772 Ptr: LoadNode->getBasePtr(), MMO: LoadNode->getMemOperand());
15773 };
15774
15775 // Following code transforms the DAG
15776 // t0: ch,glue = EntryToken
15777 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15778 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15779 // undef:i64
15780 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15781 // t5: i128,ch =
15782 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15783 // setcc t3, t5, setne:ch
15784 //
15785 // ---->
15786 //
15787 // t0: ch,glue = EntryToken
15788 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15789 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15790 // undef:i64
15791 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15792 // t5: v16i8,ch =
15793 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15794 // t6: i32 =
15795 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15796 // Constant:i32<2>, t3, t5
15797 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15798
15799 // Or transforms the DAG
15800 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15801 // t8: i1 =
15802 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15803 //
15804 // --->
15805 //
15806 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15807 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15808 // t7: i32 =
15809 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15810
15811 SDValue LHSVec = getV16i8Load(N->getOperand(Num: 0));
15812 SDValue RHSVec = getV16i8Load(N->getOperand(Num: 1));
15813
15814 SDValue IntrID =
15815 DAG.getConstant(Val: Intrinsic::ppc_altivec_vcmpequb_p, DL, VT: MVT::i32);
15816 SDValue CRSel = DAG.getConstant(Val: 2, DL, VT: MVT::i32); // which CR6 predicate field
15817 SDValue PredResult = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
15818 N1: IntrID, N2: CRSel, N3: LHSVec, N4: RHSVec);
15819 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15820 // so we need to invert the CC opcode.
15821 return DAG.getSetCC(DL, VT: N->getValueType(ResNo: 0), LHS: PredResult,
15822 RHS: DAG.getConstant(Val: 0, DL, VT: MVT::i32),
15823 Cond: CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15824}
15825
// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
// Return true if the pattern is present; otherwise return false.
15828static bool canConvertSETCCToXori(SDNode *N) {
15829 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15830
15831 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15832 if (CC != ISD::SETEQ)
15833 return false;
15834
15835 SDValue LHS = N->getOperand(Num: 0);
15836 SDValue RHS = N->getOperand(Num: 1);
15837
15838 // Check the `SDValue &V` is from `and` with `1`.
15839 auto IsAndWithOne = [](SDValue &V) {
15840 if (V.getOpcode() == ISD::AND) {
15841 for (const SDValue &Op : V->ops())
15842 if (auto *C = dyn_cast<ConstantSDNode>(Val: Op))
15843 if (C->isOne())
15844 return true;
15845 }
15846 return false;
15847 };
15848
15849 // Check whether the SETCC compare with zero.
15850 auto IsCompareWithZero = [](SDValue &V) {
15851 if (auto *C = dyn_cast<ConstantSDNode>(Val&: V))
15852 if (C->isZero())
15853 return true;
15854 return false;
15855 };
15856
15857 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15858 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15859}
15860
15861// You must check whether the `SDNode* N` can be converted to Xori using
15862// the function `static bool canConvertSETCCToXori(SDNode *N)`
15863// before calling the function; otherwise, it may produce incorrect results.
15864static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG) {
15865
15866 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15867 SDValue LHS = N->getOperand(Num: 0);
15868 SDValue RHS = N->getOperand(Num: 1);
15869 SDLoc DL(N);
15870
15871 [[maybe_unused]] ISD::CondCode CC =
15872 cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15873 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15874 // Rewrite it as XORI (and X, 1), 1.
15875 auto MakeXor1 = [&](SDValue V) {
15876 EVT VT = V.getValueType();
15877 SDValue One = DAG.getConstant(Val: 1, DL, VT);
15878 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: V, N2: One);
15879 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: Xor);
15880 };
15881
15882 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15883 return MakeXor1(LHS);
15884
15885 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15886 return MakeXor1(RHS);
15887
15888 llvm_unreachable("Should not reach here.");
15889}
15890
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");

  // Check if the pattern (setcc (and X, 1), 0, eq) is present.
  // If it is, rewrite it as XORI (and X, 1), 1.
  if (canConvertSETCCToXori(N))
    return ConvertSETCCToXori(N, DAG&: DCI.DAG);

  ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
    SDValue LHS = N->getOperand(Num: 0);
    SDValue RHS = N->getOperand(Num: 1);

    // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
    if (LHS.getOpcode() == ISD::SUB && isNullConstant(V: LHS.getOperand(i: 0)) &&
        LHS.hasOneUse())
      std::swap(a&: LHS, b&: RHS);

    // x == 0-y --> x+y == 0
    // x != 0-y --> x+y != 0
    if (RHS.getOpcode() == ISD::SUB && isNullConstant(V: RHS.getOperand(i: 0)) &&
        RHS.hasOneUse()) {
      SDLoc DL(N);
      SelectionDAG &DAG = DCI.DAG;
      EVT VT = N->getValueType(ResNo: 0);
      EVT OpVT = LHS.getValueType();
      SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: OpVT, N1: LHS, N2: RHS.getOperand(i: 1));
      return DAG.getSetCC(DL, VT, LHS: Add, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond: CC);
    }

    // Optimization: Fold i128 equality/inequality compares of two loads into a
    // vectorized compare using vcmpequb.p when AltiVec is available.
    //
    // Rationale:
    // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
    // On AltiVec-capable subtargets, we can instead reinterpret the i128
    // loads as v16i8 vectors and use the AltiVec vcmpequb.p instruction to
    // perform a full 128-bit equality check in a single vector compare.
    //
    // Example Result:
    // This transformation replaces memcmp(a, b, 16) with two vector loads
    // and one vector compare instruction.

    if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
      return convertTwoLoadsAndCmpToVCMPEQUB(DAG&: DCI.DAG, N, DL: SDLoc(N));
  }

  // Fall through to the trunc/bool-ext combine for all other condition codes.
  return DAGCombineTruncBoolExt(N, DCI);
}
15942
15943// Is this an extending load from an f32 to an f64?
15944static bool isFPExtLoad(SDValue Op) {
15945 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: Op.getNode()))
15946 return LD->getExtensionType() == ISD::EXTLOAD &&
15947 Op.getValueType() == MVT::f64;
15948 return false;
15949}
15950
15951/// Reduces the number of fp-to-int conversion when building a vector.
15952///
15953/// If this vector is built out of floating to integer conversions,
15954/// transform it to a vector built out of floating point values followed by a
15955/// single floating to integer conversion of the vector.
15956/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15957/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(Num: 0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(i: 0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    // FCTIW[U]Z produce 32-bit results; FCTID[U]Z produce 64-bit results.
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(i: 0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(ResNo: 0);
    // First pass: every element must be the same kind of MFVSR-fed
    // conversion; also detect whether the vector is a splat of FirstInput.
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(Num: i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(i: 0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(Op: NextOp.getOperand(i: 0).getOperand(i: 0)))
        return SDValue();
      if (N->getOperand(Num: i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(Num: i).getOperand(i: 0);
      if (Is32Bit) {
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
        // here, we know that all inputs are extending loads so this is safe).
        if (In.isUndef())
          Ops.push_back(Elt: DAG.getUNDEF(VT: SrcVT));
        else {
          SDValue Trunc =
              DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: In.getOperand(i: 0),
                          N2: DAG.getIntPtrConstant(Val: 1, DL: dl, /*isTarget=*/true));
          Ops.push_back(Elt: Trunc);
        }
      } else
        Ops.push_back(Elt: In.isUndef() ? DAG.getUNDEF(VT: SrcVT) : In.getOperand(i: 0));
    }

    // Select the signedness of the single vector conversion from the scalar
    // conversion opcode ([U]Z variants are the unsigned ones).
    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    // Build one FP vector and convert it with a single fp-to-int node.
    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(VT: NewVT, DL: dl, Ops);
    return DAG.getNode(Opcode, DL: dl, VT: TargetVT, Operand: BV);
  }
  return SDValue();
}
16038
16039// LXVKQ instruction load VSX vector with a special quadword value
16040// based on an immediate value. This helper method returns the details of the
16041// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
16042// to help generate the LXVKQ instruction and the subsequent shift instruction
16043// required to match the original build vector pattern.
16044
16045// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16046using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16047
16048static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16049
16050 // LXVKQ instruction loads the Quadword value:
16051 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16052 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
16053 static const uint32_t Uim = 16;
16054
16055 // Check for direct LXVKQ match (no shift needed)
16056 if (FullVal == BasePattern)
16057 return std::make_tuple(args: Uim, args: uint8_t{0});
16058
16059 // Check if FullValue is 1 (the result of the base pattern >> 127)
16060 if (FullVal == APInt(128, 1))
16061 return std::make_tuple(args: Uim, args: uint8_t{127});
16062
16063 return std::nullopt;
16064}
16065
16066/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
16067/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
16068/// LXVKQ instruction load VSX vector with a special quadword value based on an
16069/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
16070/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
16071/// This can be used to inline the build vector constants that have the
16072/// following patterns:
16073///
16074/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16075/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
/// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern
/// is loaded using a combination of splatting and right shift instructions.
16078
SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
                                                      SelectionDAG &DAG) const {

  assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
         "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");

  // This transformation is only supported if we are loading either a byte,
  // halfword, word, or doubleword.
  EVT VT = Op.getValueType();
  if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
        VT == MVT::v2i64))
    return SDValue();

  LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
                          << VT.getEVTString() << "): ";
             Op->dump());

  unsigned NumElems = VT.getVectorNumElements();
  unsigned ElemBits = VT.getScalarSizeInBits();

  bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();

  // Check for Non-constant operand in the build vector.
  for (const SDValue &Operand : Op.getNode()->op_values()) {
    if (!isa<ConstantSDNode>(Val: Operand))
      return SDValue();
  }

  // Assemble build vector operands as a 128-bit register value
  // We need to reconstruct what the 128-bit register pattern would be
  // that produces this vector when interpreted with the current endianness
  APInt FullVal = APInt::getZero(numBits: 128);

  for (unsigned Index = 0; Index < NumElems; ++Index) {
    auto *C = cast<ConstantSDNode>(Val: Op.getOperand(i: Index));

    // Get element value as raw bits (zero-extended)
    uint64_t ElemValue = C->getZExtValue();

    // Mask to element size to ensure we only get the relevant bits
    if (ElemBits < 64)
      ElemValue &= ((1ULL << ElemBits) - 1);

    // Calculate bit position for this element in the 128-bit register:
    // element 0 occupies the lowest bits on LE and the highest bits on BE.
    unsigned BitPos =
        (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);

    // Create APInt for the element value and shift it to correct position
    APInt ElemAPInt(128, ElemValue);
    ElemAPInt <<= BitPos;

    // Place the element value at the correct bit position
    FullVal |= ElemAPInt;
  }

  // All-zeros and all-ones vectors are handled elsewhere; only the special
  // LXVKQ-reachable constants are of interest here.
  if (FullVal.isZero() || FullVal.isAllOnes())
    return SDValue();

  if (auto UIMOpt = getPatternInfo(FullVal)) {
    const auto &[Uim, ShiftAmount] = *UIMOpt;
    SDLoc Dl(Op);

    // Generate LXVKQ instruction if the shift amount is zero.
    if (ShiftAmount == 0) {
      SDValue UimVal = DAG.getTargetConstant(Val: Uim, DL: Dl, VT: MVT::i32);
      SDValue LxvkqInstr =
          SDValue(DAG.getMachineNode(Opcode: PPC::LXVKQ, dl: Dl, VT, Op1: UimVal), 0);
      LLVM_DEBUG(llvm::dbgs()
                     << "combineBVLoadsSpecialValue: Instruction Emitted ";
                 LxvkqInstr.dump());
      return LxvkqInstr;
    }

    assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");

    // The right shifted pattern can be constructed using a combination of
    // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
    // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
    // value 255.
    SDValue ShiftAmountVec =
        SDValue(DAG.getMachineNode(Opcode: PPC::XXSPLTIB, dl: Dl, VT: MVT::v4i32,
                                   Op1: DAG.getTargetConstant(Val: 255, DL: Dl, VT: MVT::i32)),
                0);
    // Generate appropriate right shift instruction. The splat (0xFF in every
    // byte) serves as both the shifted value and the shift amount.
    SDValue ShiftVec = SDValue(
        DAG.getMachineNode(Opcode: PPC::VSRQ, dl: Dl, VT, Op1: ShiftAmountVec, Op2: ShiftAmountVec),
        0);
    LLVM_DEBUG(llvm::dbgs()
                   << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
               ShiftVec.dump());
    return ShiftVec;
  }
  // No patterns matched for build vectors.
  return SDValue();
}
16174
16175/// Reduce the number of loads when building a vector.
16176///
16177/// Building a vector out of multiple loads can be converted to a load
16178/// of the vector type if the loads are consecutive. If the loads are
16179/// consecutive but in descending order, a shuffle is added at the end
16180/// to reorder the vector.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SDLoc dl(N);

  // Return early for non byte-sized type, as they can't be consecutive.
  if (!N->getValueType(ResNo: 0).getVectorElementType().isByteSized())
    return SDValue();

  bool InputsAreConsecutiveLoads = true;
  bool InputsAreReverseConsecutive = true;
  unsigned ElemSize = N->getValueType(ResNo: 0).getScalarType().getStoreSize();
  SDValue FirstInput = N->getOperand(Num: 0);
  bool IsRoundOfExtLoad = false;
  LoadSDNode *FirstLoad = nullptr;

  // The elements may either be plain loads or fp_round of an extending load
  // (float elements built from double loads).
  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
      FirstInput.getOperand(i: 0).getOpcode() == ISD::LOAD) {
    FirstLoad = cast<LoadSDNode>(Val: FirstInput.getOperand(i: 0));
    IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
  }
  // Not a build vector of (possibly fp_rounded) loads.
  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
      N->getNumOperands() == 1)
    return SDValue();

  if (!IsRoundOfExtLoad)
    FirstLoad = cast<LoadSDNode>(Val&: FirstInput);

  // Walk the remaining operands, checking each against its predecessor for
  // (reverse-)consecutiveness and collecting the loads for chain fixup.
  SmallVector<LoadSDNode *, 4> InputLoads;
  InputLoads.push_back(Elt: FirstLoad);
  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && N->getOperand(Num: i).getOpcode() != ISD::FP_ROUND)
      return SDValue();

    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(Num: i).getOperand(i: 0) :
      N->getOperand(Num: i);
    if (NextInput.getOpcode() != ISD::LOAD)
      return SDValue();

    SDValue PreviousInput =
      IsRoundOfExtLoad ? N->getOperand(Num: i-1).getOperand(i: 0) : N->getOperand(Num: i-1);
    LoadSDNode *LD1 = cast<LoadSDNode>(Val&: PreviousInput);
    LoadSDNode *LD2 = cast<LoadSDNode>(Val&: NextInput);

    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
      return SDValue();

    // We only care about regular loads. The PPC-specific load intrinsics
    // will not lead to a merge opportunity.
    if (!DAG.areNonVolatileConsecutiveLoads(LD: LD2, Base: LD1, Bytes: ElemSize, Dist: 1))
      InputsAreConsecutiveLoads = false;
    if (!DAG.areNonVolatileConsecutiveLoads(LD: LD1, Base: LD2, Bytes: ElemSize, Dist: 1))
      InputsAreReverseConsecutive = false;

    // Exit early if the loads are neither consecutive nor reverse consecutive.
    if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
      return SDValue();
    InputLoads.push_back(Elt: LD2);
  }

  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
         "The loads cannot be both consecutive and reverse consecutive.");

  // Emit one wide load from the lowest-addressed element; for the reverse
  // case that is the last operand, and a shuffle restores element order.
  SDValue WideLoad;
  SDValue ReturnSDVal;
  if (InputsAreConsecutiveLoads) {
    assert(FirstLoad && "Input needs to be a LoadSDNode.");
    WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: 0), dl, Chain: FirstLoad->getChain(),
                           Ptr: FirstLoad->getBasePtr(), PtrInfo: FirstLoad->getPointerInfo(),
                           Alignment: FirstLoad->getAlign());
    ReturnSDVal = WideLoad;
  } else if (InputsAreReverseConsecutive) {
    LoadSDNode *LastLoad = InputLoads.back();
    assert(LastLoad && "Input needs to be a LoadSDNode.");
    WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: 0), dl, Chain: LastLoad->getChain(),
                           Ptr: LastLoad->getBasePtr(), PtrInfo: LastLoad->getPointerInfo(),
                           Alignment: LastLoad->getAlign());
    // Reversing shuffle mask: N-1, N-2, ..., 1, 0.
    SmallVector<int, 16> Ops;
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
      Ops.push_back(Elt: i);

    ReturnSDVal = DAG.getVectorShuffle(VT: N->getValueType(ResNo: 0), dl, N1: WideLoad,
                                       N2: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)), Mask: Ops);
  } else
    return SDValue();

  // Preserve memory ordering: anything chained on the original scalar loads
  // must now be ordered against the wide load.
  for (auto *LD : InputLoads)
    DAG.makeEquivalentMemoryOrdering(OldLoad: LD, NewMemOp: WideLoad);
  return ReturnSDVal;
}
16275
16276// This function adds the required vector_shuffle needed to get
16277// the elements of the vector extract in the correct position
16278// as specified by the CorrectElems encoding.
16279static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
16280 SDValue Input, uint64_t Elems,
16281 uint64_t CorrectElems) {
16282 SDLoc dl(N);
16283
16284 unsigned NumElems = Input.getValueType().getVectorNumElements();
16285 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16286
16287 // Knowing the element indices being extracted from the original
16288 // vector and the order in which they're being inserted, just put
16289 // them at element indices required for the instruction.
16290 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16291 if (DAG.getDataLayout().isLittleEndian())
16292 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16293 else
16294 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16295 CorrectElems = CorrectElems >> 8;
16296 Elems = Elems >> 8;
16297 }
16298
16299 SDValue Shuffle =
16300 DAG.getVectorShuffle(VT: Input.getValueType(), dl, N1: Input,
16301 N2: DAG.getUNDEF(VT: Input.getValueType()), Mask: ShuffleMask);
16302
16303 EVT VT = N->getValueType(ResNo: 0);
16304 SDValue Conv = DAG.getBitcast(VT, V: Shuffle);
16305
16306 EVT ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(),
16307 VT: Input.getValueType().getVectorElementType(),
16308 NumElements: VT.getVectorNumElements());
16309 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT, N1: Conv,
16310 N2: DAG.getValueType(ExtVT));
16311}
16312
16313// Look for build vector patterns where input operands come from sign
16314// extended vector_extract elements of specific indices. If the correct indices
16315// aren't used, add a vector shuffle to fix up the indices and create
16316// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16317// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices.
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC byte->word
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8 byte->double word
  // For LE: the allowed indices are: 0x0,0x8
  // For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  // Accumulated per-operand extract indices, one byte each (same nibble
  // encoding as TargetElems), most recent operand in the low byte.
  uint64_t Elems = 0;
  int Index;
  SDValue Input;

  // Returns true if Op is a sign-extended extract from the one common input
  // vector; records the extract index into Elems via the captured state.
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
      return false;

    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
    // of the right width.
    SDValue Extract = Op.getOperand(i: 0);
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
      Extract = Extract.getOperand(i: 0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    // Only constant extract indices can be validated against the encoding.
    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: 1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    if (Input && Input != Extract.getOperand(i: 0))
      return false;

    if (!Input)
      Input = Extract.getOperand(i: 0);

    // Append this operand's index as a new low byte; BE indices live in the
    // high nibble of the byte, LE indices in the low nibble.
    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts,
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(Num: i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
  // The sum of input and output element widths uniquely identifies the
  // extension kind (e.g. 8+32=40 is byte->word).
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(ResNo: 0).getScalarSizeInBits();
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  // Keep only the nibbles relevant for the current endianness before
  // comparing against what was actually extracted.
  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}
16411
16412// Look for the pattern of a load from a narrow width to i128, feeding
16413// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16414// (LXVRZX). This node represents a zero extending load that will be matched
16415// to the Load VSX Vector Rightmost instructions.
16416static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
16417 SDLoc DL(N);
16418
16419 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16420 if (N->getValueType(ResNo: 0) != MVT::v1i128)
16421 return SDValue();
16422
16423 SDValue Operand = N->getOperand(Num: 0);
16424 // Proceed with the transformation if the operand to the BUILD_VECTOR
16425 // is a load instruction.
16426 if (Operand.getOpcode() != ISD::LOAD)
16427 return SDValue();
16428
16429 auto *LD = cast<LoadSDNode>(Val&: Operand);
16430 EVT MemoryType = LD->getMemoryVT();
16431
16432 // This transformation is only valid if the we are loading either a byte,
16433 // halfword, word, or doubleword.
16434 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16435 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16436
16437 // Ensure that the load from the narrow width is being zero extended to i128.
16438 if (!ValidLDType ||
16439 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16440 LD->getExtensionType() != ISD::EXTLOAD))
16441 return SDValue();
16442
16443 SDValue LoadOps[] = {
16444 LD->getChain(), LD->getBasePtr(),
16445 DAG.getIntPtrConstant(Val: MemoryType.getScalarSizeInBits(), DL)};
16446
16447 return DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVRZX, dl: DL,
16448 VTList: DAG.getVTList(VT1: MVT::v1i128, VT2: MVT::Other),
16449 Ops: LoadOps, MemVT: MemoryType, MMO: LD->getMemOperand());
16450}
16451
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // All combines below depend on VSX being available.
  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(Num: 0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
    if (Reduced)
      return Reduced;
  }

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
    Reduced = combineBVOfVecSExt(N, DAG);
    if (Reduced)
      return Reduced;
  }

  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
  // is a load from <valid narrow width> to i128.
  if (Subtarget.isISA3_1()) {
    SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
    if (BVOfZLoad)
      return BVOfZLoad;
  }

  // The remaining combine only applies to v2f64 built from int-to-fp
  // conversions of two extracted elements.
  if (N->getValueType(ResNo: 0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
      FirstInput.getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (N->getOperand(Num: 1).getOpcode() != ISD::SINT_TO_FP &&
      N->getOperand(Num: 1).getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  // Both conversions must have the same signedness.
  if (FirstInput.getOpcode() != N->getOperand(Num: 1).getOpcode())
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(i: 0);
  SDValue Ext2 = N->getOperand(Num: 1).getOperand(i: 0);
  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  // Both extract indices must be constants from the same v4i32 source.
  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Val: Ext1.getOperand(i: 1));
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Val: Ext2.getOperand(i: 1));
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  if (Ext1.getOperand(i: 0).getValueType() != MVT::v4i32 ||
      Ext1.getOperand(i: 0) != Ext2.getOperand(i: 0))
    return SDValue();

  // The extracted pair must be either elements {0,1} or {2,3}; the matching
  // subvector index depends on endianness.
  int FirstElem = Ext1Op->getZExtValue();
  int SecondElem = Ext2Op->getZExtValue();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
  else
    return SDValue();

  // Convert the chosen half of the source vector in a single operation.
  SDValue SrcVec = Ext1.getOperand(i: 0);
  auto NodeType = (N->getOperand(Num: 1).getOpcode() == ISD::SINT_TO_FP) ?
    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(Opcode: NodeType, DL: dl, VT: MVT::v2f64,
                     N1: SrcVec, N2: DAG.getIntPtrConstant(Val: SubvecIdx, DL: dl));
}
16542
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (!Op.getOperand(i: 0).getValueType().isSimple())
    return SDValue();
  // Only integer sources wider than i1 and no wider than i64 are handled.
  if (Op.getOperand(i: 0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(i: 0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // On P9, a sub-word (i8/i16) load feeding the conversion can use LXSIZX to
  // load directly into a VSR, avoiding a GPR->VSR transfer.
  SDValue FirstOperand(Op.getOperand(i: 0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // Width of the loaded value in bytes (1 for i8, 2 for i16).
    SDValue WidthConst =
      DAG.getIntPtrConstant(Val: FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            DL: dl, isTarget: false);
    LoadSDNode *LDN = cast<LoadSDNode>(Val: FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXSIZX, dl,
                                         VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                         Ops, MemVT: MVT::i8, MMO: LDN->getMemOperand());
    DAG.makeEquivalentMemoryOrdering(OldLoad: LDN, NewMemOp: Ld);

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(Opcode: PPCISD::VEXTS, DL: dl, VT: MVT::f64, Ops: ExtOps);
      return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ext);
    } else
      return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ld);
  }


  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value are undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(i: 0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  // (FP_TO_UINT needs FPCVT hardware; FP_TO_SINT is always available.)
  if ((Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(i: 0).getOperand(i: 0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
      DCI.AddToWorklist(N: Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                         PPCISD::FCTIDUZ;

    // Re-do the fp->int->fp round trip entirely in FPRs/VSRs.
    SDValue Tmp = DAG.getNode(Opcode: FCTOp, DL: dl, VT: MVT::f64, Operand: Src);
    SDValue FP = DAG.getNode(Opcode: FCFOp, DL: dl, VT: FCFTy, Operand: Tmp);

    // Without FPCVT the conversion lands in f64; round it down to f32.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
                       N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
      DCI.AddToWorklist(N: FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
16649
16650// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16651// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  // Delay VSX load for LE combine until after LegalizeOps to prioritize other
  // load combines.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  // Extract chain/address/memory-operand from either a plain load or a
  // load-like intrinsic.
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Val: N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(Num: 2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(ResNo: 0).getSimpleVT();

  // Load as v2f64 via LXVD2X and then swap the two doublewords to recover
  // the expected element order on little-endian.
  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVD2X, dl,
                                         VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other),
                                         Ops: LoadOps, MemVT: MVT::v2f64, MMO);

  DCI.AddToWorklist(N: Load.getNode());
  Chain = Load.getValue(R: 1);
  SDValue Swap = DAG.getNode(
      Opcode: PPCISD::XXSWAPD, DL: dl, VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Load);
  DCI.AddToWorklist(N: Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: VecTy, Operand: Swap);
    DCI.AddToWorklist(N: N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, VTList: DAG.getVTList(VT1: VecTy, VT2: MVT::Other),
                       N1: N, N2: Swap.getValue(R: 1));
  }

  return Swap;
}
16715
16716// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16717// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  // Delay VSX store for LE combine until after LegalizeOps to prioritize other
  // store combines.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  // Operand index of the value being stored (differs between store nodes
  // and store-like intrinsics).
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(Val: N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(Num: 3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(Num: SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // All stores are done as v2f64 and possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: Src);
    DCI.AddToWorklist(N: Src.getNode());
  }

  // Swap the doublewords first, then store with STXVD2X so the in-memory
  // layout matches what a little-endian store would produce.
  SDValue Swap = DAG.getNode(Opcode: PPCISD::XXSWAPD, DL: dl,
                             VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Src);
  DCI.AddToWorklist(N: Swap.getNode());
  Chain = Swap.getValue(R: 1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(Opcode: PPCISD::STXVD2X, dl,
                                          VTList: DAG.getVTList(VT: MVT::Other),
                                          Ops: StoreOps, MemVT: VecTy, MMO);
  DCI.AddToWorklist(N: Store.getNode());
  return Store;
}
16779
// Handle DAG combine for STORE (FP_TO_INT F).
//
// Folds a store of an FP-to-integer conversion into a single
// PPCISD::ST_VSR_SCAL_INT node so the converted value is stored directly
// from a VSR without a round trip through a GPR. Returns the new store, or
// SDValue() when the combine does not apply.
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(Num: 1).getOpcode();
  // Opcode is only consumed by the assertion below.
  (void)Opcode;
  bool Strict = N->getOperand(Num: 1)->isStrictFPOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
          Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  // For strict FP nodes operand 0 is the chain, so the FP source follows it.
  SDValue Val = N->getOperand(Num: 1).getOperand(i: Strict ? 1 : 0);
  EVT Op1VT = N->getOperand(Num: 1).getValueType();
  EVT ResVT = Val.getValueType();

  if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(VT: ResVT))
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
      (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
       (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  // TODO: Lower conversion from f128 on all VSX targets
  if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
    return SDValue();

  if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
      cast<StoreSDNode>(Val: N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  Val = convertFPToInt(Op: N->getOperand(Num: 1), DAG, Subtarget);

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  // Operands: chain, converted value, pointer, byte width, original int type.
  SDValue Ops[] = {N->getOperand(Num: 0), Val, N->getOperand(Num: 2),
                   DAG.getIntPtrConstant(Val: ByteSize, DL: dl, isTarget: false),
                   DAG.getValueType(Op1VT)};

  Val = DAG.getMemIntrinsicNode(Opcode: PPCISD::ST_VSR_SCAL_INT, dl,
                                VTList: DAG.getVTList(VT: MVT::Other), Ops,
                                MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
                                MMO: cast<StoreSDNode>(Val: N)->getMemOperand());

  return Val;
}
16828
16829static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16830 // Check that the source of the element keeps flipping
16831 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16832 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16833 for (int i = 1, e = Mask.size(); i < e; i++) {
16834 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16835 return false;
16836 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16837 return false;
16838 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16839 }
16840 return true;
16841}
16842
16843static bool isSplatBV(SDValue Op) {
16844 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16845 return false;
16846 SDValue FirstOp;
16847
16848 // Find first non-undef input.
16849 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16850 FirstOp = Op.getOperand(i);
16851 if (!FirstOp.isUndef())
16852 break;
16853 }
16854
16855 // All inputs are undef or the same as the first non-undef input.
16856 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16857 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16858 return false;
16859 return true;
16860}
16861
16862static SDValue isScalarToVec(SDValue Op) {
16863 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16864 return Op;
16865 if (Op.getOpcode() != ISD::BITCAST)
16866 return SDValue();
16867 Op = Op.getOperand(i: 0);
16868 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16869 return Op;
16870 return SDValue();
16871}
16872
16873// Fix up the shuffle mask to account for the fact that the result of
16874// scalar_to_vector is not in lane zero. This just takes all values in
16875// the ranges specified by the min/max indices and adds the number of
16876// elements required to ensure each element comes from the respective
16877// position in the valid lane.
16878// On little endian, that's just the corresponding element in the other
16879// half of the vector. On big endian, it is in the same half but right
16880// justified rather than left justified in that half.
16881static void fixupShuffleMaskForPermutedSToV(
16882 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16883 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16884 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16885 int LHSEltFixup =
16886 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16887 int RHSEltFixup =
16888 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16889 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16890 int Idx = ShuffV[I];
16891 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16892 ShuffV[I] += LHSEltFixup;
16893 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16894 ShuffV[I] += RHSEltFixup;
16895 }
16896}
16897
// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
// the original is:
// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
// In such a case, just change the shuffle mask to extract the element
// from the permuted index.
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
                               const PPCSubtarget &Subtarget) {
  SDLoc dl(OrigSToV);
  EVT VT = OrigSToV.getValueType();
  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         "Expecting a SCALAR_TO_VECTOR here");
  SDValue Input = OrigSToV.getOperand(i: 0);

  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Val: Input.getOperand(i: 1));
    SDValue OrigVector = Input.getOperand(i: 0);

    // Can't handle non-const element indices or different vector types
    // for the input to the extract and the output of the scalar_to_vector.
    if (Idx && VT == OrigVector.getValueType()) {
      unsigned NumElts = VT.getVectorNumElements();
      assert(
          NumElts > 1 &&
          "Cannot produce a permuted scalar_to_vector for one element vector");
      // Build a shuffle that places the extracted element into the "valid
      // lane": element NumElts/2 on little endian, the element just left of
      // the midpoint (NumElts/2 - 1) on big endian. All other lanes are
      // undef.
      SmallVector<int, 16> NewMask(NumElts, -1);
      unsigned ResultInElt = NumElts / 2;
      ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
      NewMask[ResultInElt] = Idx->getZExtValue();
      return DAG.getVectorShuffle(VT, dl, N1: OrigVector, N2: OrigVector, Mask: NewMask);
    }
  }
  // Otherwise mark the node as permuted so the scalar can stay in the lane
  // the hardware naturally places it in.
  return DAG.getNode(Opcode: PPCISD::SCALAR_TO_VECTOR_PERMUTED, DL: dl, VT,
                     Operand: OrigSToV.getOperand(i: 0));
}
16932
16933static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16934 int HalfVec, int LHSLastElementDefined,
16935 int RHSLastElementDefined) {
16936 for (int Index : ShuffV) {
16937 if (Index < 0) // Skip explicitly undefined mask indices.
16938 continue;
16939 // Handle first input vector of the vector_shuffle.
16940 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16941 (Index > LHSLastElementDefined))
16942 return false;
16943 // Handle second input vector of the vector_shuffle.
16944 if ((RHSLastElementDefined >= 0) &&
16945 (Index > HalfVec + RHSLastElementDefined))
16946 return false;
16947 }
16948 return true;
16949}
16950
// Build the permuted SCALAR_TO_VECTOR replacement for one operand of a
// vector_shuffle, and compute — via the NumValidElts/LastElt out-parameters —
// the range of shuffle-mask indices that refer to defined elements of that
// operand. FirstElt is 0 for the LHS operand and the shuffle element count
// for the RHS operand.
static SDValue generateSToVPermutedForVecShuffle(
    int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
    int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
    SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
  EVT VecShuffOperandType = VecShuffOperand.getValueType();
  // Set up the values for the shuffle vector fixup.
  NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
  // The last element depends on if the input comes from the LHS or RHS.
  //
  // For example:
  // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
  //
  // For the LHS: The last element that comes from the LHS is actually 0, not 3
  // because elements 1 and higher of a scalar_to_vector are undefined.
  // For the RHS: The last element that comes from the RHS is actually 5, not 7
  // because elements 1 and higher of a scalar_to_vector are undefined.
  // It is also not 4 because the original scalar_to_vector is wider and
  // actually contains two i32 elements.
  LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
                ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
                : FirstElt;
  SDValue SToVPermuted = getSToVPermuted(OrigSToV: SToVNode, DAG, Subtarget);
  // Bitcast back to the shuffle operand's type if the permuted node's type
  // differs (e.g. when the scalar width is not the shuffle element width).
  if (SToVPermuted.getValueType() != VecShuffOperandType)
    SToVPermuted = DAG.getBitcast(VT: VecShuffOperandType, V: SToVPermuted);
  return SToVPermuted;
}
16977
// On little endian subtargets, combine shuffles such as:
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
// into:
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
// because the latter can be matched to a single instruction merge.
// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
// to put the value into element zero. Adjust the shuffle mask so that the
// vector can remain in permuted form (to prevent a swap prior to a shuffle).
// On big endian targets, this is still useful for SCALAR_TO_VECTOR
// nodes with elements smaller than doubleword because all the ways
// of getting scalar data into a vector register put the value in the
// rightmost element of the left half of the vector.
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  SDValue LHS = SVN->getOperand(Num: 0);
  SDValue RHS = SVN->getOperand(Num: 1);
  auto Mask = SVN->getMask();
  int NumElts = LHS.getValueType().getVectorNumElements();
  // Default result: the original shuffle, unchanged.
  SDValue Res(SVN, 0);
  SDLoc dl(SVN);
  bool IsLittleEndian = Subtarget.isLittleEndian();

  // On big endian targets this is only useful for subtargets with direct moves.
  // On little endian targets it would be useful for all subtargets with VSX.
  // However adding special handling for LE subtargets without direct moves
  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
  // which includes direct moves.
  if (!Subtarget.hasDirectMove())
    return Res;

  // If this is not a shuffle of a shuffle and the first element comes from
  // the second vector, canonicalize to the commuted form. This will make it
  // more likely to match one of the single instruction patterns.
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
    std::swap(a&: LHS, b&: RHS);
    Res = DAG.getCommutedVectorShuffle(SV: *SVN);

    // Commuting may have simplified the node into something that is no
    // longer a shuffle; nothing further can be done with it here.
    if (!isa<ShuffleVectorSDNode>(Val: Res))
      return Res;

    Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
  }

  // Adjust the shuffle mask if either input vector comes from a
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
  // form (to prevent the need for a swap).
  SmallVector<int, 16> ShuffV(Mask);
  SDValue SToVLHS = isScalarToVec(Op: LHS);
  SDValue SToVRHS = isScalarToVec(Op: RHS);
  if (SToVLHS || SToVRHS) {
    EVT VT = SVN->getValueType(ResNo: 0);
    uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
    int ShuffleNumElts = ShuffV.size();
    int HalfVec = ShuffleNumElts / 2;
    // The width of the "valid lane" (i.e. the lane that contains the value that
    // is vectorized) needs to be expressed in terms of the number of elements
    // of the shuffle. It is thereby the ratio of the values before and after
    // any bitcast, which will be set later on if the LHS or RHS are
    // SCALAR_TO_VECTOR nodes.
    unsigned LHSNumValidElts = HalfVec;
    unsigned RHSNumValidElts = HalfVec;

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is. Note, that -1 means that all elements
    // are undefined.
    int LHSFirstElt = 0;
    int RHSFirstElt = ShuffleNumElts;
    int LHSLastElt = -1;
    int RHSLastElt = -1;

    // Get the permuted scalar to vector nodes for the source(s) that come from
    // ISD::SCALAR_TO_VECTOR.
    // On big endian systems, this only makes sense for element sizes smaller
    // than 64 bits since for 64-bit elements, all instructions already put
    // the value into element zero. Since scalar size of LHS and RHS may differ
    // after isScalarToVec, this should be checked using their own sizes.
    int LHSScalarSize = 0;
    int RHSScalarSize = 0;
    if (SToVLHS) {
      LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
      if (!IsLittleEndian && LHSScalarSize >= 64)
        return Res;
    }
    if (SToVRHS) {
      RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
      if (!IsLittleEndian && RHSScalarSize >= 64)
        return Res;
    }
    if (LHSScalarSize != 0)
      LHS = generateSToVPermutedForVecShuffle(
          ScalarSize: LHSScalarSize, ShuffleEltWidth, NumValidElts&: LHSNumValidElts, FirstElt: LHSFirstElt,
          LastElt&: LHSLastElt, VecShuffOperand: LHS, SToVNode: SToVLHS, DAG, Subtarget);
    if (RHSScalarSize != 0)
      RHS = generateSToVPermutedForVecShuffle(
          ScalarSize: RHSScalarSize, ShuffleEltWidth, NumValidElts&: RHSNumValidElts, FirstElt: RHSFirstElt,
          LastElt&: RHSLastElt, VecShuffOperand: RHS, SToVNode: SToVRHS, DAG, Subtarget);

    // Bail out if the mask references elements beyond what the permuted
    // scalar_to_vector nodes actually define.
    if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElementDefined: LHSLastElt, RHSLastElementDefined: RHSLastElt))
      return Res;

    // Fix up the shuffle mask to reflect where the desired element actually is.
    // The minimum and maximum indices that correspond to element zero for both
    // the LHS and RHS are computed and will control which shuffle mask entries
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
    // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
    fixupShuffleMaskForPermutedSToV(
        ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
        LHSNumValidElts, RHSNumValidElts, Subtarget);
    Res = DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV);

    // We may have simplified away the shuffle. We won't be able to do anything
    // further with it here.
    if (!isa<ShuffleVectorSDNode>(Val: Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
  }

  // After the canonicalization above, a splat (if present) ends up on the
  // RHS for little endian and on the LHS for big endian.
  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
  // The common case after we commuted the shuffle is that the RHS is a splat
  // and we have elements coming in from the splat at indices that are not
  // conducive to using a merge.
  // Example:
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
  if (!isSplatBV(Op: TheSplat))
    return Res;

  // We are looking for a mask such that all even elements are from
  // one vector and all odd elements from the other.
  if (!isAlternatingShuffMask(Mask, NumElts))
    return Res;

  // Adjust the mask so we are pulling in the same index from the splat
  // as the index from the interesting vector in consecutive elements.
  if (IsLittleEndian) {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
    if (Mask[0] < NumElts)
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
    else
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
      }
  } else {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
    if (Mask[0] < NumElts)
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
    else
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
      }
  }

  // If the RHS has undefs, we need to remove them since we may have created
  // a shuffle that adds those instead of the splat value.
  SDValue SplatVal =
      cast<BuildVectorSDNode>(Val: TheSplat.getNode())->getSplatValue();
  TheSplat = DAG.getSplatBuildVector(VT: TheSplat.getValueType(), DL: dl, Op: SplatVal);

  if (IsLittleEndian)
    RHS = TheSplat;
  else
    LHS = TheSplat;
  return DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV);
}
17164
// Combine an element-reversing VECTOR_SHUFFLE with its adjacent normal load
// or store into a single PPCISD::LOAD_VEC_BE / STORE_VEC_BE memory node,
// eliminating the explicit element reversal. Only performed on little endian
// subtargets with P9 vector support (see the PPCVSXSwapRemoval note below).
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
         "Not a reverse memop pattern!");

  // Returns true iff the shuffle mask is exactly <N-1, N-2, ..., 1, 0>,
  // i.e. a full reversal of the vector elements.
  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
    auto Mask = SVN->getMask();
    int i = 0;
    auto I = Mask.rbegin();
    auto E = Mask.rend();

    for (; I != E; ++I) {
      if (*I != i)
        return false;
      i++;
    }
    return true;
  };

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = SVN->getValueType(ResNo: 0);

  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return SDValue();

  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
  // See comment in PPCVSXSwapRemoval.cpp.
  // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
  if (!Subtarget.hasP9Vector())
    return SDValue();

  if(!IsElementReverse(SVN))
    return SDValue();

  if (LSBase->getOpcode() == ISD::LOAD) {
    // If the load return value 0 has more than one user except the
    // shufflevector instruction, it is not profitable to replace the
    // shufflevector with a reverse load.
    for (SDUse &Use : LSBase->uses())
      if (Use.getResNo() == 0 &&
          Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
        return SDValue();

    SDLoc dl(LSBase);
    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        Opcode: PPCISD::LOAD_VEC_BE, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops: LoadOps,
        MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
  }

  if (LSBase->getOpcode() == ISD::STORE) {
    // If there are other uses of the shuffle, the swap cannot be avoided.
    // Forcing the use of an X-Form (since swapped stores only have
    // X-Forms) without removing the swap is unprofitable.
    if (!SVN->hasOneUse())
      return SDValue();

    SDLoc dl(LSBase);
    // The stored value is the shuffle's input, i.e. the un-reversed vector.
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(Num: 0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        Opcode: PPCISD::STORE_VEC_BE, dl, VTList: DAG.getVTList(VT: MVT::Other), Ops: StoreOps,
        MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
  }

  llvm_unreachable("Expected a load or store node here");
}
17233
17234static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17235 unsigned IntrinsicID = Intrin.getConstantOperandVal(i: 1);
17236 if (IntrinsicID == Intrinsic::ppc_stdcx)
17237 StoreWidth = 8;
17238 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17239 StoreWidth = 4;
17240 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17241 StoreWidth = 2;
17242 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17243 StoreWidth = 1;
17244 else
17245 return false;
17246 return true;
17247}
17248
17249static SDValue DAGCombineAddc(SDNode *N,
17250 llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
17251 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(Value: 1)) {
17252 // (ADDC (ADDE 0, 0, C), -1) -> C
17253 SDValue LHS = N->getOperand(Num: 0);
17254 SDValue RHS = N->getOperand(Num: 1);
17255 if (LHS->getOpcode() == PPCISD::ADDE &&
17256 isNullConstant(V: LHS->getOperand(Num: 0)) &&
17257 isNullConstant(V: LHS->getOperand(Num: 1)) && isAllOnesConstant(V: RHS)) {
17258 return DCI.CombineTo(N, Res0: SDValue(N, 0), Res1: LHS->getOperand(Num: 2));
17259 }
17260 }
17261 return SDValue();
17262}
17263
17264// Optimize zero-extension of setcc when the compared value is known to be 0
17265// or 1.
17266//
17267// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
17268// -> zext(xor(Value, 1)) for seteq
17269// -> zext(Value) for setne
17270//
17271// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
17272// by keeping the value in its original i32 type throughout.
17273//
17274// Example:
17275// Before: zext(setcc(test_data_class(...), 0, seteq))
17276// // test_data_class returns 0 or 1 in i32
17277// // setcc converts i32 -> i1
17278// // zext converts i1 -> i64
17279// After: zext(xor(test_data_class(...), 1))
17280// // Stays in i32, then extends to i64
17281//
17282// This is beneficial because:
17283// 1. Eliminates the setcc instruction
17284// 2. Avoids i32 -> i1 truncation
17285// 3. Keeps computation in native integer width
17286
17287static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
17288 // Check if this is a zero_extend
17289 if (N->getOpcode() != ISD::ZERO_EXTEND)
17290 return SDValue();
17291
17292 SDValue Src = N->getOperand(Num: 0);
17293
17294 // Check if the source is a setcc
17295 if (Src.getOpcode() != ISD::SETCC)
17296 return SDValue();
17297
17298 SDValue LHS = Src.getOperand(i: 0);
17299 SDValue RHS = Src.getOperand(i: 1);
17300 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Src.getOperand(i: 2))->get();
17301
17302 if (!isNullConstant(V: RHS) && !isNullConstant(V: LHS))
17303 return SDValue();
17304
17305 SDValue NonNullConstant = isNullConstant(V: RHS) ? LHS : RHS;
17306
17307 auto isZeroOrOne = [=](SDValue &V) {
17308 if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17309 V.getConstantOperandVal(i: 0) == Intrinsic::ppc_test_data_class)
17310 return true;
17311 return false;
17312 };
17313
17314 if (!isZeroOrOne(NonNullConstant))
17315 return SDValue();
17316
17317 // Check for pattern: zext(setcc (Value), 0, seteq)) or
17318 // zext(setcc (Value), 0, setne))
17319 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
17320 // Replace with: zext(xor(Value, 1)) for seteq
17321 // or: zext(Value) for setne
17322 // This keeps the value in i32 instead of converting to i1
17323 SDLoc DL(N);
17324 EVT VType = N->getValueType(ResNo: 0);
17325 SDValue NewNonNullConstant = DAG.getZExtOrTrunc(Op: NonNullConstant, DL, VT: VType);
17326
17327 if (CC == ISD::SETNE)
17328 return NewNonNullConstant;
17329
17330 SDValue One = DAG.getConstant(Val: 1, DL, VT: VType);
17331 return DAG.getNode(Opcode: ISD::XOR, DL, VT: VType, N1: NewNonNullConstant, N2: One);
17332 }
17333
17334 return SDValue();
17335}
17336
// Combine XOR patterns with SELECT_CC_I4/I8, for Example:
// 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
// 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
// 1, cc))
// 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
// 0, 1, cc))
// 4. etc
static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::XOR && "Expected XOR node");

  // Only i32/i64 results are handled; they map to SELECT_CC_I4/SELECT_CC_I8.
  EVT XorVT = N->getValueType(ResNo: 0);
  if ((XorVT != MVT::i32 && XorVT != MVT::i64))
    return SDValue();

  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);

  // Check for XOR with constant 1
  ConstantSDNode *XorConst = dyn_cast<ConstantSDNode>(Val&: RHS);
  if (!XorConst || !XorConst->isOne()) {
    XorConst = dyn_cast<ConstantSDNode>(Val&: LHS);
    if (!XorConst || !XorConst->isOne())
      return SDValue();
    // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
    std::swap(a&: LHS, b&: RHS);
  }

  // Check if LHS has only one use; otherwise the original select must be
  // kept alive anyway and nothing is saved by rewriting it.
  if (!LHS.hasOneUse())
    return SDValue();

  // Handle extensions: ZEXT, ANYEXT
  SDValue SelectNode = LHS;

  if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
      LHS.getOpcode() == ISD::ANY_EXTEND) {
    SelectNode = LHS.getOperand(i: 0);

    // Check if the extension input has only one use
    if (!SelectNode.hasOneUse())
      return SDValue();
  }

  // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
  // (i.e. a target pseudo already selected, not a generic ISD node).
  if (!SelectNode.isMachineOpcode())
    return SDValue();

  unsigned MachineOpc = SelectNode.getMachineOpcode();

  // Handle both SELECT_CC_I4 and SELECT_CC_I8
  if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
    return SDValue();

  // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
  if (SelectNode.getNumOperands() != 4)
    return SDValue();

  ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(Val: SelectNode.getOperand(i: 1));
  ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(Val: SelectNode.getOperand(i: 2));

  if (!ConstOp1 || !ConstOp2)
    return SDValue();

  // Only optimize if operands are {0, 1} or {1, 0}
  if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
        (ConstOp1->isZero() && ConstOp2->isOne())))
    return SDValue();

  // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
  // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
  // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
  // create SELECT_CC(cond, 1, 0, pred).
  SDLoc DL(N);
  // The new node's width follows the XOR's result type, so an extended i32
  // select is rebuilt directly as an i64 select (folding the extension away).
  MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;

  bool ConstOp1IsOne = ConstOp1->isOne();
  return SDValue(
      DAG.getMachineNode(Opcode: MachineOpc, dl: DL, VT: XorVT,
                         Ops: {SelectNode.getOperand(i: 0),
                              DAG.getConstant(Val: ConstOp1IsOne ? 0 : 1, DL, VT: XorVT),
                              DAG.getConstant(Val: ConstOp1IsOne ? 1 : 0, DL, VT: XorVT),
                              SelectNode.getOperand(i: 3)}),
      0);
}
17421
17422SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
17423 DAGCombinerInfo &DCI) const {
17424 SelectionDAG &DAG = DCI.DAG;
17425 SDLoc dl(N);
17426 switch (N->getOpcode()) {
17427 default: break;
17428 case ISD::ADD:
17429 return combineADD(N, DCI);
17430 case ISD::AND: {
17431 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17432 // original input as that will prevent us from selecting optimal rotates.
17433 // This only matters if the input to the extend is i32 widened to i64.
17434 SDValue Op1 = N->getOperand(Num: 0);
17435 SDValue Op2 = N->getOperand(Num: 1);
17436 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17437 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17438 !isa<ConstantSDNode>(Val: Op2) || N->getValueType(ResNo: 0) != MVT::i64 ||
17439 Op1.getOperand(i: 0).getValueType() != MVT::i32)
17440 break;
17441 SDValue NarrowOp = Op1.getOperand(i: 0);
17442 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17443 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17444 break;
17445
17446 uint64_t Imm = Op2->getAsZExtVal();
17447 // Make sure that the constant is narrow enough to fit in the narrow type.
17448 if (!isUInt<32>(x: Imm))
17449 break;
17450 SDValue ConstOp = DAG.getConstant(Val: Imm, DL: dl, VT: MVT::i32);
17451 SDValue NarrowAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: NarrowOp, N2: ConstOp);
17452 return DAG.getZExtOrTrunc(Op: NarrowAnd, DL: dl, VT: N->getValueType(ResNo: 0));
17453 }
17454 case ISD::XOR: {
17455 // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17456 if (SDValue V = combineXorSelectCC(N, DAG))
17457 return V;
17458 break;
17459 }
17460 case ISD::SHL:
17461 return combineSHL(N, DCI);
17462 case ISD::SRA:
17463 return combineSRA(N, DCI);
17464 case ISD::SRL:
17465 return combineSRL(N, DCI);
17466 case ISD::MUL:
17467 return combineMUL(N, DCI);
17468 case ISD::FMA:
17469 case PPCISD::FNMSUB:
17470 return combineFMALike(N, DCI);
17471 case PPCISD::SHL:
17472 if (isNullConstant(V: N->getOperand(Num: 0))) // 0 << V -> 0.
17473 return N->getOperand(Num: 0);
17474 break;
17475 case PPCISD::SRL:
17476 if (isNullConstant(V: N->getOperand(Num: 0))) // 0 >>u V -> 0.
17477 return N->getOperand(Num: 0);
17478 break;
17479 case PPCISD::SRA:
17480 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0))) {
17481 if (C->isZero() || // 0 >>s V -> 0.
17482 C->isAllOnes()) // -1 >>s V -> -1.
17483 return N->getOperand(Num: 0);
17484 }
17485 break;
17486 case ISD::ZERO_EXTEND:
17487 if (SDValue RetV = combineZextSetccWithZero(N, DAG&: DCI.DAG))
17488 return RetV;
17489 [[fallthrough]];
17490 case ISD::SIGN_EXTEND:
17491 case ISD::ANY_EXTEND:
17492 return DAGCombineExtBoolTrunc(N, DCI);
17493 case ISD::TRUNCATE:
17494 return combineTRUNCATE(N, DCI);
17495 case ISD::SETCC:
17496 if (SDValue CSCC = combineSetCC(N, DCI))
17497 return CSCC;
17498 [[fallthrough]];
17499 case ISD::SELECT_CC:
17500 return DAGCombineTruncBoolExt(N, DCI);
17501 case ISD::SINT_TO_FP:
17502 case ISD::UINT_TO_FP:
17503 return combineFPToIntToFP(N, DCI);
17504 case ISD::VECTOR_SHUFFLE:
17505 if (ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode())) {
17506 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(Val: N->getOperand(Num: 0));
17507 return combineVReverseMemOP(SVN: cast<ShuffleVectorSDNode>(Val: N), LSBase, DCI);
17508 }
17509 return combineVectorShuffle(SVN: cast<ShuffleVectorSDNode>(Val: N), DAG&: DCI.DAG);
17510 case ISD::STORE: {
17511
17512 EVT Op1VT = N->getOperand(Num: 1).getValueType();
17513 unsigned Opcode = N->getOperand(Num: 1).getOpcode();
17514
17515 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17516 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17517 SDValue Val = combineStoreFPToInt(N, DCI);
17518 if (Val)
17519 return Val;
17520 }
17521
17522 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17523 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N->getOperand(Num: 1));
17524 SDValue Val= combineVReverseMemOP(SVN, LSBase: cast<LSBaseSDNode>(Val: N), DCI);
17525 if (Val)
17526 return Val;
17527 }
17528
17529 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17530 if (cast<StoreSDNode>(Val: N)->isUnindexed() && Opcode == ISD::BSWAP &&
17531 N->getOperand(Num: 1).getNode()->hasOneUse() &&
17532 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17533 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17534
17535 // STBRX can only handle simple types and it makes no sense to store less
17536 // two bytes in byte-reversed order.
17537 EVT mVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
17538 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17539 break;
17540
17541 SDValue BSwapOp = N->getOperand(Num: 1).getOperand(i: 0);
17542 // Do an any-extend to 32-bits if this is a half-word input.
17543 if (BSwapOp.getValueType() == MVT::i16)
17544 BSwapOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: BSwapOp);
17545
17546 // If the type of BSWAP operand is wider than stored memory width
17547 // it need to be shifted to the right side before STBRX.
17548 if (Op1VT.bitsGT(VT: mVT)) {
17549 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17550 BSwapOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: Op1VT, N1: BSwapOp,
17551 N2: DAG.getConstant(Val: Shift, DL: dl, VT: MVT::i32));
17552 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17553 if (Op1VT == MVT::i64)
17554 BSwapOp = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: BSwapOp);
17555 }
17556
17557 SDValue Ops[] = {
17558 N->getOperand(Num: 0), BSwapOp, N->getOperand(Num: 2), DAG.getValueType(mVT)
17559 };
17560 return
17561 DAG.getMemIntrinsicNode(Opcode: PPCISD::STBRX, dl, VTList: DAG.getVTList(VT: MVT::Other),
17562 Ops, MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
17563 MMO: cast<StoreSDNode>(Val: N)->getMemOperand());
17564 }
17565
17566 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17567 // So it can increase the chance of CSE constant construction.
17568 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17569 isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) && Op1VT == MVT::i32) {
17570 // Need to sign-extended to 64-bits to handle negative values.
17571 EVT MemVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
17572 uint64_t Val64 = SignExtend64(X: N->getConstantOperandVal(Num: 1),
17573 B: MemVT.getSizeInBits());
17574 SDValue Const64 = DAG.getConstant(Val: Val64, DL: dl, VT: MVT::i64);
17575
17576 auto *ST = cast<StoreSDNode>(Val: N);
17577 SDValue NewST = DAG.getStore(Chain: ST->getChain(), dl, Val: Const64,
17578 Ptr: ST->getBasePtr(), Offset: ST->getOffset(), SVT: MemVT,
17579 MMO: ST->getMemOperand(), AM: ST->getAddressingMode(),
17580 /*IsTruncating=*/true);
17581 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17582 // new store which will change the constant by removing non-demanded bits.
17583 return ST->isUnindexed()
17584 ? DCI.CombineTo(N, Res: NewST, /*AddTo=*/false)
17585 : DCI.CombineTo(N, Res0: NewST, Res1: NewST.getValue(R: 1), /*AddTo=*/false);
17586 }
17587
17588 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17589 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17590 if (Op1VT.isSimple()) {
17591 MVT StoreVT = Op1VT.getSimpleVT();
17592 if (Subtarget.needsSwapsForVSXMemOps() &&
17593 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17594 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17595 return expandVSXStoreForLE(N, DCI);
17596 }
17597 break;
17598 }
17599 case ISD::LOAD: {
17600 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
17601 EVT VT = LD->getValueType(ResNo: 0);
17602
17603 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17604 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17605 if (VT.isSimple()) {
17606 MVT LoadVT = VT.getSimpleVT();
17607 if (Subtarget.needsSwapsForVSXMemOps() &&
17608 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17609 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17610 return expandVSXLoadForLE(N, DCI);
17611 }
17612
17613 // We sometimes end up with a 64-bit integer load, from which we extract
17614 // two single-precision floating-point numbers. This happens with
17615 // std::complex<float>, and other similar structures, because of the way we
17616 // canonicalize structure copies. However, if we lack direct moves,
17617 // then the final bitcasts from the extracted integer values to the
17618 // floating-point numbers turn into store/load pairs. Even with direct moves,
17619 // just loading the two floating-point numbers is likely better.
17620 auto ReplaceTwoFloatLoad = [&]() {
17621 if (VT != MVT::i64)
17622 return false;
17623
17624 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17625 LD->isVolatile())
17626 return false;
17627
17628 // We're looking for a sequence like this:
17629 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17630 // t16: i64 = srl t13, Constant:i32<32>
17631 // t17: i32 = truncate t16
17632 // t18: f32 = bitcast t17
17633 // t19: i32 = truncate t13
17634 // t20: f32 = bitcast t19
17635
17636 if (!LD->hasNUsesOfValue(NUses: 2, Value: 0))
17637 return false;
17638
17639 auto UI = LD->user_begin();
17640 while (UI.getUse().getResNo() != 0) ++UI;
17641 SDNode *Trunc = *UI++;
17642 while (UI.getUse().getResNo() != 0) ++UI;
17643 SDNode *RightShift = *UI;
17644 if (Trunc->getOpcode() != ISD::TRUNCATE)
17645 std::swap(a&: Trunc, b&: RightShift);
17646
17647 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17648 Trunc->getValueType(ResNo: 0) != MVT::i32 ||
17649 !Trunc->hasOneUse())
17650 return false;
17651 if (RightShift->getOpcode() != ISD::SRL ||
17652 !isa<ConstantSDNode>(Val: RightShift->getOperand(Num: 1)) ||
17653 RightShift->getConstantOperandVal(Num: 1) != 32 ||
17654 !RightShift->hasOneUse())
17655 return false;
17656
17657 SDNode *Trunc2 = *RightShift->user_begin();
17658 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17659 Trunc2->getValueType(ResNo: 0) != MVT::i32 ||
17660 !Trunc2->hasOneUse())
17661 return false;
17662
17663 SDNode *Bitcast = *Trunc->user_begin();
17664 SDNode *Bitcast2 = *Trunc2->user_begin();
17665
17666 if (Bitcast->getOpcode() != ISD::BITCAST ||
17667 Bitcast->getValueType(ResNo: 0) != MVT::f32)
17668 return false;
17669 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17670 Bitcast2->getValueType(ResNo: 0) != MVT::f32)
17671 return false;
17672
17673 if (Subtarget.isLittleEndian())
17674 std::swap(a&: Bitcast, b&: Bitcast2);
17675
17676 // Bitcast has the second float (in memory-layout order) and Bitcast2
17677 // has the first one.
17678
17679 SDValue BasePtr = LD->getBasePtr();
17680 if (LD->isIndexed()) {
17681 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17682 "Non-pre-inc AM on PPC?");
17683 BasePtr =
17684 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
17685 N2: LD->getOffset());
17686 }
17687
17688 auto MMOFlags =
17689 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17690 SDValue FloatLoad = DAG.getLoad(VT: MVT::f32, dl, Chain: LD->getChain(), Ptr: BasePtr,
17691 PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign(),
17692 MMOFlags, AAInfo: LD->getAAInfo());
17693 SDValue AddPtr =
17694 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(),
17695 N1: BasePtr, N2: DAG.getIntPtrConstant(Val: 4, DL: dl));
17696 SDValue FloatLoad2 = DAG.getLoad(
17697 VT: MVT::f32, dl, Chain: SDValue(FloatLoad.getNode(), 1), Ptr: AddPtr,
17698 PtrInfo: LD->getPointerInfo().getWithOffset(O: 4),
17699 Alignment: commonAlignment(A: LD->getAlign(), Offset: 4), MMOFlags, AAInfo: LD->getAAInfo());
17700
17701 if (LD->isIndexed()) {
17702 // Note that DAGCombine should re-form any pre-increment load(s) from
17703 // what is produced here if that makes sense.
17704 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: BasePtr);
17705 }
17706
17707 DCI.CombineTo(N: Bitcast2, Res: FloatLoad);
17708 DCI.CombineTo(N: Bitcast, Res: FloatLoad2);
17709
17710 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, LD->isIndexed() ? 2 : 1),
17711 To: SDValue(FloatLoad2.getNode(), 1));
17712 return true;
17713 };
17714
17715 if (ReplaceTwoFloatLoad())
17716 return SDValue(N, 0);
17717
17718 EVT MemVT = LD->getMemoryVT();
17719 Type *Ty = MemVT.getTypeForEVT(Context&: *DAG.getContext());
17720 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17721 if (LD->isUnindexed() && VT.isVector() &&
17722 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17723 // P8 and later hardware should just use LOAD.
17724 !Subtarget.hasP8Vector() &&
17725 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17726 VT == MVT::v4f32))) &&
17727 LD->getAlign() < ABIAlignment) {
17728 // This is a type-legal unaligned Altivec load.
17729 SDValue Chain = LD->getChain();
17730 SDValue Ptr = LD->getBasePtr();
17731 bool isLittleEndian = Subtarget.isLittleEndian();
17732
17733 // This implements the loading of unaligned vectors as described in
17734 // the venerable Apple Velocity Engine overview. Specifically:
17735 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17736 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17737 //
17738 // The general idea is to expand a sequence of one or more unaligned
17739 // loads into an alignment-based permutation-control instruction (lvsl
17740 // or lvsr), a series of regular vector loads (which always truncate
17741 // their input address to an aligned address), and a series of
17742 // permutations. The results of these permutations are the requested
17743 // loaded values. The trick is that the last "extra" load is not taken
17744 // from the address you might suspect (sizeof(vector) bytes after the
17745 // last requested load), but rather sizeof(vector) - 1 bytes after the
17746 // last requested vector. The point of this is to avoid a page fault if
17747 // the base address happened to be aligned. This works because if the
17748 // base address is aligned, then adding less than a full vector length
17749 // will cause the last vector in the sequence to be (re)loaded.
17750 // Otherwise, the next vector will be fetched as you might suspect was
17751 // necessary.
17752
17753 // We might be able to reuse the permutation generation from
17754 // a different base address offset from this one by an aligned amount.
17755 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17756 // optimization later.
17757 Intrinsic::ID Intr, IntrLD, IntrPerm;
17758 MVT PermCntlTy, PermTy, LDTy;
17759 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17760 : Intrinsic::ppc_altivec_lvsl;
17761 IntrLD = Intrinsic::ppc_altivec_lvx;
17762 IntrPerm = Intrinsic::ppc_altivec_vperm;
17763 PermCntlTy = MVT::v16i8;
17764 PermTy = MVT::v4i32;
17765 LDTy = MVT::v4i32;
17766
17767 SDValue PermCntl = BuildIntrinsicOp(IID: Intr, Op: Ptr, DAG, dl, DestVT: PermCntlTy);
17768
17769 // Create the new MMO for the new base load. It is like the original MMO,
17770 // but represents an area in memory almost twice the vector size centered
17771 // on the original address. If the address is unaligned, we might start
17772 // reading up to (sizeof(vector)-1) bytes below the address of the
17773 // original unaligned load.
17774 MachineFunction &MF = DAG.getMachineFunction();
17775 MachineMemOperand *BaseMMO =
17776 MF.getMachineMemOperand(MMO: LD->getMemOperand(),
17777 Offset: -(int64_t)MemVT.getStoreSize()+1,
17778 Size: 2*MemVT.getStoreSize()-1);
17779
17780 // Create the new base load.
17781 SDValue LDXIntID =
17782 DAG.getTargetConstant(Val: IntrLD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
17783 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17784 SDValue BaseLoad =
17785 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
17786 VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
17787 Ops: BaseLoadOps, MemVT: LDTy, MMO: BaseMMO);
17788
17789 // Note that the value of IncOffset (which is provided to the next
17790 // load's pointer info offset value, and thus used to calculate the
17791 // alignment), and the value of IncValue (which is actually used to
17792 // increment the pointer value) are different! This is because we
17793 // require the next load to appear to be aligned, even though it
17794 // is actually offset from the base pointer by a lesser amount.
17795 int IncOffset = VT.getSizeInBits() / 8;
17796 int IncValue = IncOffset;
17797
17798 // Walk (both up and down) the chain looking for another load at the real
17799 // (aligned) offset (the alignment of the other load does not matter in
17800 // this case). If found, then do not use the offset reduction trick, as
17801 // that will prevent the loads from being later combined (as they would
17802 // otherwise be duplicates).
17803 if (!findConsecutiveLoad(LD, DAG))
17804 --IncValue;
17805
17806 SDValue Increment =
17807 DAG.getConstant(Val: IncValue, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
17808 Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Ptr.getValueType(), N1: Ptr, N2: Increment);
17809
17810 MachineMemOperand *ExtraMMO =
17811 MF.getMachineMemOperand(MMO: LD->getMemOperand(),
17812 Offset: 1, Size: 2*MemVT.getStoreSize()-1);
17813 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17814 SDValue ExtraLoad =
17815 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
17816 VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
17817 Ops: ExtraLoadOps, MemVT: LDTy, MMO: ExtraMMO);
17818
17819 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
17820 N1: BaseLoad.getValue(R: 1), N2: ExtraLoad.getValue(R: 1));
17821
17822 // Because vperm has a big-endian bias, we must reverse the order
17823 // of the input vectors and complement the permute control vector
17824 // when generating little endian code. We have already handled the
17825 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17826 // and ExtraLoad here.
17827 SDValue Perm;
17828 if (isLittleEndian)
17829 Perm = BuildIntrinsicOp(IID: IntrPerm,
17830 Op0: ExtraLoad, Op1: BaseLoad, Op2: PermCntl, DAG, dl);
17831 else
17832 Perm = BuildIntrinsicOp(IID: IntrPerm,
17833 Op0: BaseLoad, Op1: ExtraLoad, Op2: PermCntl, DAG, dl);
17834
17835 if (VT != PermTy)
17836 Perm = Subtarget.hasAltivec()
17837 ? DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Perm)
17838 : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Perm,
17839 N2: DAG.getTargetConstant(Val: 1, DL: dl, VT: MVT::i64));
17840 // second argument is 1 because this rounding
17841 // is always exact.
17842
17843 // The output of the permutation is our loaded result, the TokenFactor is
17844 // our new chain.
17845 DCI.CombineTo(N, Res0: Perm, Res1: TF);
17846 return SDValue(N, 0);
17847 }
17848 }
17849 break;
17850 case ISD::INTRINSIC_WO_CHAIN: {
17851 bool isLittleEndian = Subtarget.isLittleEndian();
17852 unsigned IID = N->getConstantOperandVal(Num: 0);
17853 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17854 : Intrinsic::ppc_altivec_lvsl);
17855 if (IID == Intr && N->getOperand(Num: 1)->getOpcode() == ISD::ADD) {
17856 SDValue Add = N->getOperand(Num: 1);
17857
17858 int Bits = 4 /* 16 byte alignment */;
17859
17860 if (DAG.MaskedValueIsZero(Op: Add->getOperand(Num: 1),
17861 Mask: APInt::getAllOnes(numBits: Bits /* alignment */)
17862 .zext(width: Add.getScalarValueSizeInBits()))) {
17863 SDNode *BasePtr = Add->getOperand(Num: 0).getNode();
17864 for (SDNode *U : BasePtr->users()) {
17865 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17866 U->getConstantOperandVal(Num: 0) == IID) {
17867 // We've found another LVSL/LVSR, and this address is an aligned
17868 // multiple of that one. The results will be the same, so use the
17869 // one we've just found instead.
17870
17871 return SDValue(U, 0);
17872 }
17873 }
17874 }
17875
17876 if (isa<ConstantSDNode>(Val: Add->getOperand(Num: 1))) {
17877 SDNode *BasePtr = Add->getOperand(Num: 0).getNode();
17878 for (SDNode *U : BasePtr->users()) {
17879 if (U->getOpcode() == ISD::ADD &&
17880 isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) &&
17881 (Add->getConstantOperandVal(Num: 1) - U->getConstantOperandVal(Num: 1)) %
17882 (1ULL << Bits) ==
17883 0) {
17884 SDNode *OtherAdd = U;
17885 for (SDNode *V : OtherAdd->users()) {
17886 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17887 V->getConstantOperandVal(Num: 0) == IID) {
17888 return SDValue(V, 0);
17889 }
17890 }
17891 }
17892 }
17893 }
17894 }
17895
17896 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17897 // Expose the vabsduw/h/b opportunity for down stream
17898 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17899 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17900 IID == Intrinsic::ppc_altivec_vmaxsh ||
17901 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17902 SDValue V1 = N->getOperand(Num: 1);
17903 SDValue V2 = N->getOperand(Num: 2);
17904 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17905 V1.getSimpleValueType() == MVT::v8i16 ||
17906 V1.getSimpleValueType() == MVT::v16i8) &&
17907 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17908 // (0-a, a)
17909 if (V1.getOpcode() == ISD::SUB &&
17910 ISD::isBuildVectorAllZeros(N: V1.getOperand(i: 0).getNode()) &&
17911 V1.getOperand(i: 1) == V2) {
17912 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V2.getValueType(), Operand: V2);
17913 }
17914 // (a, 0-a)
17915 if (V2.getOpcode() == ISD::SUB &&
17916 ISD::isBuildVectorAllZeros(N: V2.getOperand(i: 0).getNode()) &&
17917 V2.getOperand(i: 1) == V1) {
17918 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17919 }
17920 // (x-y, y-x)
17921 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17922 V1.getOperand(i: 0) == V2.getOperand(i: 1) &&
17923 V1.getOperand(i: 1) == V2.getOperand(i: 0)) {
17924 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17925 }
17926 }
17927 }
17928 }
17929
17930 break;
17931 case ISD::INTRINSIC_W_CHAIN:
17932 switch (N->getConstantOperandVal(Num: 1)) {
17933 default:
17934 break;
17935 case Intrinsic::ppc_altivec_vsum4sbs:
17936 case Intrinsic::ppc_altivec_vsum4shs:
17937 case Intrinsic::ppc_altivec_vsum4ubs: {
17938 // These sum-across intrinsics only have a chain due to the side effect
17939 // that they may set the SAT bit. If we know the SAT bit will not be set
17940 // for some inputs, we can replace any uses of their chain with the
17941 // input chain.
17942 if (BuildVectorSDNode *BVN =
17943 dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 3))) {
17944 APInt APSplatBits, APSplatUndef;
17945 unsigned SplatBitSize;
17946 bool HasAnyUndefs;
17947 bool BVNIsConstantSplat = BVN->isConstantSplat(
17948 SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 0,
17949 isBigEndian: !Subtarget.isLittleEndian());
17950 // If the constant splat vector is 0, the SAT bit will not be set.
17951 if (BVNIsConstantSplat && APSplatBits == 0)
17952 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: N->getOperand(Num: 0));
17953 }
17954 return SDValue();
17955 }
17956 case Intrinsic::ppc_vsx_lxvw4x:
17957 case Intrinsic::ppc_vsx_lxvd2x:
17958 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17959 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17960 if (Subtarget.needsSwapsForVSXMemOps())
17961 return expandVSXLoadForLE(N, DCI);
17962 break;
17963 }
17964 break;
17965 case ISD::INTRINSIC_VOID:
17966 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17967 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17968 if (Subtarget.needsSwapsForVSXMemOps()) {
17969 switch (N->getConstantOperandVal(Num: 1)) {
17970 default:
17971 break;
17972 case Intrinsic::ppc_vsx_stxvw4x:
17973 case Intrinsic::ppc_vsx_stxvd2x:
17974 return expandVSXStoreForLE(N, DCI);
17975 }
17976 }
17977 break;
17978 case ISD::BSWAP: {
17979 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17980 // For subtargets without LDBRX, we can still do better than the default
17981 // expansion even for 64-bit BSWAP (LOAD).
17982 bool Is64BitBswapOn64BitTgt =
17983 Subtarget.isPPC64() && N->getValueType(ResNo: 0) == MVT::i64;
17984 bool IsSingleUseNormalLd = ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode()) &&
17985 N->getOperand(Num: 0).hasOneUse();
17986 if (IsSingleUseNormalLd &&
17987 (N->getValueType(ResNo: 0) == MVT::i32 || N->getValueType(ResNo: 0) == MVT::i16 ||
17988 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17989 SDValue Load = N->getOperand(Num: 0);
17990 LoadSDNode *LD = cast<LoadSDNode>(Val&: Load);
17991 // Create the byte-swapping load.
17992 SDValue Ops[] = {
17993 LD->getChain(), // Chain
17994 LD->getBasePtr(), // Ptr
17995 DAG.getValueType(N->getValueType(ResNo: 0)) // VT
17996 };
17997 SDValue BSLoad =
17998 DAG.getMemIntrinsicNode(Opcode: PPCISD::LBRX, dl,
17999 VTList: DAG.getVTList(VT1: N->getValueType(ResNo: 0) == MVT::i64 ?
18000 MVT::i64 : MVT::i32, VT2: MVT::Other),
18001 Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
18002
18003 // If this is an i16 load, insert the truncate.
18004 SDValue ResVal = BSLoad;
18005 if (N->getValueType(ResNo: 0) == MVT::i16)
18006 ResVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i16, Operand: BSLoad);
18007
18008 // First, combine the bswap away. This makes the value produced by the
18009 // load dead.
18010 DCI.CombineTo(N, Res: ResVal);
18011
18012 // Next, combine the load away, we give it a bogus result value but a real
18013 // chain result. The result value is dead because the bswap is dead.
18014 DCI.CombineTo(N: Load.getNode(), Res0: ResVal, Res1: BSLoad.getValue(R: 1));
18015
18016 // Return N so it doesn't get rechecked!
18017 return SDValue(N, 0);
18018 }
18019 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18020 // before legalization so that the BUILD_PAIR is handled correctly.
18021 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
18022 !IsSingleUseNormalLd)
18023 return SDValue();
18024 LoadSDNode *LD = cast<LoadSDNode>(Val: N->getOperand(Num: 0));
18025
18026 // Can't split volatile or atomic loads.
18027 if (!LD->isSimple())
18028 return SDValue();
18029 SDValue BasePtr = LD->getBasePtr();
18030 SDValue Lo = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr,
18031 PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign());
18032 Lo = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Lo);
18033 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
18034 N2: DAG.getIntPtrConstant(Val: 4, DL: dl));
18035 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
18036 MMO: LD->getMemOperand(), Offset: 4, Size: 4);
18037 SDValue Hi = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr, MMO: NewMMO);
18038 Hi = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Hi);
18039 SDValue Res;
18040 if (Subtarget.isLittleEndian())
18041 Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Hi, N2: Lo);
18042 else
18043 Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
18044 SDValue TF =
18045 DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
18046 N1: Hi.getOperand(i: 0).getValue(R: 1), N2: Lo.getOperand(i: 0).getValue(R: 1));
18047 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: TF);
18048 return Res;
18049 }
18050 case PPCISD::VCMP:
18051 // If a VCMP_rec node already exists with exactly the same operands as this
18052 // node, use its result instead of this node (VCMP_rec computes both a CR6
18053 // and a normal output).
18054 //
18055 if (!N->getOperand(Num: 0).hasOneUse() &&
18056 !N->getOperand(Num: 1).hasOneUse() &&
18057 !N->getOperand(Num: 2).hasOneUse()) {
18058
18059 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
18060 SDNode *VCMPrecNode = nullptr;
18061
18062 SDNode *LHSN = N->getOperand(Num: 0).getNode();
18063 for (SDNode *User : LHSN->users())
18064 if (User->getOpcode() == PPCISD::VCMP_rec &&
18065 User->getOperand(Num: 1) == N->getOperand(Num: 1) &&
18066 User->getOperand(Num: 2) == N->getOperand(Num: 2) &&
18067 User->getOperand(Num: 0) == N->getOperand(Num: 0)) {
18068 VCMPrecNode = User;
18069 break;
18070 }
18071
18072 // If there is no VCMP_rec node, or if the flag value has a single use,
18073 // don't transform this.
18074 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(NUses: 0, Value: 1))
18075 break;
18076
18077 // Look at the (necessarily single) use of the flag value. If it has a
18078 // chain, this transformation is more complex. Note that multiple things
18079 // could use the value result, which we should ignore.
18080 SDNode *FlagUser = nullptr;
18081 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18082 FlagUser == nullptr; ++UI) {
18083 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18084 SDNode *User = UI->getUser();
18085 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
18086 if (User->getOperand(Num: i) == SDValue(VCMPrecNode, 1)) {
18087 FlagUser = User;
18088 break;
18089 }
18090 }
18091 }
18092
18093 // If the user is a MFOCRF instruction, we know this is safe.
18094 // Otherwise we give up for right now.
18095 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18096 return SDValue(VCMPrecNode, 0);
18097 }
18098 break;
18099 case ISD::BR_CC: {
18100 // If this is a branch on an altivec predicate comparison, lower this so
18101 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18102 // lowering is done pre-legalize, because the legalizer lowers the predicate
18103 // compare down to code that is difficult to reassemble.
18104 // This code also handles branches that depend on the result of a store
18105 // conditional.
18106 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 1))->get();
18107 SDValue LHS = N->getOperand(Num: 2), RHS = N->getOperand(Num: 3);
18108
18109 int CompareOpc;
18110 bool isDot;
18111
18112 if (!isa<ConstantSDNode>(Val: RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
18113 break;
18114
18115 // Since we are doing this pre-legalize, the RHS can be a constant of
18116 // arbitrary bitwidth which may cause issues when trying to get the value
18117 // from the underlying APInt.
18118 auto RHSAPInt = RHS->getAsAPIntVal();
18119 if (!RHSAPInt.isIntN(N: 64))
18120 break;
18121
18122 unsigned Val = RHSAPInt.getZExtValue();
18123 auto isImpossibleCompare = [&]() {
18124 // If this is a comparison against something other than 0/1, then we know
18125 // that the condition is never/always true.
18126 if (Val != 0 && Val != 1) {
18127 if (CC == ISD::SETEQ) // Cond never true, remove branch.
18128 return N->getOperand(Num: 0);
18129 // Always !=, turn it into an unconditional branch.
18130 return DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other,
18131 N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 4));
18132 }
18133 return SDValue();
18134 };
18135 // Combine branches fed by store conditional instructions (st[bhwd]cx).
18136 unsigned StoreWidth = 0;
18137 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18138 isStoreConditional(Intrin: LHS, StoreWidth)) {
18139 if (SDValue Impossible = isImpossibleCompare())
18140 return Impossible;
18141 PPC::Predicate CompOpc;
18142 // eq 0 => ne
18143 // ne 0 => eq
18144 // eq 1 => eq
18145 // ne 1 => ne
18146 if (Val == 0)
18147 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18148 else
18149 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18150
18151 SDValue Ops[] = {LHS.getOperand(i: 0), LHS.getOperand(i: 2), LHS.getOperand(i: 3),
18152 DAG.getConstant(Val: StoreWidth, DL: dl, VT: MVT::i32)};
18153 auto *MemNode = cast<MemSDNode>(Val&: LHS);
18154 SDValue ConstSt = DAG.getMemIntrinsicNode(
18155 Opcode: PPCISD::STORE_COND, dl,
18156 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other, VT3: MVT::Glue), Ops,
18157 MemVT: MemNode->getMemoryVT(), MMO: MemNode->getMemOperand());
18158
18159 SDValue InChain;
18160 // Unchain the branch from the original store conditional.
18161 if (N->getOperand(Num: 0) == LHS.getValue(R: 1))
18162 InChain = LHS.getOperand(i: 0);
18163 else if (N->getOperand(Num: 0).getOpcode() == ISD::TokenFactor) {
18164 SmallVector<SDValue, 4> InChains;
18165 SDValue InTF = N->getOperand(Num: 0);
18166 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
18167 if (InTF.getOperand(i) != LHS.getValue(R: 1))
18168 InChains.push_back(Elt: InTF.getOperand(i));
18169 InChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: InChains);
18170 }
18171
18172 return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: InChain,
18173 N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
18174 N3: DAG.getRegister(Reg: PPC::CR0, VT: MVT::i32), N4: N->getOperand(Num: 4),
18175 N5: ConstSt.getValue(R: 2));
18176 }
18177
18178 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18179 getVectorCompareInfo(Intrin: LHS, CompareOpc, isDot, Subtarget)) {
18180 assert(isDot && "Can't compare against a vector result!");
18181
18182 if (SDValue Impossible = isImpossibleCompare())
18183 return Impossible;
18184
18185 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
18186 // Create the PPCISD altivec 'dot' comparison node.
18187 SDValue Ops[] = {
18188 LHS.getOperand(i: 2), // LHS of compare
18189 LHS.getOperand(i: 3), // RHS of compare
18190 DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
18191 };
18192 EVT VTs[] = { LHS.getOperand(i: 2).getValueType(), MVT::Glue };
18193 SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);
18194
18195 // Unpack the result based on how the target uses it.
18196 PPC::Predicate CompOpc;
18197 switch (LHS.getConstantOperandVal(i: 1)) {
18198 default: // Can't happen, don't crash on invalid number though.
18199 case 0: // Branch on the value of the EQ bit of CR6.
18200 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18201 break;
18202 case 1: // Branch on the inverted value of the EQ bit of CR6.
18203 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18204 break;
18205 case 2: // Branch on the value of the LT bit of CR6.
18206 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18207 break;
18208 case 3: // Branch on the inverted value of the LT bit of CR6.
18209 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18210 break;
18211 }
18212
18213 return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: N->getOperand(Num: 0),
18214 N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
18215 N3: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32),
18216 N4: N->getOperand(Num: 4), N5: CompNode.getValue(R: 1));
18217 }
18218 break;
18219 }
18220 case ISD::BUILD_VECTOR:
18221 return DAGCombineBuildVector(N, DCI);
18222 case PPCISD::ADDC:
18223 return DAGCombineAddc(N, DCI);
18224 }
18225
18226 return SDValue();
18227}
18228
18229SDValue
18230PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
18231 SelectionDAG &DAG,
18232 SmallVectorImpl<SDNode *> &Created) const {
18233 // fold (sdiv X, pow2)
18234 EVT VT = N->getValueType(ResNo: 0);
18235 if (VT == MVT::i64 && !Subtarget.isPPC64())
18236 return SDValue();
18237 if ((VT != MVT::i32 && VT != MVT::i64) ||
18238 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
18239 return SDValue();
18240
18241 SDLoc DL(N);
18242 SDValue N0 = N->getOperand(Num: 0);
18243
18244 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
18245 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18246 SDValue ShiftAmt = DAG.getConstant(Val: Lg2, DL, VT);
18247
18248 SDValue Op = DAG.getNode(Opcode: PPCISD::SRA_ADDZE, DL, VT, N1: N0, N2: ShiftAmt);
18249 Created.push_back(Elt: Op.getNode());
18250
18251 if (IsNegPow2) {
18252 Op = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
18253 Created.push_back(Elt: Op.getNode());
18254 }
18255
18256 return Op;
18257}
18258
18259//===----------------------------------------------------------------------===//
18260// Inline Assembly Support
18261//===----------------------------------------------------------------------===//
18262
/// Report target-specific known-bits facts for PPC nodes: byte-reversed
/// halfword loads clear the upper 16 bits, ADDE of two zeros yields 0/1, and
/// the AltiVec predicate-compare intrinsics produce only 0 or 1.
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    // Operand 2 carries the memory VT of the byte-reversed load.
    if (cast<VTSDNode>(Val: Op.getOperand(i: 2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case PPCISD::ADDE: {
    if (Op.getResNo() == 0) {
      // (0|1), _ = ADDE 0, 0, CARRY
      // With both addends zero, result 0 is just the incoming carry bit.
      SDValue LHS = Op.getOperand(i: 0);
      SDValue RHS = Op.getOperand(i: 1);
      if (isNullConstant(V: LHS) && isNullConstant(V: RHS))
        Known.Zero = ~1ULL;
    }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    // All of the vcmp*_p predicate forms return a 0/1 condition value.
    switch (Op.getConstantOperandVal(i: 0)) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpequq_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Chained intrinsics carry the intrinsic ID in operand 1 (0 is the chain).
    switch (Op.getConstantOperandVal(i: 1)) {
    default:
      break;
    case Intrinsic::ppc_load2r:
      // Top bits are cleared for load2r (which is the same as lhbrx).
      Known.Zero = 0xFFFF0000;
      break;
    }
    break;
  }
  }
}
18327
/// Return the preferred alignment for a loop. On the listed POWER cores,
/// prefer 32-byte alignment for innermost nested loops and for small loops
/// (17..32 bytes of code); otherwise defer to the generic implementation.
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR11:
  case PPC::DIR_PWR_FUTURE: {
    // Without loop info we cannot apply either heuristic.
    if (!ML)
      break;

    if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
      // so that we can decrease cache misses and branch-prediction misses.
      // Actual alignment of the loop will depend on the hotness check and other
      // logic in alignBlocks.
      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
        return Align(32);
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    // Note: the inner break only stops scanning the current block once the
    // size exceeds 32; LoopSize is monotonic, so the range test below is
    // still valid.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (const MachineInstr &J : **I) {
        LoopSize += TII->getInstSizeInBytes(MI: J);
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return Align(32);

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}
18376
18377/// getConstraintType - Given a constraint, return the type of
18378/// constraint it is for this target.
18379PPCTargetLowering::ConstraintType
18380PPCTargetLowering::getConstraintType(StringRef Constraint) const {
18381 if (Constraint.size() == 1) {
18382 switch (Constraint[0]) {
18383 default: break;
18384 case 'b':
18385 case 'r':
18386 case 'f':
18387 case 'd':
18388 case 'v':
18389 case 'y':
18390 return C_RegisterClass;
18391 case 'Z':
18392 // FIXME: While Z does indicate a memory constraint, it specifically
18393 // indicates an r+r address (used in conjunction with the 'y' modifier
18394 // in the replacement string). Currently, we're forcing the base
18395 // register to be r0 in the asm printer (which is interpreted as zero)
18396 // and forming the complete address in the second register. This is
18397 // suboptimal.
18398 return C_Memory;
18399 }
18400 } else if (Constraint == "wc") { // individual CR bits.
18401 return C_RegisterClass;
18402 } else if (Constraint == "wa" || Constraint == "wd" ||
18403 Constraint == "wf" || Constraint == "ws" ||
18404 Constraint == "wi" || Constraint == "ww") {
18405 return C_RegisterClass; // VSX registers.
18406 }
18407 return TargetLowering::getConstraintType(Constraint);
18408}
18409
18410/// Examine constraint type and operand type and determine a weight value.
18411/// This object must already have been set up with the operand type
18412/// and the current alternative constraint selected.
18413TargetLowering::ConstraintWeight
18414PPCTargetLowering::getSingleConstraintMatchWeight(
18415 AsmOperandInfo &info, const char *constraint) const {
18416 ConstraintWeight weight = CW_Invalid;
18417 Value *CallOperandVal = info.CallOperandVal;
18418 // If we don't have a value, we can't do a match,
18419 // but allow it at the lowest weight.
18420 if (!CallOperandVal)
18421 return CW_Default;
18422 Type *type = CallOperandVal->getType();
18423
18424 // Look at the constraint type.
18425 if (StringRef(constraint) == "wc" && type->isIntegerTy(Bitwidth: 1))
18426 return CW_Register; // an individual CR bit.
18427 else if ((StringRef(constraint) == "wa" ||
18428 StringRef(constraint) == "wd" ||
18429 StringRef(constraint) == "wf") &&
18430 type->isVectorTy())
18431 return CW_Register;
18432 else if (StringRef(constraint) == "wi" && type->isIntegerTy(Bitwidth: 64))
18433 return CW_Register; // just hold 64-bit integers data.
18434 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18435 return CW_Register;
18436 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18437 return CW_Register;
18438
18439 switch (*constraint) {
18440 default:
18441 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
18442 break;
18443 case 'b':
18444 if (type->isIntegerTy())
18445 weight = CW_Register;
18446 break;
18447 case 'f':
18448 if (type->isFloatTy())
18449 weight = CW_Register;
18450 break;
18451 case 'd':
18452 if (type->isDoubleTy())
18453 weight = CW_Register;
18454 break;
18455 case 'v':
18456 if (type->isVectorTy())
18457 weight = CW_Register;
18458 break;
18459 case 'y':
18460 weight = CW_Register;
18461 break;
18462 case 'Z':
18463 weight = CW_Memory;
18464 break;
18465 }
18466 return weight;
18467}
18468
/// Map an inline-asm register constraint to a (register, register class)
/// pair. Handles the PPC single-letter constraints, the "w*" VSX/CR-bit
/// constraints, "lr", and explicit "{vsN}"/"{fN}" register names, then
/// defers to the generic implementation with PPC-specific fixups.
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b': // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(x: 0U, y: &PPC::G8RC_NOX0RegClass);
      return std::make_pair(x: 0U, y: &PPC::GPRC_NOR0RegClass);
    case 'r': // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(x: 0U, y: &PPC::G8RCRegClass);
      return std::make_pair(x: 0U, y: &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      // Under SPE, FP values live in GPR/SPE registers rather than FPRs.
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(x: 0U, y: &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(x: 0U, y: &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(x: 0U, y: &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(x: 0U, y: &PPC::F8RCRegClass);
      }
      break;
    case 'v':
      if (Subtarget.hasAltivec() && VT.isVector())
        return std::make_pair(x: 0U, y: &PPC::VRRCRegClass);
      else if (Subtarget.hasVSX())
        // Scalars in Altivec registers only make sense with VSX.
        return std::make_pair(x: 0U, y: &PPC::VFRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(x: 0U, y: &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(x: 0U, y: &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    // A VSX register for either a scalar (FP) or vector. There is no
    // support for single precision scalars on subtargets prior to Power8.
    if (VT.isVector())
      return std::make_pair(x: 0U, y: &PPC::VSRCRegClass);
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(x: 0U, y: &PPC::VSSRCRegClass);
    return std::make_pair(x: 0U, y: &PPC::VSFRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(x: 0U, y: &PPC::VSSRCRegClass);
    else
      return std::make_pair(x: 0U, y: &PPC::VSFRCRegClass);
  } else if (Constraint == "lr") {
    if (VT == MVT::i64)
      return std::make_pair(x: 0U, y: &PPC::LR8RCRegClass);
    else
      return std::make_pair(x: 0U, y: &PPC::LRRCRegClass);
  }

  // Handle special cases of physical registers that are not properly handled
  // by the base class.
  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
    // If we name a VSX register, we can't defer to the base class because it
    // will not recognize the correct register (their names will be VSL{0-31}
    // and V{0-31} so they won't match). So we match them here.
    if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
      int VSNum = atoi(nptr: Constraint.data() + 3);
      assert(VSNum >= 0 && VSNum <= 63 &&
             "Attempted to access a vsr out of range");
      // vs0-vs31 map to VSL0-VSL31; vs32-vs63 map to V0-V31.
      if (VSNum < 32)
        return std::make_pair(x: PPC::VSL0 + VSNum, y: &PPC::VSRCRegClass);
      return std::make_pair(x: PPC::V0 + VSNum - 32, y: &PPC::VSRCRegClass);
    }

    // For float registers, we can't defer to the base class as it will match
    // the SPILLTOVSRRC class.
    if (Constraint.size() > 3 && Constraint[1] == 'f') {
      int RegNum = atoi(nptr: Constraint.data() + 2);
      if (RegNum > 31 || RegNum < 0)
        report_fatal_error(reason: "Invalid floating point register number");
      if (VT == MVT::f32 || VT == MVT::i32)
        return Subtarget.hasSPE()
                   ? std::make_pair(x: PPC::R0 + RegNum, y: &PPC::GPRCRegClass)
                   : std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return Subtarget.hasSPE()
                   ? std::make_pair(x: PPC::S0 + RegNum, y: &PPC::SPERCRegClass)
                   : std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F8RCRegClass);
    }
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(Reg: R.first))
    return std::make_pair(x: TRI->getMatchingSuperReg(Reg: R.first,
                          SubIdx: PPC::sub_32, RC: &PPC::G8RCRegClass),
                          y: &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_insensitive(RHS: Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }
  // FIXME: This warning should ideally be emitted in the front end.
  const auto &TM = getTargetMachine();
  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
    if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
         (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
        (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
      errs() << "warning: vector registers 20 to 32 are reserved in the "
                "default AIX AltiVec ABI and cannot be used\n";
  }

  return R;
}
18600
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
/// Only the PPC immediate-constraint letters (I/J/K/L/M/N/O/P) are handled
/// here; everything else is delegated to the generic implementation.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     StringRef Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.size() > 1)
    return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Val&: Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    // Result stays empty when the value does not satisfy the letter's
    // predicate, in which case nothing is added to Ops.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(x: Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(x: Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(x: Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(x: Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(x: -Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(x: Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
18677
18678void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
18679 SmallVectorImpl<SDValue> &Ops,
18680 SelectionDAG &DAG) const {
18681 if (I.getNumOperands() <= 1)
18682 return;
18683 if (!isa<ConstantSDNode>(Val: Ops[1].getNode()))
18684 return;
18685 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18686 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18687 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18688 return;
18689
18690 if (MDNode *MDN = I.getMetadata(KindID: LLVMContext::MD_annotation))
18691 Ops.push_back(Elt: DAG.getMDNode(MD: MDN));
18692}
18693
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // Vector type r+i form is supported since power9 as DQ form. We don't check
  // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
  // imm form is preferred and the offset can be adjusted to use imm form later
  // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
  // max offset to check legal addressing mode, we should be a little aggressive
  // to contain other offsets for that LSRUse.
  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  // NOTE(review): the accepted range (-65535 .. 65534) is wider than a true
  // signed 16-bit field; presumably deliberate slack for LSR's min/max-offset
  // probing (see the comment above) — confirm before tightening.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only support r+r,
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}
18738
/// Lower ISD::RETURNADDR. Depth 0 loads the return address from this
/// function's own save slot; depth > 0 walks up via the frame chain and
/// loads LR from the caller's frame at the ABI return-save offset.
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  SDLoc dl(Op);
  // Operand 0 is the requested frame depth.
  unsigned Depth = Op.getConstantOperandVal(i: 0);

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  auto PtrVT = getPointerTy(DL: MF.getDataLayout());

  if (Depth > 0) {
    // The link register (return address) is saved in the caller's frame
    // not the callee's stack frame. So we must get the caller's frame
    // address and load the return address at the LR offset from there.
    SDValue FrameAddr =
        DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
                    Ptr: LowerFRAMEADDR(Op, DAG), PtrInfo: MachinePointerInfo());
    SDValue Offset =
        DAG.getConstant(Val: Subtarget.getFrameLowering()->getReturnSaveOffset(), DL: dl,
                        VT: Subtarget.getScalarIntVT());
    return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(),
                       Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FrameAddr, N2: Offset),
                       PtrInfo: MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(), Ptr: RetAddrFI,
                     PtrInfo: MachinePointerInfo());
}
18774
/// Lower ISD::FRAMEADDR. Reads the frame-pointer register and, for
/// Depth > 0, follows the saved back-chain links that many times.
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Operand 0 is the requested frame depth.
  unsigned Depth = Op.getConstantOperandVal(i: 0);

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg: FrameReg,
                                         VT: PtrVT);
  // Each dereference hops one frame up the chain.
  while (Depth--)
    FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
                            Ptr: FrameAddr, PtrInfo: MachinePointerInfo());
  return FrameAddr;
}
18802
18803#define GET_REGISTER_MATCHER
18804#include "PPCGenAsmMatcher.inc"
18805
/// Resolve a named register (for e.g. named-register global variables).
/// Validates the requested width against the subtarget, rejects registers
/// that must not be reserved, and widens rN to xN for 64-bit requests.
Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                              const MachineFunction &MF) const {
  bool IsPPC64 = Subtarget.isPPC64();

  // Only s64 (on PPC64) or s32 requests are meaningful here.
  bool Is64Bit = IsPPC64 && VT == LLT::scalar(SizeInBits: 64);
  if (!Is64Bit && VT != LLT::scalar(SizeInBits: 32))
    report_fatal_error(reason: "Invalid register global variable type");

  Register Reg = MatchRegisterName(Name: RegName);
  if (!Reg)
    return Reg;

  // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
  // Need followup investigation as to why.
  // r0 (and r2 on PPC64) cannot be reserved for a named register.
  if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
    report_fatal_error(reason: Twine("Trying to reserve an invalid register \"" +
                              StringRef(RegName) + "\"."));

  // Convert GPR to GP8R register for 64bit.
  if (Is64Bit && StringRef(RegName).starts_with_insensitive(Prefix: "r"))
    Reg = Reg.id() - PPC::R0 + PPC::X0;

  return Reg;
}
18830
18831bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18832 // 32-bit SVR4 ABI access everything as got-indirect.
18833 if (Subtarget.is32BitELFABI())
18834 return true;
18835
18836 // AIX accesses everything indirectly through the TOC, which is similar to
18837 // the GOT.
18838 if (Subtarget.isAIXABI())
18839 return true;
18840
18841 CodeModel::Model CModel = getTargetMachine().getCodeModel();
18842 // If it is small or large code model, module locals are accessed
18843 // indirectly by loading their address from .toc/.got.
18844 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18845 return true;
18846
18847 // JumpTable and BlockAddress are accessed as got-indirect.
18848 if (isa<JumpTableSDNode>(Val: GA) || isa<BlockAddressSDNode>(Val: GA))
18849 return true;
18850
18851 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: GA))
18852 return Subtarget.isGVIndirectSymbol(GV: G->getGlobal());
18853
18854 return false;
18855}
18856
18857bool
18858PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18859 // The PowerPC target isn't yet aware of offsets.
18860 return false;
18861}
18862
/// Describe the memory behavior (opcode, memory VT, pointer operand, offset,
/// size, alignment, flags) of PPC intrinsics that touch memory, so a
/// MachineMemOperand can be attached during selection. Intrinsics not listed
/// append nothing to \p Infos.
void PPCTargetLowering::getTgtMemIntrinsic(
    SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
    MachineFunction &MF, unsigned Intrinsic) const {
  IntrinsicInfo Info;
  switch (Intrinsic) {
  // Quadword atomics: full load+store, volatile, 16-byte aligned.
  case Intrinsic::ppc_atomicrmw_xchg_i128:
  case Intrinsic::ppc_atomicrmw_add_i128:
  case Intrinsic::ppc_atomicrmw_sub_i128:
  case Intrinsic::ppc_atomicrmw_nand_i128:
  case Intrinsic::ppc_atomicrmw_and_i128:
  case Intrinsic::ppc_atomicrmw_or_i128:
  case Intrinsic::ppc_atomicrmw_xor_i128:
  case Intrinsic::ppc_cmpxchg_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(i: 0);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    Infos.push_back(Elt: Info);
    return;
  case Intrinsic::ppc_atomic_load_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(i: 0);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
    Infos.push_back(Elt: Info);
    return;
  case Intrinsic::ppc_atomic_store_i128:
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = MVT::i128;
    // For the store form the pointer is argument 2 (value halves come first).
    Info.ptrVal = I.getArgOperand(i: 2);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
    Infos.push_back(Elt: Info);
    return;
  // AltiVec/VSX loads.
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(i: 0);
    // Conservative window [ptr - (StoreSize-1), ptr + StoreSize - 1]:
    // offset/size model that the access may touch any byte near the
    // (possibly element-misaligned) pointer.
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOLoad;
    Infos.push_back(Elt: Info);
    return;
  }
  // AltiVec/VSX stores; same conservative window as the loads above.
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    // Stores take (value, ptr): the pointer is argument 1.
    Info.ptrVal = I.getArgOperand(i: 1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOStore;
    Infos.push_back(Elt: Info);
    return;
  }
  // Store-conditional family: volatile store, naturally aligned per width.
  case Intrinsic::ppc_stdcx:
  case Intrinsic::ppc_stwcx:
  case Intrinsic::ppc_sthcx:
  case Intrinsic::ppc_stbcx: {
    EVT VT;
    auto Alignment = Align(8);
    switch (Intrinsic) {
    case Intrinsic::ppc_stdcx:
      VT = MVT::i64;
      break;
    case Intrinsic::ppc_stwcx:
      VT = MVT::i32;
      Alignment = Align(4);
      break;
    case Intrinsic::ppc_sthcx:
      VT = MVT::i16;
      Alignment = Align(2);
      break;
    case Intrinsic::ppc_stbcx:
      VT = MVT::i8;
      Alignment = Align(1);
      break;
    }
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(i: 0);
    Info.offset = 0;
    Info.align = Alignment;
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
    Infos.push_back(Elt: Info);
    return;
  }
  default:
    break;
  }
}
19021
19022/// It returns EVT::Other if the type should be determined using generic
19023/// target-independent logic.
19024EVT PPCTargetLowering::getOptimalMemOpType(
19025 LLVMContext &Context, const MemOp &Op,
19026 const AttributeList &FuncAttributes) const {
19027 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
19028 // We should use Altivec/VSX loads and stores when available. For unaligned
19029 // addresses, unaligned VSX loads are only fast starting with the P8.
19030 if (Subtarget.hasAltivec() && Op.size() >= 16) {
19031 if (Op.isMemset() && Subtarget.hasVSX()) {
19032 uint64_t TailSize = Op.size() % 16;
19033 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
19034 // element if vector element type matches tail store. For tail size
19035 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
19036 if (TailSize > 2 && TailSize <= 4) {
19037 return MVT::v8i16;
19038 }
19039 return MVT::v4i32;
19040 }
19041 if (Op.isAligned(AlignCheck: Align(16)) || Subtarget.hasP8Vector())
19042 return MVT::v4i32;
19043 }
19044 }
19045
19046 if (Subtarget.isPPC64()) {
19047 return MVT::i64;
19048 }
19049
19050 return MVT::i32;
19051}
19052
19053/// Returns true if it is beneficial to convert a load of a constant
19054/// to just the constant itself.
19055bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
19056 Type *Ty) const {
19057 assert(Ty->isIntegerTy());
19058
19059 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19060 return !(BitSize == 0 || BitSize > 64);
19061}
19062
19063bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
19064 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19065 return false;
19066 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
19067 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
19068 return NumBits1 == 64 && NumBits2 == 32;
19069}
19070
19071bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
19072 if (!VT1.isInteger() || !VT2.isInteger())
19073 return false;
19074 unsigned NumBits1 = VT1.getSizeInBits();
19075 unsigned NumBits2 = VT2.getSizeInBits();
19076 return NumBits1 == 64 && NumBits2 == 32;
19077}
19078
19079bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19080 // Generally speaking, zexts are not free, but they are free when they can be
19081 // folded with other operations.
19082 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
19083 EVT MemVT = LD->getMemoryVT();
19084 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
19085 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
19086 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
19087 LD->getExtensionType() == ISD::ZEXTLOAD))
19088 return true;
19089 }
19090
19091 // FIXME: Add other cases...
19092 // - 32-bit shifts with a zext to i64
19093 // - zext after ctlz, bswap, etc.
19094 // - zext after and by a constant mask
19095
19096 return TargetLowering::isZExtFree(Val, VT2);
19097}
19098
19099bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19100 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19101 "invalid fpext types");
19102 // Extending to float128 is not free.
19103 if (DestVT == MVT::f128)
19104 return false;
19105 return true;
19106}
19107
19108bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19109 return isInt<16>(x: Imm) || isUInt<16>(x: Imm);
19110}
19111
19112bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19113 return isInt<16>(x: Imm) || isUInt<16>(x: Imm);
19114}
19115
19116bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
19117 MachineMemOperand::Flags,
19118 unsigned *Fast) const {
19119 if (DisablePPCUnaligned)
19120 return false;
19121
19122 // PowerPC supports unaligned memory access for simple non-vector types.
19123 // Although accessing unaligned addresses is not as efficient as accessing
19124 // aligned addresses, it is generally more efficient than manual expansion,
19125 // and generally only traps for software emulation when crossing page
19126 // boundaries.
19127
19128 if (!VT.isSimple())
19129 return false;
19130
19131 if (VT.isFloatingPoint() && !VT.isVector() &&
19132 !Subtarget.allowsUnalignedFPAccess())
19133 return false;
19134
19135 if (VT.getSimpleVT().isVector()) {
19136 if (Subtarget.hasVSX()) {
19137 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
19138 VT != MVT::v4f32 && VT != MVT::v4i32)
19139 return false;
19140 } else {
19141 return false;
19142 }
19143 }
19144
19145 if (VT == MVT::ppcf128)
19146 return false;
19147
19148 if (Fast)
19149 *Fast = 1;
19150
19151 return true;
19152}
19153
// Return true if it is profitable to decompose (mul x, C) into a short
// shift/add/sub sequence rather than emit a hardware multiply.
bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                               SDValue C) const {
  // Check integral scalar types.
  if (!VT.isScalarInteger())
    return false;
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
    // Only reason about multipliers that fit in a signed 64-bit value.
    if (!ConstNode->getAPIntValue().isSignedIntN(N: 64))
      return false;
    // This transformation will generate >= 2 operations. But the following
    // cases will generate <= 2 instructions during ISEL. So exclude them.
    // 1. If the constant multiplier fits 16 bits, it can be handled by one
    //    HW instruction, ie. MULLI
    // 2. If the multiplier after shifted fits 16 bits, an extra shift
    //    instruction is needed than case 1, ie. MULLI and RLDICR
    int64_t Imm = ConstNode->getSExtValue();
    // NOTE(review): if Imm == 0, countr_zero returns 64 and the shift below
    // is out of range; presumably a zero multiplier is folded before this
    // hook runs -- confirm.
    unsigned Shift = llvm::countr_zero<uint64_t>(Val: Imm);
    Imm >>= Shift;
    if (isInt<16>(x: Imm))
      return false;
    // Profitable when the odd part of the multiplier is one away from a
    // (possibly negated) power of two: a single shift plus one add/sub.
    uint64_t UImm = static_cast<uint64_t>(Imm);
    if (isPowerOf2_64(Value: UImm + 1) || isPowerOf2_64(Value: UImm - 1) ||
        isPowerOf2_64(Value: 1 - UImm) || isPowerOf2_64(Value: -1 - UImm))
      return true;
  }
  return false;
}
19180
19181bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19182 EVT VT) const {
19183 return isFMAFasterThanFMulAndFAdd(
19184 F: MF.getFunction(), Ty: VT.getTypeForEVT(Context&: MF.getFunction().getContext()));
19185}
19186
19187bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
19188 Type *Ty) const {
19189 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
19190 return false;
19191 switch (Ty->getScalarType()->getTypeID()) {
19192 case Type::FloatTyID:
19193 case Type::DoubleTyID:
19194 return true;
19195 case Type::FP128TyID:
19196 return Subtarget.hasP9Vector();
19197 default:
19198 return false;
19199 }
19200}
19201
19202// FIXME: add more patterns which are not profitable to hoist.
19203bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
19204 if (!I->hasOneUse())
19205 return true;
19206
19207 Instruction *User = I->user_back();
19208 assert(User && "A single use instruction with no uses.");
19209
19210 switch (I->getOpcode()) {
19211 case Instruction::FMul: {
19212 // Don't break FMA, PowerPC prefers FMA.
19213 if (User->getOpcode() != Instruction::FSub &&
19214 User->getOpcode() != Instruction::FAdd)
19215 return true;
19216
19217 const TargetOptions &Options = getTargetMachine().Options;
19218 const Function *F = I->getFunction();
19219 const DataLayout &DL = F->getDataLayout();
19220 Type *Ty = User->getOperand(i: 0)->getType();
19221 bool AllowContract = I->getFastMathFlags().allowContract() &&
19222 User->getFastMathFlags().allowContract();
19223
19224 return !(isFMAFasterThanFMulAndFAdd(F: *F, Ty) &&
19225 isOperationLegalOrCustom(Op: ISD::FMA, VT: getValueType(DL, Ty)) &&
19226 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
19227 }
19228 case Instruction::Load: {
19229 // Don't break "store (load float*)" pattern, this pattern will be combined
19230 // to "store (load int32)" in later InstCombine pass. See function
19231 // combineLoadToOperationType. On PowerPC, loading a float point takes more
19232 // cycles than loading a 32 bit integer.
19233 LoadInst *LI = cast<LoadInst>(Val: I);
19234 // For the loads that combineLoadToOperationType does nothing, like
19235 // ordered load, it should be profitable to hoist them.
19236 // For swifterror load, it can only be used for pointer to pointer type, so
19237 // later type check should get rid of this case.
19238 if (!LI->isUnordered())
19239 return true;
19240
19241 if (User->getOpcode() != Instruction::Store)
19242 return true;
19243
19244 if (I->getType()->getTypeID() != Type::FloatTyID)
19245 return true;
19246
19247 return false;
19248 }
19249 default:
19250 return true;
19251 }
19252 return true;
19253}
19254
/// Registers that stackmaps/patchpoints may treat as scratch (clobbered by
/// any call). Returned array is null-terminated (the trailing 0 entry).
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}
19267
19268Register PPCTargetLowering::getExceptionPointerRegister(
19269 const Constant *PersonalityFn) const {
19270 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19271}
19272
19273Register PPCTargetLowering::getExceptionSelectorRegister(
19274 const Constant *PersonalityFn) const {
19275 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19276}
19277
19278bool
19279PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
19280 EVT VT , unsigned DefinedValues) const {
19281 if (VT == MVT::v2i64)
19282 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19283
19284 if (Subtarget.hasVSX())
19285 return true;
19286
19287 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
19288}
19289
19290Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
19291 if (DisableILPPref || Subtarget.enableMachineScheduler())
19292 return TargetLowering::getSchedulingPreference(N);
19293
19294 return Sched::ILP;
19295}
19296
// Create a fast isel object for the PPC target; thin forwarding wrapper
// around PPC::createFastISel.
FastISel *PPCTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
    const LibcallLoweringInfo *LibcallLowering) const {
  return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
}
19303
19304// 'Inverted' means the FMA opcode after negating one multiplicand.
19305// For example, (fma -a b c) = (fnmsub a b c)
19306static unsigned invertFMAOpcode(unsigned Opc) {
19307 switch (Opc) {
19308 default:
19309 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19310 case ISD::FMA:
19311 return PPCISD::FNMSUB;
19312 case PPCISD::FNMSUB:
19313 return ISD::FMA;
19314 }
19315}
19316
// Produce a negated form of Op if that is cheap, recording its cost in Cost.
// Handles PPCISD::FNMSUB specially; everything else defers to the generic
// TargetLowering implementation.
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Opc) {
  case PPCISD::FNMSUB:
    // Only rewrite single-use nodes of a legal type.
    if (!Op.hasOneUse() || !isTypeLegal(VT))
      break;

    SDValue N0 = Op.getOperand(i: 0);
    SDValue N1 = Op.getOperand(i: 1);
    SDValue N2 = Op.getOperand(i: 2);
    SDLoc Loc(Op);

    // Every rewrite below needs the addend c negated; bail out if that is
    // not possible.
    NegatibleCost N2Cost = NegatibleCost::Expensive;
    SDValue NegN2 =
        getNegatedExpression(Op: N2, DAG, LegalOps, OptForSize, Cost&: N2Cost, Depth: Depth + 1);

    if (!NegN2)
      return SDValue();

    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change sign of zeroes. For example,
    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
    if (Flags.hasNoSignedZeros()) {
      // Try and choose the cheaper one to negate.
      NegatibleCost N0Cost = NegatibleCost::Expensive;
      SDValue NegN0 = getNegatedExpression(Op: N0, DAG, LegalOps, OptForSize,
                                           Cost&: N0Cost, Depth: Depth + 1);

      NegatibleCost N1Cost = NegatibleCost::Expensive;
      SDValue NegN1 = getNegatedExpression(Op: N1, DAG, LegalOps, OptForSize,
                                           Cost&: N1Cost, Depth: Depth + 1);

      if (NegN0 && N0Cost <= N1Cost) {
        Cost = std::min(a: N0Cost, b: N2Cost);
        return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: NegN0, N2: N1, N3: NegN2, Flags);
      } else if (NegN1) {
        Cost = std::min(a: N1Cost, b: N2Cost);
        return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: N0, N2: NegN1, N3: NegN2, Flags);
      }
    }

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(Op: ISD::FMA, VT)) {
      Cost = N2Cost;
      return DAG.getNode(Opcode: ISD::FMA, DL: Loc, VT, N1: N0, N2: N1, N3: NegN2, Flags);
    }

    break;
  }

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}
19380
19381// Override to enable LOAD_STACK_GUARD lowering on Linux.
19382bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
19383 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19384 return true;
19385 return TargetLowering::useLoadStackGuardNode(M);
19386}
19387
// Return true if the FP immediate Imm can be materialized cheaply for VT
// (so the compiler should not force it into the constant pool).
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                     bool ForCodeSize) const {
  if (!VT.isSimple() || !Subtarget.hasVSX())
    return false;

  switch(VT.getSimpleVT().SimpleTy) {
  default:
    // For FP types that are currently not supported by PPC backend, return
    // false. Examples: f16, f80.
    return false;
  case MVT::f32:
  case MVT::f64: {
    if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
      return true;
    }
    bool IsExact;
    APSInt IntResult(16, false);
    // The rounding mode doesn't really matter because we only care about floats
    // that can be converted to integers exactly.
    Imm.convertToInteger(Result&: IntResult, RM: APFloat::rmTowardZero, IsExact: &IsExact);
    // For exact values in the range [-16, 15] we can materialize the float.
    if (IsExact && IntResult <= 15 && IntResult >= -16)
      return true;
    // Zero is always cheap to materialize.
    return Imm.isZero();
  }
  case MVT::ppcf128:
    // Only positive zero is cheap for the double-double type.
    return Imm.isPosZero();
  }
}
19418
// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
// The AND is redundant because the PPC target shift nodes only look at the
// low log2(numbits) bits of the shift amount anyway.
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
                                  SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;

  // Map the generic shift to the corresponding PPC target node.
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected shift operation");
  case ISD::SHL:
    TargetOpcode = PPCISD::SHL;
    break;
  case ISD::SRL:
    TargetOpcode = PPCISD::SRL;
    break;
  case ISD::SRA:
    TargetOpcode = PPCISD::SRA;
    break;
  }

  // Only fire for legal vector shifts where the AND mask is the splatted
  // constant (element-bits - 1), i.e. a pure modulo mask.
  if (VT.isVector() && TLI.isOperationLegal(Op: Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    if (ConstantSDNode *Mask = isConstOrConstSplat(N: N1->getOperand(Num: 1)))
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc(N), VT, N1: N0, N2: N1->getOperand(Num: 0));

  return SDValue();
}
19452
// Combine vector shifts whose amount is a constant splat:
//  - splat of (element-bits - 1) -> PPC target shift fed by an all-ones
//    splat (the hardware masks the amount, so all-ones truncates to 31/63);
//  - i64 shl by splat-of-1 -> (add x, x), avoiding a constant-pool load.
SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  EVT VT = N->getValueType(ResNo: 0);
  assert(VT.isVector() && "Vector type expected.");

  unsigned Opc = N->getOpcode();
  assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
         "Unexpected opcode.");

  if (!isOperationLegal(Op: Opc, VT))
    return SDValue();

  // Only word and doubleword element types are handled.
  EVT EltTy = VT.getScalarType();
  unsigned EltBits = EltTy.getSizeInBits();
  if (EltTy != MVT::i64 && EltTy != MVT::i32)
    return SDValue();

  // Extract the splatted shift amount, either from a VADD_SPLAT pseudo or
  // from a constant-splat BUILD_VECTOR.
  SDValue N1 = N->getOperand(Num: 1);
  uint64_t SplatBits = 0;
  bool AddSplatCase = false;
  unsigned OpcN1 = N1.getOpcode();
  if (OpcN1 == PPCISD::VADD_SPLAT &&
      N1.getConstantOperandVal(i: 1) == VT.getVectorNumElements()) {
    AddSplatCase = true;
    SplatBits = N1.getConstantOperandVal(i: 0);
  }

  if (!AddSplatCase) {
    if (OpcN1 != ISD::BUILD_VECTOR)
      return SDValue();

    unsigned SplatBitSize;
    bool HasAnyUndefs;
    APInt APSplatBits, APSplatUndef;
    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Val&: N1);
    bool BVNIsConstantSplat =
        BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
                             HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian());
    // The splat must be exactly element-sized.
    if (!BVNIsConstantSplat || SplatBitSize != EltBits)
      return SDValue();
    SplatBits = APSplatBits.getZExtValue();
  }

  SDLoc DL(N);
  SDValue N0 = N->getOperand(Num: 0);
  // PPC vector shifts by word/double look at only the low 5/6 bits of the
  // shift vector, which means the max value is 31/63. A shift vector of all
  // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
  // -16 to 15 range.
  if (SplatBits == (EltBits - 1)) {
    unsigned NewOpc;
    switch (Opc) {
    // No default needed: the assert above guarantees Opc is one of these.
    case ISD::SHL:
      NewOpc = PPCISD::SHL;
      break;
    case ISD::SRL:
      NewOpc = PPCISD::SRL;
      break;
    case ISD::SRA:
      NewOpc = PPCISD::SRA;
      break;
    }
    SDValue SplatOnes = getCanonicalConstSplat(Val: 255, SplatSize: 1, VT, DAG&: DCI.DAG, dl: DL);
    return DCI.DAG.getNode(Opcode: NewOpc, DL, VT, N1: N0, N2: SplatOnes);
  }

  if (Opc != ISD::SHL || !isOperationLegal(Op: ISD::ADD, VT))
    return SDValue();

  // For 64-bit there is no splat immediate so we want to catch shift by 1 here
  // before the BUILD_VECTOR is replaced by a load.
  if (EltTy != MVT::i64 || SplatBits != 1)
    return SDValue();

  return DCI.DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT, N1: N0, N2: N0);
}
19529
// Combine SHL: strip redundant modulo masks, handle vector splat shifts, and
// on ISA 3.0 / PPC64 fold (shl (sign_extend i32), C) into EXTSWSLI.
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
    return Value;

  if (N->getValueType(ResNo: 0).isVector())
    return combineVectorShift(N, DCI);

  // EXTSWSLI requires a constant shift of a sign-extended i32 producing i64.
  SDValue N0 = N->getOperand(Num: 0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
  if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
      N0.getOpcode() != ISD::SIGN_EXTEND ||
      N0.getOperand(i: 0).getValueType() != MVT::i32 || CN1 == nullptr ||
      N->getValueType(ResNo: 0) != MVT::i64)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(i: 0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(i: 0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SDLoc DL(N0);
  SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswli, but the shift could
  // have an i64.
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(Val: CN1->getZExtValue(), DL, VT: MVT::i32);

  return DCI.DAG.getNode(Opcode: PPCISD::EXTSWSLI, DL, VT: MVT::i64, N1: N0->getOperand(Num: 0),
                         N2: ShiftBy);
}
19562
19563SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19564 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
19565 return Value;
19566
19567 if (N->getValueType(ResNo: 0).isVector())
19568 return combineVectorShift(N, DCI);
19569
19570 return SDValue();
19571}
19572
19573SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19574 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
19575 return Value;
19576
19577 if (N->getValueType(ResNo: 0).isVector())
19578 return combineVectorShift(N, DCI);
19579
19580 return SDValue();
19581}
19582
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);

  // True when Op is (zext (setcc Z, C)) with i64 operands, C a constant whose
  // negation fits the 16-bit addi immediate field.
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(i: 0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(i: 0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Val: Cmp.getOperand(i: 1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(x: NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(a&: LHS, b&: RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  // Carry values live in CR bits when CR-bit tracking is on, i32 otherwise.
  EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
  SDVTList VTs = DAG.getVTList(VT1: MVT::i64, VT2: CarryType);
  SDValue Cmp = RHS.getOperand(i: 0);
  SDValue Z = Cmp.getOperand(i: 0);
  auto *Constant = cast<ConstantSDNode>(Val: Cmp.getOperand(i: 1));
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch(cast<CondCodeSDNode>(Val: Cmp.getOperand(i: 2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
                              N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // addic: produces a carry of 1 iff AddOrZ != 0 (i.e. Z != C).
    SDValue Addc =
        DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
                    N1: AddOrZ, N2: DAG.getAllOnesConstant(DL, VT: MVT::i64),
                    N3: DAG.getConstant(Val: 0, DL, VT: CarryType));
    return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
                       N2: DAG.getConstant(Val: 0, DL, VT: MVT::i64),
                       N3: SDValue(Addc.getNode(), 1));
  }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete  Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
                              N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // subfic: 0 - AddOrZ borrows iff AddOrZ != 0; invert to get Z == C.
    SDValue Subc =
        DAG.getNode(Opcode: ISD::USUBO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
                    N1: DAG.getConstant(Val: 0, DL, VT: MVT::i64), N2: AddOrZ,
                    N3: DAG.getConstant(Val: 0, DL, VT: CarryType));
    SDValue Invert = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Subc.getValue(R: 1),
                                 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryType));
    return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
                       N2: DAG.getConstant(Val: 0, DL, VT: MVT::i64), N3: Invert);
  }
  }

  return SDValue();
}
19675
19676// Transform
19677// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19678// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19679// In this case both C1 and C2 must be known constants.
19680// C1+C2 must fit into a 34 bit signed integer.
19681static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
19682 const PPCSubtarget &Subtarget) {
19683 if (!Subtarget.isUsingPCRelativeCalls())
19684 return SDValue();
19685
19686 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19687 // If we find that node try to cast the Global Address and the Constant.
19688 SDValue LHS = N->getOperand(Num: 0);
19689 SDValue RHS = N->getOperand(Num: 1);
19690
19691 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19692 std::swap(a&: LHS, b&: RHS);
19693
19694 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19695 return SDValue();
19696
19697 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19698 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(Val: LHS.getOperand(i: 0));
19699 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(Val&: RHS);
19700
19701 // Check that both casts succeeded.
19702 if (!GSDN || !ConstNode)
19703 return SDValue();
19704
19705 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19706 SDLoc DL(GSDN);
19707
19708 // The signed int offset needs to fit in 34 bits.
19709 if (!isInt<34>(x: NewOffset))
19710 return SDValue();
19711
19712 // The new global address is a copy of the old global address except
19713 // that it has the updated Offset.
19714 SDValue GA =
19715 DAG.getTargetGlobalAddress(GV: GSDN->getGlobal(), DL, VT: GSDN->getValueType(ResNo: 0),
19716 offset: NewOffset, TargetFlags: GSDN->getTargetFlags());
19717 SDValue MatPCRel =
19718 DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: GSDN->getValueType(ResNo: 0), Operand: GA);
19719 return MatPCRel;
19720}
19721
19722// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19723// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19724// Mathematical identity: X + 1 = X - (-1)
19725// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19726// Requirement: VSX feature for efficient xxleqv generation
19727static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
19728 const PPCSubtarget &Subtarget) {
19729
19730 EVT VT = N->getValueType(ResNo: 0);
19731 if (!Subtarget.hasVSX())
19732 return SDValue();
19733
19734 // Handle v2i64, v4i32, v8i16 and v16i8 types
19735 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19736 VT == MVT::v2i64))
19737 return SDValue();
19738
19739 SDValue LHS = N->getOperand(Num: 0);
19740 SDValue RHS = N->getOperand(Num: 1);
19741
19742 // Check if RHS is BUILD_VECTOR
19743 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19744 return SDValue();
19745
19746 // Check if all the elements are 1
19747 unsigned NumOfEles = RHS.getNumOperands();
19748 for (unsigned i = 0; i < NumOfEles; ++i) {
19749 auto *CN = dyn_cast<ConstantSDNode>(Val: RHS.getOperand(i));
19750 if (!CN || CN->getSExtValue() != 1)
19751 return SDValue();
19752 }
19753 SDLoc DL(N);
19754
19755 SDValue MinusOne = DAG.getConstant(Val: APInt::getAllOnes(numBits: 32), DL, VT: MVT::i32);
19756 SmallVector<SDValue, 4> Ops(4, MinusOne);
19757 SDValue AllOnesVec = DAG.getBuildVector(VT: MVT::v4i32, DL, Ops);
19758
19759 // Bitcast to the target vector type
19760 SDValue Bitcast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: AllOnesVec);
19761
19762 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: LHS, N2: Bitcast);
19763}
19764
19765SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19766 if (auto Value = combineADDToADDZE(N, DAG&: DCI.DAG, Subtarget))
19767 return Value;
19768
19769 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DAG&: DCI.DAG, Subtarget))
19770 return Value;
19771
19772 if (auto Value = combineADDToSUB(N, DAG&: DCI.DAG, Subtarget))
19773 return Value;
19774 return SDValue();
19775}
19776
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(Num: 0);

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(ResNo: 0) != MVT::i64)
    return SDValue();

  // Plain truncate keeps the low 64 bits: element 1 on BE, element 0 on LE.
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(i: 0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(i: 0).getValueType() == MVT::f128) {
    // View the f128 as v2i64 and pull out the desired half directly.
    SDValue Bitcast = DCI.DAG.getBitcast(VT: MVT::v2i64, V: Op0.getOperand(i: 0));
    return DCI.DAG.getNode(
        Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Bitcast,
        N2: DCI.DAG.getTargetConstant(Val: EltToExtract, DL: dl, VT: MVT::i32));
  }
  return SDValue();
}
19828
// Combine (mul x, C) into shift+add/sub sequences when C is one away from a
// (possibly negated) power of two and the target's cycle ratios make the
// expansion profitable.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N: N->getOperand(Num: 1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(Op: ISD::MUL, VT: N->getValueType(ResNo: 0)))
    return SDValue();

  // Decide profitability per CPU directive; see the cycle tables below.
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
      // scalar        4       1      1
      // vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR11:
    case PPC::DIR_PWR_FUTURE:
      //  type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle RATIO of related operations are showed as a table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(ResNo: 0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(Num: 0);
    SDValue Op1 =
        DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
                    N2: DAG.getConstant(Val: (MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1);

    if (!IsNeg)
      return Res;

    // Negate via (sub 0, Res).
    return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(Num: 0);
    SDValue Op1 =
        DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
                    N2: DAG.getConstant(Val: (MulAmtAbs + 1).logBase2(), DL, VT));

    if (!IsNeg)
      return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op1, N2: Op0);
    else
      return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1);

  } else {
    return SDValue();
  }
}
19914
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
// in combiner since we need to check SD flags and other subtarget features.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);
  SDValue N2 = N->getOperand(Num: 2);
  SDNodeFlags Flags = N->getFlags();
  EVT VT = N->getValueType(ResNo: 0);
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opc = N->getOpcode();
  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
  bool LegalOps = !DCI.isBeforeLegalizeOps();
  SDLoc Loc(N);

  if (!isOperationLegal(Op: ISD::FMA, VT))
    return SDValue();

  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
  // since (fnmsub a b c)=-0 while c-ab=+0.
  if (!Flags.hasNoSignedZeros())
    return SDValue();

  // (fma (fneg a) b c) => (fnmsub a b c)
  // (fnmsub (fneg a) b c) => (fma a b c)
  if (SDValue NegN0 = getCheaperNegatedExpression(Op: N0, DAG, LegalOps, OptForSize: CodeSize))
    return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: NegN0, N2: N1, N3: N2, Flags);

  // (fma a (fneg b) c) => (fnmsub a b c)
  // (fnmsub a (fneg b) c) => (fma a b c)
  if (SDValue NegN1 = getCheaperNegatedExpression(Op: N1, DAG, LegalOps, OptForSize: CodeSize))
    return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: N0, N2: NegN1, N3: N2, Flags);

  return SDValue();
}
19950
19951bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19952 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19953 if (!Subtarget.is64BitELFABI())
19954 return false;
19955
19956 // If not a tail call then no need to proceed.
19957 if (!CI->isTailCall())
19958 return false;
19959
19960 // If sibling calls have been disabled and tail-calls aren't guaranteed
19961 // there is no reason to duplicate.
19962 auto &TM = getTargetMachine();
19963 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19964 return false;
19965
19966 // Can't tail call a function called indirectly, or if it has variadic args.
19967 const Function *Callee = CI->getCalledFunction();
19968 if (!Callee || Callee->isVarArg())
19969 return false;
19970
19971 // Make sure the callee and caller calling conventions are eligible for tco.
19972 const Function *Caller = CI->getParent()->getParent();
19973 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC: Caller->getCallingConv(),
19974 CalleeCC: CI->getCallingConv()))
19975 return false;
19976
19977 // If the function is local then we have a good chance at tail-calling it
19978 return getTargetMachine().shouldAssumeDSOLocal(GV: Callee);
19979}
19980
19981bool PPCTargetLowering::
19982isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19983 const Value *Mask = AndI.getOperand(i: 1);
19984 // If the mask is suitable for andi. or andis. we should sink the and.
19985 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Mask)) {
19986 // Can't handle constants wider than 64-bits.
19987 if (CI->getBitWidth() > 64)
19988 return false;
19989 int64_t ConstVal = CI->getZExtValue();
19990 return isUInt<16>(x: ConstVal) ||
19991 (isUInt<16>(x: ConstVal >> 16) && !(ConstVal & 0xFFFF));
19992 }
19993
19994 // For non-constant masks, we can always use the record-form and.
19995 return true;
19996}
19997
19998/// getAddrModeForFlags - Based on the set of address flags, select the most
19999/// optimal instruction format to match by.
20000PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20001 // This is not a node we should be handling here.
20002 if (Flags == PPC::MOF_None)
20003 return PPC::AM_None;
20004 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20005 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DForm))
20006 if ((Flags & FlagSet) == FlagSet)
20007 return PPC::AM_DForm;
20008 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DSForm))
20009 if ((Flags & FlagSet) == FlagSet)
20010 return PPC::AM_DSForm;
20011 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DQForm))
20012 if ((Flags & FlagSet) == FlagSet)
20013 return PPC::AM_DQForm;
20014 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_PrefixDForm))
20015 if ((Flags & FlagSet) == FlagSet)
20016 return PPC::AM_PrefixDForm;
20017 // If no other forms are selected, return an X-Form as it is the most
20018 // general addressing mode.
20019 return PPC::AM_XForm;
20020}
20021
20022/// Set alignment flags based on whether or not the Frame Index is aligned.
20023/// Utilized when computing flags for address computation when selecting
20024/// load and store instructions.
20025static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
20026 SelectionDAG &DAG) {
20027 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
20028 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: IsAdd ? N.getOperand(i: 0) : N);
20029 if (!FI)
20030 return;
20031 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
20032 unsigned FrameIndexAlign = MFI.getObjectAlign(ObjectIdx: FI->getIndex()).value();
20033 // If this is (add $FI, $S16Imm), the alignment flags are already set
20034 // based on the immediate. We just need to clear the alignment flags
20035 // if the FI alignment is weaker.
20036 if ((FrameIndexAlign % 4) != 0)
20037 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
20038 if ((FrameIndexAlign % 16) != 0)
20039 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
20040 // If the address is a plain FrameIndex, set alignment flags based on
20041 // FI alignment.
20042 if (!IsAdd) {
20043 if ((FrameIndexAlign % 4) == 0)
20044 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20045 if ((FrameIndexAlign % 16) == 0)
20046 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20047 }
20048}
20049
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function takes into account whether the node is a constant,
/// an ADD/OR, or neither, and computes the address flags accordingly.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
                                              SelectionDAG &DAG) {
  // Set the alignment flags for an immediate depending on whether it is a
  // multiple of 4 and/or 16 bytes (checked via its low bits).
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
    if ((Imm & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Imm & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  };

  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
    // All 32-bit constants can be computed as LIS + Disp.
    const APInt &ConstImm = CN->getAPIntValue();
    if (ConstImm.isSignedIntN(N: 32)) { // Flag to handle 32-bit constants.
      FlagSet |= PPC::MOF_AddrIsSImm32;
      SetAlignFlagsForImm(ConstImm.getZExtValue());
      setAlignFlagsForFI(N, FlagSet, DAG);
    }
    if (ConstImm.isSignedIntN(N: 34)) // Flag to handle 34-bit constants.
      FlagSet |= PPC::MOF_RPlusSImm34;
    else // Let constant materialization handle large constants.
      FlagSet |= PPC::MOF_NotAddNorCst;
  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
    // This address can be represented as an addition of:
    // - Register + Imm16 (possibly a multiple of 4/16)
    // - Register + Imm34
    // - Register + PPCISD::Lo
    // - Register + Register
    // In any case, we won't have to match this as Base + Zero.
    SDValue RHS = N.getOperand(i: 1);
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: RHS)) {
      const APInt &ConstImm = CN->getAPIntValue();
      if (ConstImm.isSignedIntN(N: 16)) {
        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
        SetAlignFlagsForImm(ConstImm.getZExtValue());
        setAlignFlagsForFI(N, FlagSet, DAG);
      }
      if (ConstImm.isSignedIntN(N: 34))
        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
      else
        FlagSet |= PPC::MOF_RPlusR; // Register.
    } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(i: 1))
      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo with a zero offset operand.
    else
      FlagSet |= PPC::MOF_RPlusR;
  } else { // The address computation is not a constant or an addition.
    setAlignFlagsForFI(N, FlagSet, DAG);
    FlagSet |= PPC::MOF_NotAddNorCst;
  }
}
20105
20106static bool isPCRelNode(SDValue N) {
20107 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
20108 isValidPCRelNode<ConstantPoolSDNode>(N) ||
20109 isValidPCRelNode<GlobalAddressSDNode>(N) ||
20110 isValidPCRelNode<JumpTableSDNode>(N) ||
20111 isValidPCRelNode<BlockAddressSDNode>(N));
20112}
20113
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
                                           SelectionDAG &DAG) const {
  unsigned FlagSet = PPC::MOF_None;

  // Compute subtarget flags.
  if (!Subtarget.hasP9Vector())
    FlagSet |= PPC::MOF_SubtargetBeforeP9;
  else
    FlagSet |= PPC::MOF_SubtargetP9;

  if (Subtarget.hasPrefixInstrs())
    FlagSet |= PPC::MOF_SubtargetP10;

  if (Subtarget.hasSPE())
    FlagSet |= PPC::MOF_SubtargetSPE;

  // Check if we have a PCRel node and return early.
  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
    return FlagSet;

  // If the node is the paired load/store intrinsics, compute flags for
  // address computation and return early.
  unsigned ParentOp = Parent->getOpcode();
  if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
                               (ParentOp == ISD::INTRINSIC_VOID))) {
    unsigned ID = Parent->getConstantOperandVal(Num: 1);
    if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
      // For lxvp the address is operand 2; stxvp carries the stored value
      // first, so its address is operand 3.
      SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
                             ? Parent->getOperand(Num: 2)
                             : Parent->getOperand(Num: 3);
      computeFlagsForAddressComputation(N: IntrinOp, FlagSet, DAG);
      FlagSet |= PPC::MOF_Vector;
      return FlagSet;
    }
  }

  // Mark this as something we don't want to handle here if it is atomic
  // or pre-increment instruction.
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Val: Parent))
    if (LSB->isIndexed())
      return PPC::MOF_None;

  // Compute in-memory type flags. This is based on if there are scalars,
  // floats or vectors.
  const MemSDNode *MN = dyn_cast<MemSDNode>(Val: Parent);
  assert(MN && "Parent should be a MemSDNode!");
  EVT MemVT = MN->getMemoryVT();
  unsigned Size = MemVT.getSizeInBits();
  if (MemVT.isScalarInteger()) {
    assert(Size <= 128 &&
           "Not expecting scalar integers larger than 16 bytes!");
    if (Size < 32)
      FlagSet |= PPC::MOF_SubWordInt;
    else if (Size == 32)
      FlagSet |= PPC::MOF_WordInt;
    else
      FlagSet |= PPC::MOF_DoubleWordInt;
  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
    if (Size == 128)
      FlagSet |= PPC::MOF_Vector;
    else if (Size == 256) {
      assert(Subtarget.pairedVectorMemops() &&
             "256-bit vectors are only available when paired vector memops is "
             "enabled!");
      FlagSet |= PPC::MOF_Vector;
    } else
      llvm_unreachable("Not expecting illegal vectors!");
  } else { // Floating point type: can be scalar, f128 or vector types.
    if (Size == 32 || Size == 64)
      FlagSet |= PPC::MOF_ScalarFloat;
    else if (MemVT == MVT::f128 || MemVT.isVector())
      FlagSet |= PPC::MOF_Vector;
    else
      llvm_unreachable("Not expecting illegal scalar floats!");
  }

  // Compute flags for address computation.
  computeFlagsForAddressComputation(N, FlagSet, DAG);

  // Compute type extension flags (loads only; all other parents count as
  // non-extending).
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Val: Parent)) {
    switch (LN->getExtensionType()) {
    case ISD::SEXTLOAD:
      FlagSet |= PPC::MOF_SExt;
      break;
    case ISD::EXTLOAD:
    case ISD::ZEXTLOAD:
      FlagSet |= PPC::MOF_ZExt;
      break;
    case ISD::NON_EXTLOAD:
      FlagSet |= PPC::MOF_NoExt;
      break;
    }
  } else
    FlagSet |= PPC::MOF_NoExt;

  // For integers, no extension is the same as zero extension.
  // We set the extension mode to zero extension so we don't have
  // to add separate entries in AddrModesMap for loads and stores.
  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
    FlagSet |= PPC::MOF_ZExt;
    FlagSet &= ~PPC::MOF_NoExt;
  }

  // If we don't have prefixed instructions, 34-bit constants should be
  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
  bool IsNonP1034BitConst =
      ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
       FlagSet) == PPC::MOF_RPlusSImm34;
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
      IsNonP1034BitConst)
    FlagSet |= PPC::MOF_NotAddNorCst;

  return FlagSet;
}
20231
20232/// SelectForceXFormMode - Given the specified address, force it to be
20233/// represented as an indexed [r+r] operation (an XForm instruction).
20234PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
20235 SDValue &Base,
20236 SelectionDAG &DAG) const {
20237
20238 PPC::AddrMode Mode = PPC::AM_XForm;
20239 int16_t ForceXFormImm = 0;
20240 if (provablyDisjointOr(DAG, N) &&
20241 !isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: ForceXFormImm)) {
20242 Disp = N.getOperand(i: 0);
20243 Base = N.getOperand(i: 1);
20244 return Mode;
20245 }
20246
20247 // If the address is the result of an add, we will utilize the fact that the
20248 // address calculation includes an implicit add. However, we can reduce
20249 // register pressure if we do not materialize a constant just for use as the
20250 // index register. We only get rid of the add if it is not an add of a
20251 // value and a 16-bit signed constant and both have a single use.
20252 if (N.getOpcode() == ISD::ADD &&
20253 (!isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: ForceXFormImm) ||
20254 !N.getOperand(i: 1).hasOneUse() || !N.getOperand(i: 0).hasOneUse())) {
20255 Disp = N.getOperand(i: 0);
20256 Base = N.getOperand(i: 1);
20257 return Mode;
20258 }
20259
20260 // Otherwise, use R0 as the base register.
20261 Disp = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20262 VT: N.getValueType());
20263 Base = N;
20264
20265 return Mode;
20266}
20267
20268bool PPCTargetLowering::splitValueIntoRegisterParts(
20269 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20270 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20271 EVT ValVT = Val.getValueType();
20272 // If we are splitting a scalar integer into f64 parts (i.e. so they
20273 // can be placed into VFRC registers), we need to zero extend and
20274 // bitcast the values. This will ensure the value is placed into a
20275 // VSR using direct moves or stack operations as needed.
20276 if (PartVT == MVT::f64 &&
20277 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
20278 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
20279 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f64, Operand: Val);
20280 Parts[0] = Val;
20281 return true;
20282 }
20283 return false;
20284}
20285
/// Lower \p Op to a call to the external library function \p LibCallName.
/// Every operand of \p Op is forwarded as a call argument, and the call
/// returns in \p Op's value type. The call is emitted as a tail call when
/// the node is in tail-call position and the return types are compatible.
SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
                                          SelectionDAG &DAG) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  TargetLowering::CallLoweringInfo CLI(DAG);
  EVT RetVT = Op.getValueType();
  Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext());
  SDValue Callee =
      DAG.getExternalSymbol(Sym: LibCallName, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
  // Decide once from the return type whether the result (and each argument,
  // below) should be sign- or zero-extended.
  bool SignExtend = TLI.shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: false);
  TargetLowering::ArgListTy Args;
  // Forward every operand of the node as a call argument with the matching
  // extension attribute.
  for (const SDValue &N : Op->op_values()) {
    EVT ArgVT = N.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(Context&: *DAG.getContext());
    TargetLowering::ArgListEntry Entry(N, ArgTy);
    Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(Ty: ArgTy, IsSigned: SignExtend);
    Entry.IsZExt = !Entry.IsSExt;
    Args.push_back(x: Entry);
  }

  SDValue InChain = DAG.getEntryNode();
  SDValue TCChain = InChain;
  const Function &F = DAG.getMachineFunction().getFunction();
  // Tail-call only when the node is in tail-call position and the caller
  // returns the same type as the library call (or void).
  bool isTailCall =
      TLI.isInTailCallPosition(DAG, Node: Op.getNode(), Chain&: TCChain) &&
      (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
  if (isTailCall)
    InChain = TCChain;
  CLI.setDebugLoc(SDLoc(Op))
      .setChain(InChain)
      .setLibCallee(CC: CallingConv::C, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
      .setTailCall(isTailCall)
      .setSExtResult(SignExtend)
      .setZExtResult(!SignExtend)
      .setIsPostTypeLegalization(true);
  return TLI.LowerCallTo(CLI).first;
}
20322
20323SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20324 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20325 SelectionDAG &DAG) const {
20326 if (Op.getValueType() == MVT::f32)
20327 return lowerToLibCall(LibCallName: LibCallFloatName, Op, DAG);
20328
20329 if (Op.getValueType() == MVT::f64)
20330 return lowerToLibCall(LibCallName: LibCallDoubleName, Op, DAG);
20331
20332 return SDValue();
20333}
20334
20335bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20336 SDNodeFlags Flags = Op.getNode()->getFlags();
20337 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20338 Flags.hasNoNaNs() && Flags.hasNoInfs();
20339}
20340
20341bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20342 return Op.getNode()->getFlags().hasApproximateFuncs();
20343}
20344
20345bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20346 return getTargetMachine().Options.PPCGenScalarMASSEntries;
20347}
20348
20349SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20350 const char *LibCallFloatName,
20351 const char *LibCallDoubleNameFinite,
20352 const char *LibCallFloatNameFinite,
20353 SDValue Op,
20354 SelectionDAG &DAG) const {
20355 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20356 return SDValue();
20357
20358 if (!isLowringToMASSFiniteSafe(Op))
20359 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20360 DAG);
20361
20362 return lowerLibCallBasedOnType(LibCallFloatName: LibCallFloatNameFinite,
20363 LibCallDoubleName: LibCallDoubleNameFinite, Op, DAG);
20364}
20365
20366SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20367 return lowerLibCallBase(LibCallDoubleName: "__xl_pow", LibCallFloatName: "__xl_powf", LibCallDoubleNameFinite: "__xl_pow_finite",
20368 LibCallFloatNameFinite: "__xl_powf_finite", Op, DAG);
20369}
20370
20371SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20372 return lowerLibCallBase(LibCallDoubleName: "__xl_sin", LibCallFloatName: "__xl_sinf", LibCallDoubleNameFinite: "__xl_sin_finite",
20373 LibCallFloatNameFinite: "__xl_sinf_finite", Op, DAG);
20374}
20375
20376SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20377 return lowerLibCallBase(LibCallDoubleName: "__xl_cos", LibCallFloatName: "__xl_cosf", LibCallDoubleNameFinite: "__xl_cos_finite",
20378 LibCallFloatNameFinite: "__xl_cosf_finite", Op, DAG);
20379}
20380
20381SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20382 return lowerLibCallBase(LibCallDoubleName: "__xl_log", LibCallFloatName: "__xl_logf", LibCallDoubleNameFinite: "__xl_log_finite",
20383 LibCallFloatNameFinite: "__xl_logf_finite", Op, DAG);
20384}
20385
20386SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20387 return lowerLibCallBase(LibCallDoubleName: "__xl_log10", LibCallFloatName: "__xl_log10f", LibCallDoubleNameFinite: "__xl_log10_finite",
20388 LibCallFloatNameFinite: "__xl_log10f_finite", Op, DAG);
20389}
20390
20391SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20392 return lowerLibCallBase(LibCallDoubleName: "__xl_exp", LibCallFloatName: "__xl_expf", LibCallDoubleNameFinite: "__xl_exp_finite",
20393 LibCallFloatNameFinite: "__xl_expf_finite", Op, DAG);
20394}
20395
20396// If we happen to match to an aligned D-Form, check if the Frame Index is
20397// adequately aligned. If it is not, reset the mode to match to X-Form.
20398static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20399 PPC::AddrMode &Mode) {
20400 if (!isa<FrameIndexSDNode>(Val: N))
20401 return;
20402 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20403 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
20404 Mode = PPC::AM_XForm;
20405}
20406
/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
/// Returns the selected addressing mode; Base/Disp are outputs.
PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
                                                       SDValue N, SDValue &Disp,
                                                       SDValue &Base,
                                                       SelectionDAG &DAG,
                                                       MaybeAlign Align) const {
  SDLoc DL(Parent);

  // Compute the address flags.
  unsigned Flags = computeMOFlags(Parent, N, DAG);

  // Get the optimal address mode based on the Flags.
  PPC::AddrMode Mode = getAddrModeForFlags(Flags);

  // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
  // Select an X-Form load if it is not.
  setXFormForUnalignedFI(N, Flags, Mode);

  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
    assert(Subtarget.isUsingPCRelativeCalls() &&
           "Must be using PC-Relative calls when a valid PC-Relative node is "
           "present!");
    Mode = PPC::AM_PCRel;
  }

  // Set Base and Disp accordingly depending on the address mode.
  switch (Mode) {
  case PPC::AM_DForm:
  case PPC::AM_DSForm:
  case PPC::AM_DQForm: {
    // This is a register plus a 16-bit immediate. The base will be the
    // register and the displacement will be the immediate unless it
    // isn't sufficiently aligned.
    if (Flags & PPC::MOF_RPlusSImm16) {
      SDValue Op0 = N.getOperand(i: 0);
      SDValue Op1 = N.getOperand(i: 1);
      int16_t Imm = Op1->getAsZExtVal();
      if (!Align || isAligned(Lhs: *Align, SizeInBytes: Imm)) {
        Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: N.getValueType());
        Base = Op0;
        // A frame-index base is rewritten to a target frame index.
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op0)) {
          Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
          fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
        }
        break;
      }
    }
    // This is a register plus the @lo relocation. The base is the register
    // and the displacement is the global address.
    else if (Flags & PPC::MOF_RPlusLo) {
      Disp = N.getOperand(i: 1).getOperand(i: 0); // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(i: 0);
      break;
    }
    // This is a constant address at most 32 bits. The base will be
    // zero or load-immediate-shifted and the displacement will be
    // the low 16 bits of the address.
    else if (Flags & PPC::MOF_AddrIsSImm32) {
      auto *CN = cast<ConstantSDNode>(Val&: N);
      EVT CNType = CN->getValueType(ResNo: 0);
      uint64_t CNImm = CN->getZExtValue();
      // If this address fits entirely in a 16-bit sext immediate field, codegen
      // this as "d, 0".
      int16_t Imm;
      if (isIntS16Immediate(N: CN, Imm) && (!Align || isAligned(Lhs: *Align, SizeInBytes: Imm))) {
        Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: CNType);
        Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               VT: CNType);
        break;
      }
      // Handle 32-bit sext immediate with LIS + Addr mode.
      if ((CNType == MVT::i32 || isInt<32>(x: CNImm)) &&
          (!Align || isAligned(Lhs: *Align, SizeInBytes: CNImm))) {
        int32_t Addr = (int32_t)CNImm;
        // Otherwise, break this down into LIS + Disp. Subtracting the
        // sign-extended low halfword before shifting compensates for the
        // displacement being sign-extended when added back at runtime.
        Disp = DAG.getSignedTargetConstant(Val: (int16_t)Addr, DL, VT: MVT::i32);
        Base = DAG.getSignedTargetConstant(Val: (Addr - (int16_t)Addr) >> 16, DL,
                                           VT: MVT::i32);
        uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(Opcode: LIS, dl: DL, VT: CNType, Op1: Base), 0);
        break;
      }
    }
    // Otherwise, the PPC::MOF_NotAddNorCst flag is set. Load/Store is
    // Non-foldable: use a zero displacement and the node itself as base.
    Disp = DAG.getTargetConstant(Val: 0, DL, VT: getPointerTy(DL: DAG.getDataLayout()));
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
      Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
      fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
    } else
      Base = N;
    break;
  }
  case PPC::AM_PrefixDForm: {
    int64_t Imm34 = 0;
    unsigned Opcode = N.getOpcode();
    if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
        (isIntS34Immediate(Op: N.getOperand(i: 1), Imm&: Imm34))) {
      // N is an Add/OR Node, and its operand is a 34-bit signed immediate.
      Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
        Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
      else
        Base = N.getOperand(i: 0);
    } else if (isIntS34Immediate(Op: N, Imm&: Imm34)) {
      // The address is a 34-bit signed immediate.
      Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
      Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
    }
    break;
  }
  case PPC::AM_PCRel: {
    // When selecting PC-Relative instructions, "Base" is not utilized as
    // we select the address as [PC+imm].
    Disp = N;
    break;
  }
  case PPC::AM_None:
    break;
  default: { // By default, X-Form is always available to be selected.
    // When a frame index is not aligned, we also match by XForm.
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N);
    Base = FI ? N : N.getOperand(i: 1);
    Disp = FI ? DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                                VT: N.getValueType())
              : N.getOperand(i: 0);
    break;
  }
  }
  return Mode;
}
20544
20545CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
20546 bool Return,
20547 bool IsVarArg) const {
20548 switch (CC) {
20549 case CallingConv::Cold:
20550 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20551 default:
20552 return CC_PPC64_ELF;
20553 }
20554}
20555
20556bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
20557 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20558}
20559
20560TargetLowering::AtomicExpansionKind
20561PPCTargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
20562 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20563 if (shouldInlineQuadwordAtomics() && Size == 128)
20564 return AtomicExpansionKind::MaskedIntrinsic;
20565
20566 switch (AI->getOperation()) {
20567 case AtomicRMWInst::UIncWrap:
20568 case AtomicRMWInst::UDecWrap:
20569 case AtomicRMWInst::USubCond:
20570 case AtomicRMWInst::USubSat:
20571 return AtomicExpansionKind::CmpXChg;
20572 default:
20573 return TargetLowering::shouldExpandAtomicRMWInIR(RMW: AI);
20574 }
20575
20576 llvm_unreachable("unreachable atomicrmw operation");
20577}
20578
20579TargetLowering::AtomicExpansionKind
20580PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(
20581 const AtomicCmpXchgInst *AI) const {
20582 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
20583 if (shouldInlineQuadwordAtomics() && Size == 128)
20584 return AtomicExpansionKind::MaskedIntrinsic;
20585 return AtomicExpansionKind::LLSC;
20586}
20587
20588static Intrinsic::ID
20589getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
20590 switch (BinOp) {
20591 default:
20592 llvm_unreachable("Unexpected AtomicRMW BinOp");
20593 case AtomicRMWInst::Xchg:
20594 return Intrinsic::ppc_atomicrmw_xchg_i128;
20595 case AtomicRMWInst::Add:
20596 return Intrinsic::ppc_atomicrmw_add_i128;
20597 case AtomicRMWInst::Sub:
20598 return Intrinsic::ppc_atomicrmw_sub_i128;
20599 case AtomicRMWInst::And:
20600 return Intrinsic::ppc_atomicrmw_and_i128;
20601 case AtomicRMWInst::Or:
20602 return Intrinsic::ppc_atomicrmw_or_i128;
20603 case AtomicRMWInst::Xor:
20604 return Intrinsic::ppc_atomicrmw_xor_i128;
20605 case AtomicRMWInst::Nand:
20606 return Intrinsic::ppc_atomicrmw_nand_i128;
20607 }
20608}
20609
/// Expand a 128-bit atomicrmw into a call to the matching PPC quadword
/// atomic intrinsic: the i128 increment is split into two i64 halves,
/// passed to the intrinsic, and the intrinsic's {lo, hi} i64 result pair
/// is reassembled into an i128 value that is returned.
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = Incr->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Type *Int64Ty = Type::getInt64Ty(C&: M->getContext());
  // Split the 128-bit increment into low and high 64-bit halves.
  Value *IncrLo = Builder.CreateTrunc(V: Incr, DestTy: Int64Ty, Name: "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(V: Builder.CreateLShr(LHS: Incr, RHS: 64), DestTy: Int64Ty, Name: "incr_hi");
  // Call the i128 intrinsic selected from the atomicrmw operation.
  Value *LoHi = Builder.CreateIntrinsic(
      ID: getIntrinsicForAtomicRMWBinOp128(BinOp: AI->getOperation()), Types: {},
      Args: {AlignedAddr, IncrLo, IncrHi});
  // Recombine the {lo, hi} result pair: val = lo | (hi << 64).
  Value *Lo = Builder.CreateExtractValue(Agg: LoHi, Idxs: 0, Name: "lo");
  Value *Hi = Builder.CreateExtractValue(Agg: LoHi, Idxs: 1, Name: "hi");
  Lo = Builder.CreateZExt(V: Lo, DestTy: ValTy, Name: "lo64");
  Hi = Builder.CreateZExt(V: Hi, DestTy: ValTy, Name: "hi64");
  return Builder.CreateOr(
      LHS: Lo, RHS: Builder.CreateShl(LHS: Hi, RHS: ConstantInt::get(Ty: ValTy, V: 64)), Name: "val64");
}
20631
/// Expand a 128-bit cmpxchg into a call to the PPC i128 cmpxchg intrinsic:
/// the compare and new values are each split into two i64 halves, the call
/// is bracketed by the target's leading/trailing fences for the requested
/// ordering, and the {lo, hi} i64 result pair is reassembled into an i128.
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = CmpVal->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *IntCmpXchg =
      Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::ppc_cmpxchg_i128);
  Type *Int64Ty = Type::getInt64Ty(C&: M->getContext());
  // Split the 128-bit compare and new values into 64-bit halves.
  Value *CmpLo = Builder.CreateTrunc(V: CmpVal, DestTy: Int64Ty, Name: "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(V: Builder.CreateLShr(LHS: CmpVal, RHS: 64), DestTy: Int64Ty, Name: "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(V: NewVal, DestTy: Int64Ty, Name: "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(V: Builder.CreateLShr(LHS: NewVal, RHS: 64), DestTy: Int64Ty, Name: "new_hi");
  // Emit the ordering fences around the intrinsic call.
  emitLeadingFence(Builder, Inst: CI, Ord);
  Value *LoHi =
      Builder.CreateCall(Callee: IntCmpXchg, Args: {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
  emitTrailingFence(Builder, Inst: CI, Ord);
  // Recombine the {lo, hi} result pair: val = lo | (hi << 64).
  Value *Lo = Builder.CreateExtractValue(Agg: LoHi, Idxs: 0, Name: "lo");
  Value *Hi = Builder.CreateExtractValue(Agg: LoHi, Idxs: 1, Name: "hi");
  Lo = Builder.CreateZExt(V: Lo, DestTy: ValTy, Name: "lo64");
  Hi = Builder.CreateZExt(V: Hi, DestTy: ValTy, Name: "hi64");
  return Builder.CreateOr(
      LHS: Lo, RHS: Builder.CreateShl(LHS: Hi, RHS: ConstantInt::get(Ty: ValTy, V: 64)), Name: "val64");
}
20659
20660bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
20661 return Subtarget.useCRBits();
20662}
20663