//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
14#include "MCTargetDesc/PPCMCTargetDesc.h"
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
20#include "PPCMachineFunctionInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/CodeGen/CallingConvLower.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/LivePhysRegs.h"
39#include "llvm/CodeGen/MachineBasicBlock.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunction.h"
42#include "llvm/CodeGen/MachineInstr.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachineJumpTableInfo.h"
45#include "llvm/CodeGen/MachineLoopInfo.h"
46#include "llvm/CodeGen/MachineMemOperand.h"
47#include "llvm/CodeGen/MachineModuleInfo.h"
48#include "llvm/CodeGen/MachineOperand.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/SelectionDAG.h"
51#include "llvm/CodeGen/SelectionDAGNodes.h"
52#include "llvm/CodeGen/TargetInstrInfo.h"
53#include "llvm/CodeGen/TargetLowering.h"
54#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
55#include "llvm/CodeGen/TargetRegisterInfo.h"
56#include "llvm/CodeGen/ValueTypes.h"
57#include "llvm/CodeGenTypes/MachineValueType.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/DerivedTypes.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Instructions.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
76#include "llvm/MC/MCSectionXCOFF.h"
77#include "llvm/MC/MCSymbolXCOFF.h"
78#include "llvm/Support/AtomicOrdering.h"
79#include "llvm/Support/BranchProbability.h"
80#include "llvm/Support/Casting.h"
81#include "llvm/Support/CodeGen.h"
82#include "llvm/Support/CommandLine.h"
83#include "llvm/Support/Compiler.h"
84#include "llvm/Support/Debug.h"
85#include "llvm/Support/ErrorHandling.h"
86#include "llvm/Support/Format.h"
87#include "llvm/Support/KnownBits.h"
88#include "llvm/Support/MathExtras.h"
89#include "llvm/Support/raw_ostream.h"
90#include "llvm/Target/TargetMachine.h"
91#include "llvm/Target/TargetOptions.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
105static cl::opt<bool> DisableP10StoreForward(
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(Val: false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(Val: true), cl::Hidden);
132
133cl::opt<bool> DisableAutoPairedVecSt(
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(Val: true), cl::Hidden);
137
138static cl::opt<unsigned> PPCMinimumJumpTableEntries(
139 "ppc-min-jump-table-entries", cl::init(Val: 64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
142static cl::opt<unsigned> PPCMinimumBitTestCmps(
143 "ppc-min-bit-test-cmps", cl::init(Val: 3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
147static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
148 "ppc-gather-alias-max-depth", cl::init(Val: 18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
151static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(Val: 1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
170constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
171
172// FIXME: Remove this once the bug has been fixed!
173extern cl::opt<bool> ANDIGlueBug;
174
175PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
176 const PPCSubtarget &STI)
177 : TargetLowering(TM, STI), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting load and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186 const MVT RegVT = Subtarget.getScalarIntVT();
187
188 // Set up the register classes.
189 addRegisterClass(VT: MVT::i32, RC: &PPC::GPRCRegClass);
190 if (!useSoftFloat()) {
191 if (hasSPE()) {
192 addRegisterClass(VT: MVT::f32, RC: &PPC::GPRCRegClass);
193 // EFPU2 APU only supports f32
194 if (!Subtarget.hasEFPU2())
195 addRegisterClass(VT: MVT::f64, RC: &PPC::SPERCRegClass);
196 } else {
197 addRegisterClass(VT: MVT::f32, RC: &PPC::F4RCRegClass);
198 addRegisterClass(VT: MVT::f64, RC: &PPC::F8RCRegClass);
199 }
200 }
201
202 setOperationAction(Op: ISD::UADDO, VT: RegVT, Action: Custom);
203 setOperationAction(Op: ISD::USUBO, VT: RegVT, Action: Custom);
204
205 // PowerPC uses addo_carry,subo_carry to propagate carry.
206 setOperationAction(Op: ISD::UADDO_CARRY, VT: RegVT, Action: Custom);
207 setOperationAction(Op: ISD::USUBO_CARRY, VT: RegVT, Action: Custom);
208
209 // On P10, the default lowering generates better code using the
210 // setbc instruction.
211 if (!Subtarget.hasP10Vector()) {
212 setOperationAction(Op: ISD::SSUBO, VT: MVT::i32, Action: Custom);
213 setOperationAction(Op: ISD::SADDO, VT: MVT::i32, Action: Custom);
214 if (isPPC64) {
215 setOperationAction(Op: ISD::SSUBO, VT: MVT::i64, Action: Custom);
216 setOperationAction(Op: ISD::SADDO, VT: MVT::i64, Action: Custom);
217 }
218 }
219
220 // Match BITREVERSE to customized fast code sequence in the td file.
221 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Legal);
222 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i64, Action: Legal);
223
224 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
225 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP, VT: MVT::i32, Action: Custom);
226
227 // Custom lower inline assembly to check for special registers.
228 setOperationAction(Op: ISD::INLINEASM, VT: MVT::Other, Action: Custom);
229 setOperationAction(Op: ISD::INLINEASM_BR, VT: MVT::Other, Action: Custom);
230
231 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232 for (MVT VT : MVT::integer_valuetypes()) {
233 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
234 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i8, Action: Expand);
235 }
236
237 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f16, Action: Expand);
238 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f128, Action: Expand);
239
240 if (Subtarget.isISA3_0()) {
241 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: MVT::f16, Action: Legal);
242 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Legal);
243 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Legal);
244 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Legal);
245 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Legal);
246 } else {
247 // No extending loads from f16 or HW conversions back and forth.
248 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: MVT::f16, Action: Expand);
249 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f128, Action: Expand);
250 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
251 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand);
252 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64, Action: Expand);
253 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
254 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32, Action: Expand);
255 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32, Action: Expand);
256 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
257 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
258 }
259
260 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
261
262 // PowerPC has pre-inc load and store's.
263 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i1, Action: Legal);
264 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i8, Action: Legal);
265 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i16, Action: Legal);
266 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i32, Action: Legal);
267 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::i64, Action: Legal);
268 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i1, Action: Legal);
269 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i8, Action: Legal);
270 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i16, Action: Legal);
271 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i32, Action: Legal);
272 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::i64, Action: Legal);
273 if (!Subtarget.hasSPE()) {
274 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::f32, Action: Legal);
275 setIndexedLoadAction(IdxModes: ISD::PRE_INC, VT: MVT::f64, Action: Legal);
276 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::f32, Action: Legal);
277 setIndexedStoreAction(IdxModes: ISD::PRE_INC, VT: MVT::f64, Action: Legal);
278 }
279
280 if (Subtarget.useCRBits()) {
281 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
282
283 if (isPPC64 || Subtarget.hasFPCVT()) {
284 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i1, Action: Promote);
285 AddPromotedToType(Opc: ISD::STRICT_SINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
286 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i1, Action: Promote);
287 AddPromotedToType(Opc: ISD::STRICT_UINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
288
289 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i1, Action: Promote);
290 AddPromotedToType(Opc: ISD::SINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
291 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i1, Action: Promote);
292 AddPromotedToType(Opc: ISD::UINT_TO_FP, OrigVT: MVT::i1, DestVT: RegVT);
293
294 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i1, Action: Promote);
295 AddPromotedToType(Opc: ISD::STRICT_FP_TO_SINT, OrigVT: MVT::i1, DestVT: RegVT);
296 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i1, Action: Promote);
297 AddPromotedToType(Opc: ISD::STRICT_FP_TO_UINT, OrigVT: MVT::i1, DestVT: RegVT);
298
299 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i1, Action: Promote);
300 AddPromotedToType(Opc: ISD::FP_TO_SINT, OrigVT: MVT::i1, DestVT: RegVT);
301 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i1, Action: Promote);
302 AddPromotedToType(Opc: ISD::FP_TO_UINT, OrigVT: MVT::i1, DestVT: RegVT);
303 } else {
304 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i1, Action: Custom);
305 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i1, Action: Custom);
306 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i1, Action: Custom);
307 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i1, Action: Custom);
308 }
309
310 // PowerPC does not support direct load/store of condition registers.
311 setOperationAction(Op: ISD::LOAD, VT: MVT::i1, Action: Custom);
312 setOperationAction(Op: ISD::STORE, VT: MVT::i1, Action: Custom);
313
314 // FIXME: Remove this once the ANDI glue bug is fixed:
315 if (ANDIGlueBug)
316 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::i1, Action: Custom);
317
318 for (MVT VT : MVT::integer_valuetypes()) {
319 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
320 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
321 setTruncStoreAction(ValVT: VT, MemVT: MVT::i1, Action: Expand);
322 }
323
324 addRegisterClass(VT: MVT::i1, RC: &PPC::CRBITRCRegClass);
325 }
326
327 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328 // PPC (the libcall is not available).
329 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::ppcf128, Action: Custom);
330 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::ppcf128, Action: Custom);
331 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::ppcf128, Action: Custom);
332 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::ppcf128, Action: Custom);
333
334 // We do not currently implement these libm ops for PowerPC.
335 setOperationAction(Op: ISD::FFLOOR, VT: MVT::ppcf128, Action: Expand);
336 setOperationAction(Op: ISD::FCEIL, VT: MVT::ppcf128, Action: Expand);
337 setOperationAction(Op: ISD::FTRUNC, VT: MVT::ppcf128, Action: Expand);
338 setOperationAction(Op: ISD::FRINT, VT: MVT::ppcf128, Action: Expand);
339 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::ppcf128, Action: Expand);
340 setOperationAction(Op: ISD::FREM, VT: MVT::ppcf128, Action: LibCall);
341
342 // PowerPC has no SREM/UREM instructions unless we are on P9
343 // On P9 we may use a hardware instruction to compute the remainder.
344 // When the result of both the remainder and the division is required it is
345 // more efficient to compute the remainder from the result of the division
346 // rather than use the remainder instruction. The instructions are legalized
347 // directly because the DivRemPairsPass performs the transformation at the IR
348 // level.
349 if (Subtarget.isISA3_0()) {
350 setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Legal);
351 setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Legal);
352 setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Legal);
353 setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Legal);
354 } else {
355 setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Expand);
356 setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Expand);
357 setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Expand);
358 setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Expand);
359 }
360
361 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
362 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Expand);
363 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Expand);
364 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand);
365 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand);
366 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Expand);
367 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Expand);
368 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand);
369 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand);
370
371 // Handle constrained floating-point operations of scalar.
372 // TODO: Handle SPE specific operation.
373 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f32, Action: Legal);
374 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f32, Action: Legal);
375 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f32, Action: Legal);
376 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f32, Action: Legal);
377 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f32, Action: Legal);
378
379 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f64, Action: Legal);
380 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f64, Action: Legal);
381 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f64, Action: Legal);
382 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f64, Action: Legal);
383
384 if (!Subtarget.hasSPE()) {
385 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f32, Action: Legal);
386 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f64, Action: Legal);
387 }
388
389 if (Subtarget.hasVSX()) {
390 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f32, Action: Legal);
391 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f64, Action: Legal);
392 }
393
394 if (Subtarget.hasFSQRT()) {
395 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f32, Action: Legal);
396 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f64, Action: Legal);
397 }
398
399 if (Subtarget.hasFPRND()) {
400 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f32, Action: Legal);
401 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f32, Action: Legal);
402 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f32, Action: Legal);
403 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f32, Action: Legal);
404
405 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f64, Action: Legal);
406 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f64, Action: Legal);
407 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f64, Action: Legal);
408 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f64, Action: Legal);
409 }
410
411 // We don't support sin/cos/sqrt/fmod/pow
412 setOperationAction(Op: ISD::FSIN , VT: MVT::f64, Action: Expand);
413 setOperationAction(Op: ISD::FCOS , VT: MVT::f64, Action: Expand);
414 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand);
415 setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: LibCall);
416 setOperationAction(Op: ISD::FPOW , VT: MVT::f64, Action: Expand);
417 setOperationAction(Op: ISD::FSIN , VT: MVT::f32, Action: Expand);
418 setOperationAction(Op: ISD::FCOS , VT: MVT::f32, Action: Expand);
419 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand);
420 setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: LibCall);
421 setOperationAction(Op: ISD::FPOW , VT: MVT::f32, Action: Expand);
422
423 // MASS transformation for LLVM intrinsics with replicating fast-math flag
424 // to be consistent to PPCGenScalarMASSEntries pass
425 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
426 setOperationAction(Op: ISD::FSIN , VT: MVT::f64, Action: Custom);
427 setOperationAction(Op: ISD::FCOS , VT: MVT::f64, Action: Custom);
428 setOperationAction(Op: ISD::FPOW , VT: MVT::f64, Action: Custom);
429 setOperationAction(Op: ISD::FLOG, VT: MVT::f64, Action: Custom);
430 setOperationAction(Op: ISD::FLOG10, VT: MVT::f64, Action: Custom);
431 setOperationAction(Op: ISD::FEXP, VT: MVT::f64, Action: Custom);
432 setOperationAction(Op: ISD::FSIN , VT: MVT::f32, Action: Custom);
433 setOperationAction(Op: ISD::FCOS , VT: MVT::f32, Action: Custom);
434 setOperationAction(Op: ISD::FPOW , VT: MVT::f32, Action: Custom);
435 setOperationAction(Op: ISD::FLOG, VT: MVT::f32, Action: Custom);
436 setOperationAction(Op: ISD::FLOG10, VT: MVT::f32, Action: Custom);
437 setOperationAction(Op: ISD::FEXP, VT: MVT::f32, Action: Custom);
438 }
439
440 if (Subtarget.hasSPE()) {
441 setOperationAction(Op: ISD::FMA , VT: MVT::f64, Action: Expand);
442 setOperationAction(Op: ISD::FMA , VT: MVT::f32, Action: Expand);
443 } else {
444 setOperationAction(Op: ISD::FMA , VT: MVT::f64, Action: Legal);
445 setOperationAction(Op: ISD::FMA , VT: MVT::f32, Action: Legal);
446 setOperationAction(Op: ISD::GET_ROUNDING, VT: MVT::i32, Action: Custom);
447 setOperationAction(Op: ISD::SET_ROUNDING, VT: MVT::Other, Action: Custom);
448 }
449
450 if (Subtarget.hasSPE())
451 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
452
453 // If we're enabling GP optimizations, use hardware square root
454 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
455 setOperationAction(Op: ISD::FSQRT, VT: MVT::f64, Action: Expand);
456
457 if (!Subtarget.hasFSQRT() &&
458 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
459 setOperationAction(Op: ISD::FSQRT, VT: MVT::f32, Action: Expand);
460
461 if (Subtarget.hasFCPSGN()) {
462 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f64, Action: Legal);
463 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f32, Action: Legal);
464 } else {
465 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f64, Action: Expand);
466 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f32, Action: Expand);
467 }
468
469 if (Subtarget.hasFPRND()) {
470 setOperationAction(Op: ISD::FFLOOR, VT: MVT::f64, Action: Legal);
471 setOperationAction(Op: ISD::FCEIL, VT: MVT::f64, Action: Legal);
472 setOperationAction(Op: ISD::FTRUNC, VT: MVT::f64, Action: Legal);
473 setOperationAction(Op: ISD::FROUND, VT: MVT::f64, Action: Legal);
474
475 setOperationAction(Op: ISD::FFLOOR, VT: MVT::f32, Action: Legal);
476 setOperationAction(Op: ISD::FCEIL, VT: MVT::f32, Action: Legal);
477 setOperationAction(Op: ISD::FTRUNC, VT: MVT::f32, Action: Legal);
478 setOperationAction(Op: ISD::FROUND, VT: MVT::f32, Action: Legal);
479 }
480
481 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
482 // instruction xxbrd to speed up scalar BSWAP64.
483 if (Subtarget.isISA3_1()) {
484 setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Legal);
485 setOperationAction(Op: ISD::BSWAP, VT: MVT::i64, Action: Legal);
486 } else {
487 setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Expand);
488 setOperationAction(Op: ISD::BSWAP, VT: MVT::i64,
489 Action: (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490 }
491
492 // CTPOP or CTTZ were introduced in P8/P9 respectively
493 if (Subtarget.isISA3_0()) {
494 setOperationAction(Op: ISD::CTTZ , VT: MVT::i32 , Action: Legal);
495 setOperationAction(Op: ISD::CTTZ , VT: MVT::i64 , Action: Legal);
496 } else {
497 setOperationAction(Op: ISD::CTTZ , VT: MVT::i32 , Action: Expand);
498 setOperationAction(Op: ISD::CTTZ , VT: MVT::i64 , Action: Expand);
499 }
500
501 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
502 setOperationAction(Op: ISD::CTPOP, VT: MVT::i32 , Action: Legal);
503 setOperationAction(Op: ISD::CTPOP, VT: MVT::i64 , Action: Legal);
504 } else {
505 setOperationAction(Op: ISD::CTPOP, VT: MVT::i32 , Action: Expand);
506 setOperationAction(Op: ISD::CTPOP, VT: MVT::i64 , Action: Expand);
507 }
508
509 // PowerPC does not have ROTR
510 setOperationAction(Op: ISD::ROTR, VT: MVT::i32 , Action: Expand);
511 setOperationAction(Op: ISD::ROTR, VT: MVT::i64 , Action: Expand);
512
513 if (!Subtarget.useCRBits()) {
514 // PowerPC does not have Select
515 setOperationAction(Op: ISD::SELECT, VT: MVT::i32, Action: Expand);
516 setOperationAction(Op: ISD::SELECT, VT: MVT::i64, Action: Expand);
517 setOperationAction(Op: ISD::SELECT, VT: MVT::f32, Action: Expand);
518 setOperationAction(Op: ISD::SELECT, VT: MVT::f64, Action: Expand);
519 }
520
521 // PowerPC wants to turn select_cc of FP into fsel when possible.
522 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Custom);
523 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Custom);
524
525 // PowerPC wants to optimize integer setcc a bit
526 if (!Subtarget.useCRBits())
527 setOperationAction(Op: ISD::SETCC, VT: MVT::i32, Action: Custom);
528
529 if (Subtarget.hasFPU()) {
530 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal);
531 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal);
532 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f128, Action: Legal);
533
534 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
535 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
536 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f128, Action: Legal);
537 }
538
539 // PowerPC does not have BRCOND which requires SetCC
540 if (!Subtarget.useCRBits())
541 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Expand);
542
543 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
544
545 if (Subtarget.hasSPE()) {
546 // SPE has built-in conversions
547 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i32, Action: Legal);
548 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Legal);
549 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i32, Action: Legal);
550 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Legal);
551 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Legal);
552 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Legal);
553
554 // SPE supports signaling compare of f32/f64.
555 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
556 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
557 } else {
558 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
559 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i32, Action: Custom);
560 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
561
562 // PowerPC does not have [U|S]INT_TO_FP
563 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Expand);
564 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i32, Action: Expand);
565 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Expand);
566 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Expand);
567 }
568
569 if (Subtarget.hasDirectMove() && isPPC64) {
570 setOperationAction(Op: ISD::BITCAST, VT: MVT::f32, Action: Legal);
571 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Legal);
572 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Legal);
573 setOperationAction(Op: ISD::BITCAST, VT: MVT::f64, Action: Legal);
574
575 setOperationAction(Op: ISD::STRICT_LRINT, VT: MVT::f64, Action: Custom);
576 setOperationAction(Op: ISD::STRICT_LRINT, VT: MVT::f32, Action: Custom);
577 setOperationAction(Op: ISD::STRICT_LLRINT, VT: MVT::f64, Action: Custom);
578 setOperationAction(Op: ISD::STRICT_LLRINT, VT: MVT::f32, Action: Custom);
579 setOperationAction(Op: ISD::STRICT_LROUND, VT: MVT::f64, Action: Custom);
580 setOperationAction(Op: ISD::STRICT_LROUND, VT: MVT::f32, Action: Custom);
581 setOperationAction(Op: ISD::STRICT_LLROUND, VT: MVT::f64, Action: Custom);
582 setOperationAction(Op: ISD::STRICT_LLROUND, VT: MVT::f32, Action: Custom);
583 } else {
584 setOperationAction(Op: ISD::BITCAST, VT: MVT::f32, Action: Expand);
585 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Expand);
586 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Expand);
587 setOperationAction(Op: ISD::BITCAST, VT: MVT::f64, Action: Expand);
588 }
589
590 // We cannot sextinreg(i1). Expand to shifts.
591 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
592
593 // Custom handling for PowerPC ucmp instruction
594 setOperationAction(Op: ISD::UCMP, VT: MVT::i32, Action: Custom);
595 setOperationAction(Op: ISD::UCMP, VT: MVT::i64, Action: isPPC64 ? Custom : Expand);
596
597 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
599 // support continuation, user-level threading, and etc.. As a result, no
600 // other SjLj exception interfaces are implemented and please don't build
601 // your own exception handling based on them.
602 // LLVM/Clang supports zero-cost DWARF exception handling.
603 setOperationAction(Op: ISD::EH_SJLJ_SETJMP, VT: MVT::i32, Action: Custom);
604 setOperationAction(Op: ISD::EH_SJLJ_LONGJMP, VT: MVT::Other, Action: Custom);
605
606 // We want to legalize GlobalAddress and ConstantPool nodes into the
607 // appropriate instructions to materialize the address.
608 setOperationAction(Op: ISD::GlobalAddress, VT: MVT::i32, Action: Custom);
609 setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVT::i32, Action: Custom);
610 setOperationAction(Op: ISD::BlockAddress, VT: MVT::i32, Action: Custom);
611 setOperationAction(Op: ISD::ConstantPool, VT: MVT::i32, Action: Custom);
612 setOperationAction(Op: ISD::JumpTable, VT: MVT::i32, Action: Custom);
613 setOperationAction(Op: ISD::GlobalAddress, VT: MVT::i64, Action: Custom);
614 setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVT::i64, Action: Custom);
615 setOperationAction(Op: ISD::BlockAddress, VT: MVT::i64, Action: Custom);
616 setOperationAction(Op: ISD::ConstantPool, VT: MVT::i64, Action: Custom);
617 setOperationAction(Op: ISD::JumpTable, VT: MVT::i64, Action: Custom);
618
619 // TRAP is legal.
620 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
621
622 // TRAMPOLINE is custom lowered.
623 setOperationAction(Op: ISD::INIT_TRAMPOLINE, VT: MVT::Other, Action: Custom);
624 setOperationAction(Op: ISD::ADJUST_TRAMPOLINE, VT: MVT::Other, Action: Custom);
625
626 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
627 setOperationAction(Op: ISD::VASTART , VT: MVT::Other, Action: Custom);
628
629 if (Subtarget.is64BitELFABI()) {
630 // VAARG always uses double-word chunks, so promote anything smaller.
631 setOperationAction(Op: ISD::VAARG, VT: MVT::i1, Action: Promote);
632 AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i1, DestVT: MVT::i64);
633 setOperationAction(Op: ISD::VAARG, VT: MVT::i8, Action: Promote);
634 AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i8, DestVT: MVT::i64);
635 setOperationAction(Op: ISD::VAARG, VT: MVT::i16, Action: Promote);
636 AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i16, DestVT: MVT::i64);
637 setOperationAction(Op: ISD::VAARG, VT: MVT::i32, Action: Promote);
638 AddPromotedToType(Opc: ISD::VAARG, OrigVT: MVT::i32, DestVT: MVT::i64);
639 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
640 } else if (Subtarget.is32BitELFABI()) {
641 // VAARG is custom lowered with the 32-bit SVR4 ABI.
642 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Custom);
643 setOperationAction(Op: ISD::VAARG, VT: MVT::i64, Action: Custom);
644 } else
645 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
646
647 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
648 if (Subtarget.is32BitELFABI())
649 setOperationAction(Op: ISD::VACOPY , VT: MVT::Other, Action: Custom);
650 else
651 setOperationAction(Op: ISD::VACOPY , VT: MVT::Other, Action: Expand);
652
653 // Use the default implementation.
654 setOperationAction(Op: ISD::VAEND , VT: MVT::Other, Action: Expand);
655 setOperationAction(Op: ISD::STACKSAVE , VT: MVT::Other, Action: Expand);
656 setOperationAction(Op: ISD::STACKRESTORE , VT: MVT::Other, Action: Custom);
657 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i32 , Action: Custom);
658 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i64 , Action: Custom);
659 setOperationAction(Op: ISD::GET_DYNAMIC_AREA_OFFSET, VT: MVT::i32, Action: Custom);
660 setOperationAction(Op: ISD::GET_DYNAMIC_AREA_OFFSET, VT: MVT::i64, Action: Custom);
661 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom);
662 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i64, Action: Custom);
663
664 if (Subtarget.isISA3_0() && isPPC64) {
665 setOperationAction(Op: ISD::VP_STORE, VT: MVT::v16i1, Action: Custom);
666 setOperationAction(Op: ISD::VP_STORE, VT: MVT::v8i1, Action: Custom);
667 setOperationAction(Op: ISD::VP_STORE, VT: MVT::v4i1, Action: Custom);
668 setOperationAction(Op: ISD::VP_STORE, VT: MVT::v2i1, Action: Custom);
669 setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v16i1, Action: Custom);
670 setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v8i1, Action: Custom);
671 setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v4i1, Action: Custom);
672 setOperationAction(Op: ISD::VP_LOAD, VT: MVT::v2i1, Action: Custom);
673 }
674
675 // We want to custom lower some of our intrinsics.
676 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
677 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::f64, Action: Custom);
678 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::ppcf128, Action: Custom);
679 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::v4f32, Action: Custom);
680 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::v2f64, Action: Custom);
681
682 // To handle counter-based loop conditions.
683 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i1, Action: Custom);
684 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
685
686 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i8, Action: Custom);
687 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i16, Action: Custom);
688 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom);
689 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
690
691 // Comparisons that require checking two conditions.
692 if (Subtarget.hasSPE()) {
693 setCondCodeAction(CCs: ISD::SETO, VT: MVT::f32, Action: Expand);
694 setCondCodeAction(CCs: ISD::SETO, VT: MVT::f64, Action: Expand);
695 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::f32, Action: Expand);
696 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::f64, Action: Expand);
697 }
698 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f32, Action: Expand);
699 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f64, Action: Expand);
700 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f32, Action: Expand);
701 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f64, Action: Expand);
702 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f32, Action: Expand);
703 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f64, Action: Expand);
704 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand);
705 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f64, Action: Expand);
706 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f32, Action: Expand);
707 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f64, Action: Expand);
708 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f32, Action: Expand);
709 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f64, Action: Expand);
710
711 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f32, Action: Legal);
712 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f64, Action: Legal);
713
714 if (Subtarget.has64BitSupport()) {
715 // They also have instructions for converting between i64 and fp.
716 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i64, Action: Custom);
717 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i64, Action: Expand);
718 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i64, Action: Custom);
719 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i64, Action: Expand);
720 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i64, Action: Custom);
721 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
722 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
723 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
724 // This is just the low 32 bits of a (signed) fp->i64 conversion.
725 // We cannot do this with Promote because i64 is not a legal type.
726 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Custom);
727 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
728
729 if (Subtarget.hasLFIWAX() || isPPC64) {
730 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Custom);
731 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Custom);
732 }
733 } else {
734 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
735 if (Subtarget.hasSPE()) {
736 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Legal);
737 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Legal);
738 } else {
739 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Expand);
740 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Expand);
741 }
742 }
743
744 // With the instructions enabled under FPCVT, we can do everything.
745 if (Subtarget.hasFPCVT()) {
746 if (Subtarget.has64BitSupport()) {
747 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i64, Action: Custom);
748 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i64, Action: Custom);
749 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i64, Action: Custom);
750 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i64, Action: Custom);
751 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i64, Action: Custom);
752 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Custom);
753 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
754 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom);
755 }
756
757 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i32, Action: Custom);
758 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i32, Action: Custom);
759 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i32, Action: Custom);
760 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i32, Action: Custom);
761 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
762 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
763 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i32, Action: Custom);
764 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Custom);
765 }
766
767 if (Subtarget.use64BitRegs()) {
768 // 64-bit PowerPC implementations can support i64 types directly
769 addRegisterClass(VT: MVT::i64, RC: &PPC::G8RCRegClass);
770 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
771 setOperationAction(Op: ISD::BUILD_PAIR, VT: MVT::i64, Action: Expand);
772 // 64-bit PowerPC wants to expand i128 shifts itself.
773 setOperationAction(Op: ISD::SHL_PARTS, VT: MVT::i64, Action: Custom);
774 setOperationAction(Op: ISD::SRA_PARTS, VT: MVT::i64, Action: Custom);
775 setOperationAction(Op: ISD::SRL_PARTS, VT: MVT::i64, Action: Custom);
776 } else {
777 // 32-bit PowerPC wants to expand i64 shifts itself.
778 setOperationAction(Op: ISD::SHL_PARTS, VT: MVT::i32, Action: Custom);
779 setOperationAction(Op: ISD::SRA_PARTS, VT: MVT::i32, Action: Custom);
780 setOperationAction(Op: ISD::SRL_PARTS, VT: MVT::i32, Action: Custom);
781 }
782
783 // PowerPC has better expansions for funnel shifts than the generic
784 // TargetLowering::expandFunnelShift.
785 if (Subtarget.has64BitSupport()) {
786 setOperationAction(Op: ISD::FSHL, VT: MVT::i64, Action: Custom);
787 setOperationAction(Op: ISD::FSHR, VT: MVT::i64, Action: Custom);
788 }
789 setOperationAction(Op: ISD::FSHL, VT: MVT::i32, Action: Custom);
790 setOperationAction(Op: ISD::FSHR, VT: MVT::i32, Action: Custom);
791
792 if (Subtarget.hasVSX()) {
793 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal);
794 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal);
795 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal);
796 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal);
797 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f64, Action: Legal);
798 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f32, Action: Legal);
799 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f64, Action: Legal);
800 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f32, Action: Legal);
801 setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f64, Action: Legal);
802 setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f32, Action: Legal);
803 }
804
805 if (Subtarget.hasAltivec()) {
806 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
807 setOperationAction(Op: ISD::AVGCEILS, VT, Action: Legal);
808 setOperationAction(Op: ISD::AVGCEILU, VT, Action: Legal);
809 setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
810 setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
811 setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
812 setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
813 }
814 // First set operation action for all vector types to expand. Then we
815 // will selectively turn on ones that can be effectively codegen'd.
816 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
817 // add/sub are legal for all supported vector VT's.
818 setOperationAction(Op: ISD::ADD, VT, Action: Legal);
819 setOperationAction(Op: ISD::SUB, VT, Action: Legal);
820
821 // For v2i64, these are only valid with P8Vector. This is corrected after
822 // the loop.
823 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
824 setOperationAction(Op: ISD::SMAX, VT, Action: Legal);
825 setOperationAction(Op: ISD::SMIN, VT, Action: Legal);
826 setOperationAction(Op: ISD::UMAX, VT, Action: Legal);
827 setOperationAction(Op: ISD::UMIN, VT, Action: Legal);
828 }
829 else {
830 setOperationAction(Op: ISD::SMAX, VT, Action: Expand);
831 setOperationAction(Op: ISD::SMIN, VT, Action: Expand);
832 setOperationAction(Op: ISD::UMAX, VT, Action: Expand);
833 setOperationAction(Op: ISD::UMIN, VT, Action: Expand);
834 }
835
836 if (Subtarget.hasVSX()) {
837 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT, Action: Legal);
838 setOperationAction(Op: ISD::FMINNUM_IEEE, VT, Action: Legal);
839 setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
840 setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
841 setOperationAction(Op: ISD::FCANONICALIZE, VT, Action: Legal);
842 }
843
844 // Vector instructions introduced in P8
845 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
846 setOperationAction(Op: ISD::CTPOP, VT, Action: Legal);
847 setOperationAction(Op: ISD::CTLZ, VT, Action: Legal);
848 }
849 else {
850 setOperationAction(Op: ISD::CTPOP, VT, Action: Expand);
851 setOperationAction(Op: ISD::CTLZ, VT, Action: Expand);
852 }
853
854 // Vector instructions introduced in P9
855 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
856 setOperationAction(Op: ISD::CTTZ, VT, Action: Legal);
857 else
858 setOperationAction(Op: ISD::CTTZ, VT, Action: Expand);
859
860 // We promote all shuffles to v16i8.
861 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Promote);
862 AddPromotedToType (Opc: ISD::VECTOR_SHUFFLE, OrigVT: VT, DestVT: MVT::v16i8);
863
864 // We promote all non-typed operations to v4i32.
865 setOperationAction(Op: ISD::AND , VT, Action: Promote);
866 AddPromotedToType (Opc: ISD::AND , OrigVT: VT, DestVT: MVT::v4i32);
867 setOperationAction(Op: ISD::OR , VT, Action: Promote);
868 AddPromotedToType (Opc: ISD::OR , OrigVT: VT, DestVT: MVT::v4i32);
869 setOperationAction(Op: ISD::XOR , VT, Action: Promote);
870 AddPromotedToType (Opc: ISD::XOR , OrigVT: VT, DestVT: MVT::v4i32);
871 setOperationAction(Op: ISD::LOAD , VT, Action: Promote);
872 AddPromotedToType (Opc: ISD::LOAD , OrigVT: VT, DestVT: MVT::v4i32);
873 setOperationAction(Op: ISD::SELECT, VT, Action: Promote);
874 AddPromotedToType (Opc: ISD::SELECT, OrigVT: VT, DestVT: MVT::v4i32);
875 setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
876 setOperationAction(Op: ISD::SELECT_CC, VT, Action: Promote);
877 AddPromotedToType (Opc: ISD::SELECT_CC, OrigVT: VT, DestVT: MVT::v4i32);
878 setOperationAction(Op: ISD::STORE, VT, Action: Promote);
879 AddPromotedToType (Opc: ISD::STORE, OrigVT: VT, DestVT: MVT::v4i32);
880
881 // No other operations are legal.
882 setOperationAction(Op: ISD::MUL , VT, Action: Expand);
883 setOperationAction(Op: ISD::SDIV, VT, Action: Expand);
884 setOperationAction(Op: ISD::SREM, VT, Action: Expand);
885 setOperationAction(Op: ISD::UDIV, VT, Action: Expand);
886 setOperationAction(Op: ISD::UREM, VT, Action: Expand);
887 setOperationAction(Op: ISD::FDIV, VT, Action: Expand);
888 setOperationAction(Op: ISD::FREM, VT, Action: Expand);
889 setOperationAction(Op: ISD::FNEG, VT, Action: Expand);
890 setOperationAction(Op: ISD::FSQRT, VT, Action: Expand);
891 setOperationAction(Op: ISD::FLOG, VT, Action: Expand);
892 setOperationAction(Op: ISD::FLOG10, VT, Action: Expand);
893 setOperationAction(Op: ISD::FLOG2, VT, Action: Expand);
894 setOperationAction(Op: ISD::FEXP, VT, Action: Expand);
895 setOperationAction(Op: ISD::FEXP2, VT, Action: Expand);
896 setOperationAction(Op: ISD::FSIN, VT, Action: Expand);
897 setOperationAction(Op: ISD::FCOS, VT, Action: Expand);
898 setOperationAction(Op: ISD::FABS, VT, Action: Expand);
899 setOperationAction(Op: ISD::FFLOOR, VT, Action: Expand);
900 setOperationAction(Op: ISD::FCEIL, VT, Action: Expand);
901 setOperationAction(Op: ISD::FTRUNC, VT, Action: Expand);
902 setOperationAction(Op: ISD::FRINT, VT, Action: Expand);
903 setOperationAction(Op: ISD::FLDEXP, VT, Action: Expand);
904 setOperationAction(Op: ISD::FNEARBYINT, VT, Action: Expand);
905 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Expand);
906 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Expand);
907 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Expand);
908 setOperationAction(Op: ISD::MULHU, VT, Action: Expand);
909 setOperationAction(Op: ISD::MULHS, VT, Action: Expand);
910 setOperationAction(Op: ISD::UMUL_LOHI, VT, Action: Expand);
911 setOperationAction(Op: ISD::SMUL_LOHI, VT, Action: Expand);
912 setOperationAction(Op: ISD::UDIVREM, VT, Action: Expand);
913 setOperationAction(Op: ISD::SDIVREM, VT, Action: Expand);
914 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Expand);
915 setOperationAction(Op: ISD::FPOW, VT, Action: Expand);
916 setOperationAction(Op: ISD::BSWAP, VT, Action: Expand);
917 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT, Action: Expand);
918 setOperationAction(Op: ISD::ROTL, VT, Action: Expand);
919 setOperationAction(Op: ISD::ROTR, VT, Action: Expand);
920
921 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
922 setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
923 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
924 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
925 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
926 }
927 }
928 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::v4i32, Action: Expand);
929 if (!Subtarget.hasP8Vector()) {
930 setOperationAction(Op: ISD::SMAX, VT: MVT::v2i64, Action: Expand);
931 setOperationAction(Op: ISD::SMIN, VT: MVT::v2i64, Action: Expand);
932 setOperationAction(Op: ISD::UMAX, VT: MVT::v2i64, Action: Expand);
933 setOperationAction(Op: ISD::UMIN, VT: MVT::v2i64, Action: Expand);
934 }
935
// We can custom-expand all VECTOR_SHUFFLEs to VPERM; others we can handle
// with merges, splats, etc.
938 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v16i8, Action: Custom);
939
940 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
941 // are cheap, so handle them before they get expanded to scalar.
942 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v8i8, Action: Custom);
943 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v4i8, Action: Custom);
944 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v2i8, Action: Custom);
945 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v4i16, Action: Custom);
946 setOperationAction(Op: ISD::TRUNCATE, VT: MVT::v2i16, Action: Custom);
947
948 setOperationAction(Op: ISD::AND , VT: MVT::v4i32, Action: Legal);
949 setOperationAction(Op: ISD::OR , VT: MVT::v4i32, Action: Legal);
950 setOperationAction(Op: ISD::XOR , VT: MVT::v4i32, Action: Legal);
951 setOperationAction(Op: ISD::LOAD , VT: MVT::v4i32, Action: Legal);
952 setOperationAction(Op: ISD::SELECT, VT: MVT::v4i32,
953 Action: Subtarget.useCRBits() ? Legal : Expand);
954 setOperationAction(Op: ISD::STORE , VT: MVT::v4i32, Action: Legal);
955 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v4i32, Action: Legal);
956 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v4i32, Action: Legal);
957 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i32, Action: Legal);
958 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i32, Action: Legal);
959 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v4i32, Action: Legal);
960 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v4i32, Action: Legal);
961 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i32, Action: Legal);
962 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i32, Action: Legal);
963 setOperationAction(Op: ISD::FFLOOR, VT: MVT::v4f32, Action: Legal);
964 setOperationAction(Op: ISD::FCEIL, VT: MVT::v4f32, Action: Legal);
965 setOperationAction(Op: ISD::FTRUNC, VT: MVT::v4f32, Action: Legal);
966 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::v4f32, Action: Legal);
967
968 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
969 setOperationAction(Op: ISD::ROTL, VT: MVT::v1i128, Action: Custom);
970 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
971 if (Subtarget.hasAltivec())
972 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
973 setOperationAction(Op: ISD::ROTL, VT, Action: Legal);
974 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
975 if (Subtarget.hasP8Altivec())
976 setOperationAction(Op: ISD::ROTL, VT: MVT::v2i64, Action: Legal);
977
978 addRegisterClass(VT: MVT::v4f32, RC: &PPC::VRRCRegClass);
979 addRegisterClass(VT: MVT::v4i32, RC: &PPC::VRRCRegClass);
980 addRegisterClass(VT: MVT::v8i16, RC: &PPC::VRRCRegClass);
981 addRegisterClass(VT: MVT::v16i8, RC: &PPC::VRRCRegClass);
982
983 setOperationAction(Op: ISD::MUL, VT: MVT::v4f32, Action: Legal);
984 setOperationAction(Op: ISD::FMA, VT: MVT::v4f32, Action: Legal);
985
986 if (Subtarget.hasVSX()) {
987 setOperationAction(Op: ISD::FDIV, VT: MVT::v4f32, Action: Legal);
988 setOperationAction(Op: ISD::FSQRT, VT: MVT::v4f32, Action: Legal);
989 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2f64, Action: Custom);
990 }
991
992 if (Subtarget.hasP8Altivec())
993 setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Legal);
994 else
995 setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Custom);
996
997 if (Subtarget.isISA3_1()) {
998 setOperationAction(Op: ISD::MUL, VT: MVT::v2i64, Action: Legal);
999 setOperationAction(Op: ISD::MULHS, VT: MVT::v2i64, Action: Legal);
1000 setOperationAction(Op: ISD::MULHU, VT: MVT::v2i64, Action: Legal);
1001 setOperationAction(Op: ISD::MULHS, VT: MVT::v4i32, Action: Legal);
1002 setOperationAction(Op: ISD::MULHU, VT: MVT::v4i32, Action: Legal);
1003 setOperationAction(Op: ISD::UDIV, VT: MVT::v2i64, Action: Legal);
1004 setOperationAction(Op: ISD::SDIV, VT: MVT::v2i64, Action: Legal);
1005 setOperationAction(Op: ISD::UDIV, VT: MVT::v4i32, Action: Legal);
1006 setOperationAction(Op: ISD::SDIV, VT: MVT::v4i32, Action: Legal);
1007 setOperationAction(Op: ISD::UREM, VT: MVT::v2i64, Action: Legal);
1008 setOperationAction(Op: ISD::SREM, VT: MVT::v2i64, Action: Legal);
1009 setOperationAction(Op: ISD::UREM, VT: MVT::v4i32, Action: Legal);
1010 setOperationAction(Op: ISD::SREM, VT: MVT::v4i32, Action: Legal);
1011 setOperationAction(Op: ISD::UREM, VT: MVT::v1i128, Action: Legal);
1012 setOperationAction(Op: ISD::SREM, VT: MVT::v1i128, Action: Legal);
1013 setOperationAction(Op: ISD::UDIV, VT: MVT::v1i128, Action: Legal);
1014 setOperationAction(Op: ISD::SDIV, VT: MVT::v1i128, Action: Legal);
1015 setOperationAction(Op: ISD::ROTL, VT: MVT::v1i128, Action: Legal);
1016 }
1017
1018 setOperationAction(Op: ISD::MUL, VT: MVT::v8i16, Action: Legal);
1019 setOperationAction(Op: ISD::MUL, VT: MVT::v16i8, Action: Custom);
1020
1021 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Custom);
1022 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Custom);
1023 // LE is P8+/64-bit so direct moves are supported and these operations
1024 // are legal. The custom transformation requires 64-bit since we need a
1025 // pair of stores that will cover a 128-bit load for P10.
1026 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1027 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Custom);
1028 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Custom);
1029 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Custom);
1030 }
1031
1032 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v16i8, Action: Custom);
1033 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v8i16, Action: Custom);
1034 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4i32, Action: Custom);
1035 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4f32, Action: Custom);
1036
1037 // Altivec does not contain unordered floating-point compare instructions
1038 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v4f32, Action: Expand);
1039 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v4f32, Action: Expand);
1040 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v4f32, Action: Expand);
1041 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v4f32, Action: Expand);
1042
1043 if (Subtarget.hasVSX()) {
1044 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2f64, Action: Legal);
1045 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1046 if (Subtarget.hasP8Vector()) {
1047 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Legal);
1048 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4f32, Action: Legal);
1049 }
1050 if (Subtarget.hasDirectMove() && isPPC64) {
1051 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Legal);
1052 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Legal);
1053 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Legal);
1054 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Legal);
1055 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1056 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1057 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1058 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1059 }
1060 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1061
1062 // The nearbyint variants are not allowed to raise the inexact exception
1063 // so we can only code-gen them with fpexcept.ignore.
1064 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f64, Action: Custom);
1065 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f32, Action: Custom);
1066 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::v2f64, Action: Custom);
1067 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::v4f32, Action: Custom);
1068
1069 setOperationAction(Op: ISD::FFLOOR, VT: MVT::v2f64, Action: Legal);
1070 setOperationAction(Op: ISD::FCEIL, VT: MVT::v2f64, Action: Legal);
1071 setOperationAction(Op: ISD::FTRUNC, VT: MVT::v2f64, Action: Legal);
1072 setOperationAction(Op: ISD::FRINT, VT: MVT::v2f64, Action: Legal);
1073 setOperationAction(Op: ISD::FROUND, VT: MVT::v2f64, Action: Legal);
1074 setOperationAction(Op: ISD::FROUND, VT: MVT::f64, Action: Legal);
1075 setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
1076
1077 setOperationAction(Op: ISD::FRINT, VT: MVT::v4f32, Action: Legal);
1078 setOperationAction(Op: ISD::FROUND, VT: MVT::v4f32, Action: Legal);
1079 setOperationAction(Op: ISD::FROUND, VT: MVT::f32, Action: Legal);
1080 setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);
1081
1082 setOperationAction(Op: ISD::MUL, VT: MVT::v2f64, Action: Legal);
1083 setOperationAction(Op: ISD::FMA, VT: MVT::v2f64, Action: Legal);
1084
1085 setOperationAction(Op: ISD::FDIV, VT: MVT::v2f64, Action: Legal);
1086 setOperationAction(Op: ISD::FSQRT, VT: MVT::v2f64, Action: Legal);
1087
1088 // Share the Altivec comparison restrictions.
1089 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v2f64, Action: Expand);
1090 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v2f64, Action: Expand);
1091 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v2f64, Action: Expand);
1092 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v2f64, Action: Expand);
1093
1094 setOperationAction(Op: ISD::LOAD, VT: MVT::v2f64, Action: Legal);
1095 setOperationAction(Op: ISD::STORE, VT: MVT::v2f64, Action: Legal);
1096
1097 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2f64, Action: Custom);
1098
1099 if (Subtarget.hasP8Vector())
1100 addRegisterClass(VT: MVT::f32, RC: &PPC::VSSRCRegClass);
1101
1102 addRegisterClass(VT: MVT::f64, RC: &PPC::VSFRCRegClass);
1103
1104 addRegisterClass(VT: MVT::v4i32, RC: &PPC::VSRCRegClass);
1105 addRegisterClass(VT: MVT::v4f32, RC: &PPC::VSRCRegClass);
1106 addRegisterClass(VT: MVT::v2f64, RC: &PPC::VSRCRegClass);
1107
1108 if (Subtarget.hasP8Altivec()) {
1109 setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Legal);
1110 setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Legal);
1111 setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Legal);
1112
// 128-bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA, because of the instructions available
// (VS{RL} and VS{RL}O). However, due to direct-move costs, it is not
// worth doing, so expand all of them.
1117 setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Expand);
1118 setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Expand);
1119 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1120
1121 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Legal);
1122 }
1123 else {
1124 setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Expand);
1125 setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Expand);
1126 setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Expand);
1127
1128 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Custom);
1129
1130 // VSX v2i64 only supports non-arithmetic operations.
1131 setOperationAction(Op: ISD::ADD, VT: MVT::v2i64, Action: Expand);
1132 setOperationAction(Op: ISD::SUB, VT: MVT::v2i64, Action: Expand);
1133 }
1134
1135 if (Subtarget.isISA3_1())
1136 setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Legal);
1137 else
1138 setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Expand);
1139
1140 setOperationAction(Op: ISD::LOAD, VT: MVT::v2i64, Action: Promote);
1141 AddPromotedToType (Opc: ISD::LOAD, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1142 setOperationAction(Op: ISD::STORE, VT: MVT::v2i64, Action: Promote);
1143 AddPromotedToType (Opc: ISD::STORE, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1144
1145 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2i64, Action: Custom);
1146
1147 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1148 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1149 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1150 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1151 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1152 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1153 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1154 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1155
1156 // Custom handling for partial vectors of integers converted to
1157 // floating point. We already have optimal handling for v2i32 through
1158 // the DAG combine, so those aren't necessary.
1159 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1160 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1161 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1162 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1163 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1164 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1165 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1166 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1167 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1168 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1169 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1170 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1171 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1172 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1173 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1174 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1175
1176 setOperationAction(Op: ISD::FNEG, VT: MVT::v4f32, Action: Legal);
1177 setOperationAction(Op: ISD::FNEG, VT: MVT::v2f64, Action: Legal);
1178 setOperationAction(Op: ISD::FABS, VT: MVT::v4f32, Action: Legal);
1179 setOperationAction(Op: ISD::FABS, VT: MVT::v2f64, Action: Legal);
1180 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v4f32, Action: Legal);
1181 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v2f64, Action: Legal);
1182
1183 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2i64, Action: Custom);
1184 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2f64, Action: Custom);
1185
// Handle constrained floating-point operations on vectors.
// The predicate is `hasVSX` because Altivec instructions do not raise
// floating-point exceptions, whereas VSX vector instructions do.
1189 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v4f32, Action: Legal);
1190 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v4f32, Action: Legal);
1191 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v4f32, Action: Legal);
1192 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v4f32, Action: Legal);
1193 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v4f32, Action: Legal);
1194 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v4f32, Action: Legal);
1195 setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v4f32, Action: Legal);
1196 setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v4f32, Action: Legal);
1197 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v4f32, Action: Legal);
1198 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v4f32, Action: Legal);
1199 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v4f32, Action: Legal);
1200 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v4f32, Action: Legal);
1201 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v4f32, Action: Legal);
1202
1203 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v2f64, Action: Legal);
1204 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v2f64, Action: Legal);
1205 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v2f64, Action: Legal);
1206 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v2f64, Action: Legal);
1207 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v2f64, Action: Legal);
1208 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v2f64, Action: Legal);
1209 setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v2f64, Action: Legal);
1210 setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v2f64, Action: Legal);
1211 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v2f64, Action: Legal);
1212 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v2f64, Action: Legal);
1213 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v2f64, Action: Legal);
1214 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v2f64, Action: Legal);
1215 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v2f64, Action: Legal);
1216
1217 addRegisterClass(VT: MVT::v2i64, RC: &PPC::VSRCRegClass);
1218 addRegisterClass(VT: MVT::f128, RC: &PPC::VRRCRegClass);
1219
1220 for (MVT FPT : MVT::fp_valuetypes())
1221 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: FPT, Action: Expand);
1222
1223 // Expand the SELECT to SELECT_CC
1224 setOperationAction(Op: ISD::SELECT, VT: MVT::f128, Action: Expand);
1225
1226 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f64, Action: Expand);
1227 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f32, Action: Expand);
1228
1229 // No implementation for these ops for PowerPC.
1230 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f128, Action: Expand);
1231 setOperationAction(Op: ISD::FSIN, VT: MVT::f128, Action: Expand);
1232 setOperationAction(Op: ISD::FCOS, VT: MVT::f128, Action: Expand);
1233 setOperationAction(Op: ISD::FPOW, VT: MVT::f128, Action: Expand);
1234 setOperationAction(Op: ISD::FPOWI, VT: MVT::f128, Action: Expand);
1235 setOperationAction(Op: ISD::FREM, VT: MVT::f128, Action: LibCall);
1236 }
1237
1238 if (Subtarget.hasP8Altivec()) {
1239 addRegisterClass(VT: MVT::v2i64, RC: &PPC::VRRCRegClass);
1240 addRegisterClass(VT: MVT::v1i128, RC: &PPC::VRRCRegClass);
1241 }
1242
1243 if (Subtarget.hasP9Vector()) {
1244 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Custom);
1245 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4f32, Action: Custom);
1246
1247 // Test data class instructions store results in CR bits.
1248 if (Subtarget.useCRBits()) {
1249 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Custom);
1250 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Custom);
1251 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f128, Action: Custom);
1252 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::ppcf128, Action: Custom);
1253 }
1254
1255 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1256 // SRL, but not for SRA because of the instructions available:
1257 // VS{RL} and VS{RL}O.
1258 setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Legal);
1259 setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Legal);
1260 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1261
1262 setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: Legal);
1263 setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: Legal);
1264 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Legal);
1265 setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Legal);
1266 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Legal);
1267
1268 setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Legal);
1269 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f128, Action: Expand);
1270 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f128, Action: Expand);
1271 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f128, Action: Expand);
1272 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f128, Action: Expand);
1273 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f128, Action: Expand);
1274 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f128, Action: Expand);
1275
1276 setOperationAction(Op: ISD::FTRUNC, VT: MVT::f128, Action: Legal);
1277 setOperationAction(Op: ISD::FRINT, VT: MVT::f128, Action: Legal);
1278 setOperationAction(Op: ISD::FFLOOR, VT: MVT::f128, Action: Legal);
1279 setOperationAction(Op: ISD::FCEIL, VT: MVT::f128, Action: Legal);
1280 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f128, Action: Legal);
1281 setOperationAction(Op: ISD::FROUND, VT: MVT::f128, Action: Legal);
1282
1283 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f64, Action: Legal);
1284 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f32, Action: Legal);
1285 setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
1286
1287 // Handle constrained floating-point operations of fp128
1288 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f128, Action: Legal);
1289 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f128, Action: Legal);
1290 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f128, Action: Legal);
1291 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f128, Action: Legal);
1292 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f128, Action: Legal);
1293 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f128, Action: Legal);
1294 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Legal);
1295 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f64, Action: Legal);
1296 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f32, Action: Legal);
1297 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f128, Action: Legal);
1298 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f128, Action: Legal);
1299 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f128, Action: Legal);
1300 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f128, Action: Legal);
1301 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f128, Action: Legal);
1302 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f128, Action: Legal);
1303 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v2f32, Action: Custom);
1304 setOperationAction(Op: ISD::BSWAP, VT: MVT::v8i16, Action: Legal);
1305 setOperationAction(Op: ISD::BSWAP, VT: MVT::v4i32, Action: Legal);
1306 setOperationAction(Op: ISD::BSWAP, VT: MVT::v2i64, Action: Legal);
1307 setOperationAction(Op: ISD::BSWAP, VT: MVT::v1i128, Action: Legal);
1308 } else if (Subtarget.hasVSX()) {
1309 setOperationAction(Op: ISD::LOAD, VT: MVT::f128, Action: Promote);
1310 setOperationAction(Op: ISD::STORE, VT: MVT::f128, Action: Promote);
1311
1312 AddPromotedToType(Opc: ISD::LOAD, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1313 AddPromotedToType(Opc: ISD::STORE, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1314
1315 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1316 // fp_to_uint and int_to_fp.
1317 setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: LibCall);
1318 setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: LibCall);
1319
1320 setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Expand);
1321 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Expand);
1322 setOperationAction(Op: ISD::FNEG, VT: MVT::f128, Action: Expand);
1323 setOperationAction(Op: ISD::FABS, VT: MVT::f128, Action: Expand);
1324 setOperationAction(Op: ISD::FSQRT, VT: MVT::f128, Action: Expand);
1325 setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Expand);
1326 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f128, Action: Expand);
1327
1328 // Expand the fp_extend if the target type is fp128.
1329 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Expand);
1330 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Expand);
1331
1332 // Expand the fp_round if the source type is fp128.
1333 for (MVT VT : {MVT::f32, MVT::f64}) {
1334 setOperationAction(Op: ISD::FP_ROUND, VT, Action: Custom);
1335 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT, Action: Custom);
1336 }
1337
1338 setOperationAction(Op: ISD::SETCC, VT: MVT::f128, Action: Custom);
1339 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f128, Action: Custom);
1340 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f128, Action: Custom);
1341 setOperationAction(Op: ISD::BR_CC, VT: MVT::f128, Action: Expand);
1342
1343 // Lower following f128 select_cc pattern:
1344 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1345 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1346
1347 // We need to handle f128 SELECT_CC with integer result type.
1348 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Custom);
1349 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: isPPC64 ? Custom : Expand);
1350 }
1351
1352 if (Subtarget.hasP9Altivec()) {
1353 if (Subtarget.isISA3_1()) {
1354 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1355 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1356 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1357 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1358 } else {
1359 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Custom);
1360 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Custom);
1361 }
1362 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i8, Action: Legal);
1363 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i16, Action: Legal);
1364 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i32, Action: Legal);
1365 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i8, Action: Legal);
1366 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i16, Action: Legal);
1367 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i32, Action: Legal);
1368 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i64, Action: Legal);
1369
1370 setOperationAction(Op: ISD::ABDU, VT: MVT::v16i8, Action: Legal);
1371 setOperationAction(Op: ISD::ABDU, VT: MVT::v8i16, Action: Legal);
1372 setOperationAction(Op: ISD::ABDU, VT: MVT::v4i32, Action: Legal);
1373 setOperationAction(Op: ISD::ABDS, VT: MVT::v4i32, Action: Legal);
1374 }
1375
1376 if (Subtarget.hasP10Vector()) {
1377 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1378 }
1379 }
1380
1381 if (Subtarget.pairedVectorMemops()) {
1382 addRegisterClass(VT: MVT::v256i1, RC: &PPC::VSRpRCRegClass);
1383 setOperationAction(Op: ISD::LOAD, VT: MVT::v256i1, Action: Custom);
1384 setOperationAction(Op: ISD::STORE, VT: MVT::v256i1, Action: Custom);
1385 }
1386 if (Subtarget.hasMMA()) {
1387 if (Subtarget.isISAFuture()) {
1388 addRegisterClass(VT: MVT::v512i1, RC: &PPC::WACCRCRegClass);
1389 addRegisterClass(VT: MVT::v1024i1, RC: &PPC::DMRRCRegClass);
1390 addRegisterClass(VT: MVT::v2048i1, RC: &PPC::DMRpRCRegClass);
1391 setOperationAction(Op: ISD::LOAD, VT: MVT::v1024i1, Action: Custom);
1392 setOperationAction(Op: ISD::STORE, VT: MVT::v1024i1, Action: Custom);
1393 setOperationAction(Op: ISD::LOAD, VT: MVT::v2048i1, Action: Custom);
1394 setOperationAction(Op: ISD::STORE, VT: MVT::v2048i1, Action: Custom);
1395 } else {
1396 addRegisterClass(VT: MVT::v512i1, RC: &PPC::UACCRCRegClass);
1397 }
1398 setOperationAction(Op: ISD::LOAD, VT: MVT::v512i1, Action: Custom);
1399 setOperationAction(Op: ISD::STORE, VT: MVT::v512i1, Action: Custom);
1400 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v512i1, Action: Custom);
1401 }
1402
1403 if (Subtarget.has64BitSupport())
1404 setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Legal);
1405
1406 if (Subtarget.isISA3_1())
1407 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Legal);
1408
1409 setOperationAction(Op: ISD::READCYCLECOUNTER, VT: MVT::i64, Action: isPPC64 ? Legal : Custom);
1410
1411 if (!isPPC64) {
1412 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i64, Action: Expand);
1413 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i64, Action: Expand);
1414 }
1415
1416 if (shouldInlineQuadwordAtomics()) {
1417 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i128, Action: Custom);
1418 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i128, Action: Custom);
1419 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i128, Action: Custom);
1420 }
1421
1422 setBooleanContents(ZeroOrOneBooleanContent);
1423
1424 if (Subtarget.hasAltivec()) {
1425 // Altivec instructions set fields to all zeros or all ones.
1426 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1427 }
1428
1429 if (shouldInlineQuadwordAtomics())
1430 setMaxAtomicSizeInBitsSupported(128);
1431 else if (isPPC64)
1432 setMaxAtomicSizeInBitsSupported(64);
1433 else
1434 setMaxAtomicSizeInBitsSupported(32);
1435
1436 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1437
1438 // We have target-specific dag combine patterns for the following nodes:
1439 setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::XOR, ISD::SHL, ISD::SRA,
1440 ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP,
1441 ISD::BUILD_VECTOR});
1442 if (Subtarget.hasFPCVT())
1443 setTargetDAGCombine(ISD::UINT_TO_FP);
1444 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1445 if (Subtarget.useCRBits())
1446 setTargetDAGCombine(ISD::BRCOND);
1447 setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1448 ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1449
1450 setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1451
1452 setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1453
1454 if (Subtarget.useCRBits()) {
1455 setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1456 }
1457
1458 // With 32 condition bits, we don't need to sink (and duplicate) compares
1459 // aggressively in CodeGenPrep.
1460 if (Subtarget.useCRBits()) {
1461 setJumpIsExpensive();
1462 }
1463
1464 // TODO: The default entry number is set to 64. This stops most jump table
1465 // generation on PPC. But it is good for current PPC HWs because the indirect
1466 // branch instruction mtctr to the jump table may lead to bad branch predict.
1467 // Re-evaluate this value on future HWs that can do better with mtctr.
1468 setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
1469
1470 // The default minimum of largest number in a BitTest cluster is 3.
1471 setMinimumBitTestCmps(PPCMinimumBitTestCmps);
1472
1473 setMinFunctionAlignment(Align(4));
1474 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1475
1476 auto CPUDirective = Subtarget.getCPUDirective();
1477 switch (CPUDirective) {
1478 default: break;
1479 case PPC::DIR_970:
1480 case PPC::DIR_A2:
1481 case PPC::DIR_E500:
1482 case PPC::DIR_E500mc:
1483 case PPC::DIR_E5500:
1484 case PPC::DIR_PWR4:
1485 case PPC::DIR_PWR5:
1486 case PPC::DIR_PWR5X:
1487 case PPC::DIR_PWR6:
1488 case PPC::DIR_PWR6X:
1489 case PPC::DIR_PWR7:
1490 case PPC::DIR_PWR8:
1491 case PPC::DIR_PWR9:
1492 case PPC::DIR_PWR10:
1493 case PPC::DIR_PWR11:
1494 case PPC::DIR_PWR_FUTURE:
1495 setPrefLoopAlignment(Align(16));
1496 setPrefFunctionAlignment(Align(16));
1497 break;
1498 }
1499
1500 if (Subtarget.enableMachineScheduler())
1501 setSchedulingPreference(Sched::Source);
1502 else
1503 setSchedulingPreference(Sched::Hybrid);
1504
1505 computeRegisterProperties(TRI: STI.getRegisterInfo());
1506
1507 // The Freescale cores do better with aggressive inlining of memcpy and
1508 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1509 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1510 MaxStoresPerMemset = 32;
1511 MaxStoresPerMemsetOptSize = 16;
1512 MaxStoresPerMemcpy = 32;
1513 MaxStoresPerMemcpyOptSize = 8;
1514 MaxStoresPerMemmove = 32;
1515 MaxStoresPerMemmoveOptSize = 8;
1516 } else if (CPUDirective == PPC::DIR_A2) {
1517 // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
1519 // over one hundred cycles.
1520 MaxStoresPerMemset = 128;
1521 MaxStoresPerMemcpy = 128;
1522 MaxStoresPerMemmove = 128;
1523 MaxLoadsPerMemcmp = 128;
1524 } else {
1525 MaxLoadsPerMemcmp = 8;
1526 MaxLoadsPerMemcmpOptSize = 4;
1527 }
1528
1529 // Enable generation of STXVP instructions by default for mcpu=future.
1530 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1531 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1532 DisableAutoPairedVecSt = false;
1533
1534 IsStrictFPEnabled = true;
1535
1536 // Let the subtarget (CPU) decide if a predictable select is more expensive
1537 // than the corresponding branch. This information is used in CGP to decide
1538 // when to convert selects into branches.
1539 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1540
1541 GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
1542}
1543
1544// *********************************** NOTE ************************************
1545// For selecting load and store instructions, the addressing modes are defined
1546// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
1548//
1549// The TD definitions for the addressing modes correspond to their respective
1550// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1551// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1552// address mode flags of a particular node. Afterwards, the computed address
1553// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1554// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1555// accordingly, based on the preferred addressing mode.
1556//
1557// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1558// MemOpFlags contains all the possible flags that can be used to compute the
1559// optimal addressing mode for load and store instructions.
1560// AddrMode contains all the possible load and store addressing modes available
1561// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1562//
1563// When adding new load and store instructions, it is possible that new address
1564// flags may need to be added into MemOpFlags, and a new addressing mode will
1565// need to be added to AddrMode. An entry of the new addressing mode (consisting
1566// of the minimal and main distinguishing address flags for the new load/store
1567// instructions) will need to be added into initializeAddrModeMap() below.
1568// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1569// need to be updated to account for selecting the optimal addressing mode.
1570// *****************************************************************************
1571/// Initialize the map that relates the different addressing modes of the load
1572/// and store instructions to a set of flags. This ensures the load/store
1573/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
  // Each entry below is one complete combination of MemOpFlags that selects
  // the enclosing addressing mode; the flags computed by computeMOFlags() are
  // matched against these sets in getAddrModeForFlags().
  // D-Form: base register + 16-bit signed displacement.
  AddrModesMap[PPC::AM_DForm] = {
      // LWZ, STW
      PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
      PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
      PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
      PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
      // LBZ, LHZ, STB, STH
      PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
      PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
      PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
      PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
      // LHA
      PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
      PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
      PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
      PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
      // LFS, LFD, STFS, STFD
      PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
      PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
      PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
  };
  // DS-Form: per the flag names, displacement must be a multiple of 4.
  AddrModesMap[PPC::AM_DSForm] = {
      // LWA
      PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
      PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
      PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
      // LD, STD
      PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
      PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
      // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
      PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
      PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
  };
  // DQ-Form: per the flag names, displacement must be a multiple of 16;
  // only used for P9 vector loads/stores.
  AddrModesMap[PPC::AM_DQForm] = {
      // LXV, STXV
      PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
      PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
      PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
  };
  // Prefixed D-Form: base register + 34-bit signed displacement (P10).
  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
                                       PPC::MOF_SubtargetP10};
  // TODO: Add mapping for quadword load/store.
}
1621
1622/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1623/// the desired ByVal argument alignment.
1624static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1625 if (MaxAlign == MaxMaxAlign)
1626 return;
1627 if (VectorType *VTy = dyn_cast<VectorType>(Val: Ty)) {
1628 if (MaxMaxAlign >= 32 &&
1629 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1630 MaxAlign = Align(32);
1631 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1632 MaxAlign < 16)
1633 MaxAlign = Align(16);
1634 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Val: Ty)) {
1635 Align EltAlign;
1636 getMaxByValAlign(Ty: ATy->getElementType(), MaxAlign&: EltAlign, MaxMaxAlign);
1637 if (EltAlign > MaxAlign)
1638 MaxAlign = EltAlign;
1639 } else if (StructType *STy = dyn_cast<StructType>(Val: Ty)) {
1640 for (auto *EltTy : STy->elements()) {
1641 Align EltAlign;
1642 getMaxByValAlign(Ty: EltTy, MaxAlign&: EltAlign, MaxMaxAlign);
1643 if (EltAlign > MaxAlign)
1644 MaxAlign = EltAlign;
1645 if (MaxAlign == MaxMaxAlign)
1646 break;
1647 }
1648 }
1649}
1650
1651/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1652/// function arguments in the caller parameter area.
1653Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1654 const DataLayout &DL) const {
1655 // 16byte and wider vectors are passed on 16byte boundary.
1656 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1657 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1658 if (Subtarget.hasAltivec())
1659 getMaxByValAlign(Ty, MaxAlign&: Alignment, MaxMaxAlign: Align(16));
1660 return Alignment;
1661}
1662
// Forward to the subtarget: soft-float is a per-subtarget property.
bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}
1666
// Forward to the subtarget: SPE (Signal Processing Engine) availability is a
// per-subtarget property.
bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}
1670
// Enable the inc-of-add (over sub-of-not) canonicalization only for scalar
// integer types.
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}
1674
1675bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1676 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1677 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1678 return false;
1679
1680 if (auto *VTy = dyn_cast<VectorType>(Val: VectorTy)) {
1681 if (VTy->getScalarType()->isIntegerTy()) {
1682 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1683 if (ElemSizeInBits == 32) {
1684 Index = Subtarget.isLittleEndian() ? 2 : 1;
1685 return true;
1686 }
1687 if (ElemSizeInBits == 64) {
1688 Index = Subtarget.isLittleEndian() ? 1 : 0;
1689 return true;
1690 }
1691 }
1692 }
1693 return false;
1694}
1695
1696EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1697 EVT VT) const {
1698 if (!VT.isVector())
1699 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1700
1701 return VT.changeVectorElementTypeToInteger();
1702}
1703
// Allow aggressive FMA formation for every floating-point type on PPC.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
1708
1709//===----------------------------------------------------------------------===//
1710// Node matching predicates, for use by the tblgen matching code.
1711//===----------------------------------------------------------------------===//
1712
1713/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1714static bool isFloatingPointZero(SDValue Op) {
1715 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val&: Op))
1716 return CFP->getValueAPF().isZero();
1717 else if (ISD::isEXTLoad(N: Op.getNode()) || ISD::isNON_EXTLoad(N: Op.getNode())) {
1718 // Maybe this has already been legalized into the constant pool?
1719 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val: Op.getOperand(i: 1)))
1720 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: CP->getConstVal()))
1721 return CFP->getValueAPF().isZero();
1722 }
1723 return false;
1724}
1725
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef mask elements are encoded as negative indices.
  if (Op < 0)
    return true;
  return Op == Val;
}
1731
1732/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1733/// VPKUHUM instruction.
1734/// The ShuffleKind distinguishes between big-endian operations with
1735/// two different inputs (0), either-endian operations with two identical
1736/// inputs (1), and little-endian operations with two different inputs (2).
1737/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1738bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1739 SelectionDAG &DAG) {
1740 bool IsLE = DAG.getDataLayout().isLittleEndian();
1741 if (ShuffleKind == 0) {
1742 if (IsLE)
1743 return false;
1744 for (unsigned i = 0; i != 16; ++i)
1745 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2+1))
1746 return false;
1747 } else if (ShuffleKind == 2) {
1748 if (!IsLE)
1749 return false;
1750 for (unsigned i = 0; i != 16; ++i)
1751 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2))
1752 return false;
1753 } else if (ShuffleKind == 1) {
1754 unsigned j = IsLE ? 0 : 1;
1755 for (unsigned i = 0; i != 8; ++i)
1756 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2+j) ||
1757 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j))
1758 return false;
1759 }
1760 return true;
1761}
1762
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Big-endian, distinct inputs: result halfword i comes from bytes
    // i*2+2 and i*2+3 of the concatenated inputs.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+2) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    // Little-endian, distinct (swapped) inputs: result halfword i comes
    // from bytes i*2 and i*2+1.
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    // Identical inputs: both mask halves must select the same bytes,
    // with an endian-dependent offset within each word.
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+j) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+j+1) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+9), Val: i*2+j+1))
        return false;
  }
  // Any other ShuffleKind trivially matches.
  return true;
}
1797
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  // VPKUDUM is only available on subtargets with P8 vector support.
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Big-endian, distinct inputs: result word i comes from bytes
    // i*2+4 .. i*2+7 of the concatenated inputs.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+4) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+5) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+2), Val: i*2+6) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+3), Val: i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    // Little-endian, distinct (swapped) inputs: result word i comes from
    // bytes i*2 .. i*2+3.
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+1) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+2), Val: i*2+2) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+3), Val: i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    // Identical inputs: both mask halves must select the same bytes,
    // with an endian-dependent offset within each doubleword.
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+j) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+j+1) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+2), Val: i*2+j+2) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+3), Val: i*2+j+3) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+9), Val: i*2+j+1) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+10), Val: i*2+j+2) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i+11), Val: i*2+j+3))
        return false;
  }
  // Any other ShuffleKind trivially matches.
  return true;
}
1846
/// isVMerge - Common function, used to match vmrg* shuffles.
///
/// Checks that result unit 2*i is taken from LHSStart + i*UnitSize and result
/// unit 2*i+1 from RHSStart + i*UnitSize, i.e. the interleave pattern the
/// vmrg* instructions produce for one half of the inputs.
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  // vmrg* shuffles are only formed over v16i8.
  if (N->getValueType(ResNo: 0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i*UnitSize*2+j),
                             Val: LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i*UnitSize*2+UnitSize+j),
                             Val: RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
1866
1867/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1868/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1869/// The ShuffleKind distinguishes between big-endian merges with two
1870/// different inputs (0), either-endian merges with two identical inputs (1),
1871/// and little-endian merges with two different inputs (2). For the latter,
1872/// the input operands are swapped (see PPCInstrAltivec.td).
1873bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1874 unsigned ShuffleKind, SelectionDAG &DAG) {
1875 if (DAG.getDataLayout().isLittleEndian()) {
1876 if (ShuffleKind == 1) // unary
1877 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 0);
1878 else if (ShuffleKind == 2) // swapped
1879 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 16);
1880 else
1881 return false;
1882 } else {
1883 if (ShuffleKind == 1) // unary
1884 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 8);
1885 else if (ShuffleKind == 0) // normal
1886 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 24);
1887 else
1888 return false;
1889 }
1890}
1891
1892/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1893/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1894/// The ShuffleKind distinguishes between big-endian merges with two
1895/// different inputs (0), either-endian merges with two identical inputs (1),
1896/// and little-endian merges with two different inputs (2). For the latter,
1897/// the input operands are swapped (see PPCInstrAltivec.td).
1898bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1899 unsigned ShuffleKind, SelectionDAG &DAG) {
1900 if (DAG.getDataLayout().isLittleEndian()) {
1901 if (ShuffleKind == 1) // unary
1902 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 8);
1903 else if (ShuffleKind == 2) // swapped
1904 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 24);
1905 else
1906 return false;
1907 } else {
1908 if (ShuffleKind == 1) // unary
1909 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 0);
1910 else if (ShuffleKind == 0) // normal
1911 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 16);
1912 else
1913 return false;
1914 }
1915}
1916
1917/**
1918 * Common function used to match vmrgew and vmrgow shuffles
1919 *
1920 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
1922 * machine.
1923 * - Little Endian:
1924 * - Use offset of 0 to check for odd elements
1925 * - Use offset of 4 to check for even elements
1926 * - Big Endian:
1927 * - Use offset of 0 to check for even elements
1928 * - Use offset of 4 to check for odd elements
1929 * A detailed description of the vector element ordering for little endian and
1930 * big endian can be found at
1931 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1932 * Targeting your applications - what little endian and big endian IBM XL C/C++
1933 * compiler differences mean to you
1934 *
1935 * The mask to the shuffle vector instruction specifies the indices of the
1936 * elements from the two input vectors to place in the result. The elements are
1937 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements of size
 * 8 bits. More info on the shuffle vector can be found in the
1940 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1941 * Language Reference.
1942 *
1943 * The RHSStartValue indicates whether the same input vectors are used (unary)
1944 * or two different input vectors are used, based on the following:
1945 * - If the instruction uses the same vector for both inputs, the range of the
1946 * indices will be 0 to 15. In this case, the RHSStart value passed should
1947 * be 0.
1948 * - If the instruction has two different vectors then the range of the
1949 * indices will be 0 to 31. In this case, the RHSStart value passed should
1950 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1951 * to 31 specify elements in the second vector).
1952 *
1953 * \param[in] N The shuffle vector SD Node to analyze
1954 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1955 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1956 * vector to the shuffle_vector instruction
1957 * \return true iff this shuffle vector represents an even or odd word merge
1958 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  // vmrgew/vmrgow patterns are only matched on v16i8 shuffles.
  if (N->getValueType(ResNo: 0) != MVT::v16i8)
    return false;

  // i selects the input stream (0 = LHS, 1 = RHS offset by RHSStartValue)
  // and j walks the 4 bytes of a word.  Result bytes 0-7 take one word from
  // each input's low doubleword; bytes 8-15 take the corresponding word
  // from each input's high doubleword (hence the +8 on both index and
  // expected value).  IndexOffset picks even vs. odd words as described in
  // the comment block above.
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i*4+j),
                             Val: i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(Op: N->getMaskElt(Idx: i*4+j+8),
                             Val: i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}
1973
1974/**
1975 * Determine if the specified shuffle mask is suitable for the vmrgew or
1976 * vmrgow instructions.
1977 *
1978 * \param[in] N The shuffle vector SD Node to analyze
1979 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1980 * \param[in] ShuffleKind Identify the type of merge:
1981 * - 0 = big-endian merge with two different inputs;
1982 * - 1 = either-endian merge with two identical inputs;
1983 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1984 * little-endian merges).
1985 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
 * instruction
1987 */
1988bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1989 unsigned ShuffleKind, SelectionDAG &DAG) {
1990 if (DAG.getDataLayout().isLittleEndian()) {
1991 unsigned indexOffset = CheckEven ? 4 : 0;
1992 if (ShuffleKind == 1) // Unary
1993 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 0);
1994 else if (ShuffleKind == 2) // swapped
1995 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 16);
1996 else
1997 return false;
1998 }
1999 else {
2000 unsigned indexOffset = CheckEven ? 0 : 4;
2001 if (ShuffleKind == 1) // Unary
2002 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 0);
2003 else if (ShuffleKind == 0) // Normal
2004 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 16);
2005 else
2006 return false;
2007 }
2008 return false;
2009}
2010
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(ResNo: 0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(Idx: i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(Idx: i);
  if (ShiftAmt < i) return -1;

  // Recover the shift amount element 0 would have had (the first i elements
  // were undef and therefore unconstrained).
  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Two distinct inputs: the remaining elements must be strictly
    // consecutive through the 32-element concatenated index space.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Unary (both inputs identical): indices may wrap around modulo 16.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // On LE the operands are swapped, so a shift of N bytes is reported as
  // 16-N (see PPCInstrAltivec.td).
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
2057
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  EVT VT = N->getValueType(ResNo: 0);
  // Doubleword vectors have only two elements: a splat just needs the two
  // mask entries to agree (undef/undef also passes here).
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return EltSize == 8 && N->getMaskElt(Idx: 0) == N->getMaskElt(Idx: 1);

  assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  // (For EltSize > 1 this also rejects an undef (-1) leading index, since
  // -1 % EltSize is nonzero.)
  if (N->getMaskElt(Idx: 0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(Idx: 0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(Idx: i) < 0 || N->getMaskElt(Idx: i) != (int)(i+ElementBase))
      return false;

  // Every following element must either repeat the first element's byte
  // sequence exactly or be wholly undef.
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    // An UNDEF element is a sequence of UNDEF bytes.
    if (N->getMaskElt(Idx: i) < 0) {
      // A partially-undef element does not count as an UNDEF element.
      for (unsigned j = 1; j != EltSize; ++j)
        if (N->getMaskElt(Idx: i + j) >= 0)
          return false;
    } else
      for (unsigned j = 0; j != EltSize; ++j)
        if (N->getMaskElt(Idx: i + j) != N->getMaskElt(Idx: j))
          return false;
  }
  return true;
}
2101
/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; //  Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    // The leading byte of an ascending run must fall on a Width boundary;
    // the leading byte of a descending run must be the last byte of a
    // Width-sized chunk.
    MaskVal[0] = N->getMaskElt(Idx: i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    // The remaining bytes of the element must differ from their predecessor
    // by exactly StepLen (i.e. be consecutive ascending or descending).
    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(Idx: i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}
2137
/// Check whether shuffle node \p N matches the XXINSERTW pattern: three of
/// the four words of the result are an identity copy of one input, and the
/// remaining word may be any single word of the other input (or of the same
/// input when the second operand is undef).
/// \param[out] ShiftElts word shift applied to the source (from the
///             endian-specific tables below)
/// \param[out] InsertAtByte byte offset at which the word is inserted
/// \param[out] Swap true iff the two shuffle inputs must be exchanged
/// \param[in] IsLE true for little-endian targets
/// \return true iff \p N can be lowered with XXINSERTW; the output
///         parameters are only written on success.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // Each 4-byte group of the mask must address one whole word.
  if (!isNByteElemShuffleMask(N, Width: 4, StepLen: 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(Idx: 0) / 4;
  unsigned M1 = N->getMaskElt(Idx: 4) / 4;
  unsigned M2 = N->getMaskElt(Idx: 8) / 4;
  unsigned M3 = N->getMaskElt(Idx: 12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(Num: 1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
2212
/// Check whether shuffle node \p N can be lowered to an XXSLDWI (word-level
/// shift-left-double immediate). On success, \p ShiftElts is set to the
/// word shift count and \p Swap indicates whether the two inputs must be
/// exchanged first.
/// \return true iff \p N can be lowered with XXSLDWI.
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, Width: 4, StepLen: 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(Idx: 0) / 4;
  unsigned M1 = N->getMaskElt(Idx: 4) / 4;
  unsigned M2 = N->getMaskElt(Idx: 8) / 4;
  unsigned M3 = N->getMaskElt(Idx: 12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(Num: 1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    // Unary case: the words must rotate within a single input.
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  // (Modulo 8 because the two-input index space spans 8 words.)
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  // M0 is in [0,7] and the two branches below cover every value, so
  // Swap/ShiftElts are always assigned before returning.
  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else { // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
2274
2275bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2276 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2277
2278 if (!isNByteElemShuffleMask(N, Width, StepLen: -1))
2279 return false;
2280
2281 for (int i = 0; i < 16; i += Width)
2282 if (N->getMaskElt(Idx: i) != i + Width - 1)
2283 return false;
2284
2285 return true;
2286}
2287
/// Return true iff \p N byte-reverses each 2-byte (halfword) element, i.e.
/// matches the XXBRH pattern.
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, Width: 2);
}
2291
/// Return true iff \p N byte-reverses each 4-byte (word) element, i.e.
/// matches the XXBRW pattern.
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, Width: 4);
}
2295
/// Return true iff \p N byte-reverses each 8-byte (doubleword) element,
/// i.e. matches the XXBRD pattern.
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, Width: 8);
}
2299
/// Return true iff \p N byte-reverses the whole 16-byte (quadword) vector,
/// i.e. matches the XXBRQ pattern.
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, Width: 16);
}
2303
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, Width: 8, StepLen: 1))
    return false;

  // M0/M1 are the doubleword indices (0-3 across both inputs) selected for
  // the two halves of the result.
  unsigned M0 = N->getMaskElt(Idx: 0) / 8;
  unsigned M1 = N->getMaskElt(Idx: 8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(Num: 1).isUndef()) {
    if ((M0 | M1) < 2) {
      // On LE the doubleword numbering is reversed relative to the
      // (BE-biased) DM encoding, hence the complemented selector bits.
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      // Renumber the selectors as if the operands had been swapped.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      // Renumber the selectors as if the operands had been swapped.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
2363
2364
2365/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2366/// appropriate for PPC mnemonics (which have a big endian bias - namely
2367/// elements are counted from the left of the vector register).
2368unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2369 SelectionDAG &DAG) {
2370 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);
2371 assert(isSplatShuffleMask(SVOp, EltSize));
2372 EVT VT = SVOp->getValueType(ResNo: 0);
2373
2374 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2375 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(Idx: 0)
2376 : SVOp->getMaskElt(Idx: 0);
2377
2378 if (DAG.getDataLayout().isLittleEndian())
2379 return (16 / EltSize) - 1 - (SVOp->getMaskElt(Idx: 0) / EltSize);
2380 else
2381 return SVOp->getMaskElt(Idx: 0) / EltSize;
2382}
2383
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
/// Returns the splat constant as an i32 target constant on success, or an
/// empty SDValue when no vspltis[bhw] can produce this build_vector.
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal;

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    // UniquedVals[k] collects the value seen at position k within each
    // Multiple-sized chunk; any disagreement means this isn't a splat.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(Num: i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(Val: N->getOperand(Num: i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(Num: i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(Num: i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(V: UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(V: UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32); // 0,0,0,undef
      int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(Val: ~0U, DL: SDLoc(N), VT: MVT::i32); // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(Val&: UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(Num: i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(Num: i);
    else if (OpVal != N->getOperand(Num: i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  // Extract the raw bits of the (integer or f32) splat value.
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = llvm::bit_cast<uint32_t>(from: CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(SplatSizeInBits: ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(X: Value, B: ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(X: MaskVal) == MaskVal)
    return DAG.getSignedTargetConstant(Val: MaskVal, DL: SDLoc(N), VT: MVT::i32);
  return SDValue();
}
2487
2488//===----------------------------------------------------------------------===//
2489// Addressing Mode Selection
2490//===----------------------------------------------------------------------===//
2491
2492/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2493/// or 64-bit immediate, and if the value can be accurately represented as a
2494/// sign extension from a 16-bit value. If so, this returns true and the
2495/// immediate.
2496bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2497 if (!isa<ConstantSDNode>(Val: N))
2498 return false;
2499
2500 Imm = (int16_t)N->getAsZExtVal();
2501 if (N->getValueType(ResNo: 0) == MVT::i32)
2502 return Imm == (int32_t)N->getAsZExtVal();
2503 else
2504 return Imm == (int64_t)N->getAsZExtVal();
2505}
/// SDValue convenience overload of the SDNode* version above.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(N: Op.getNode(), Imm);
}
2509
2510/// Used when computing address flags for selecting loads and stores.
2511/// If we have an OR, check if the LHS and RHS are provably disjoint.
2512/// An OR of two provably disjoint values is equivalent to an ADD.
2513/// Most PPC load/store instructions compute the effective address as a sum,
2514/// so doing this conversion is useful.
2515static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2516 if (N.getOpcode() != ISD::OR)
2517 return false;
2518 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2519 if (!LHSKnown.Zero.getBoolValue())
2520 return false;
2521 KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1));
2522 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2523}
2524
/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
/// be represented as an indexed [r+r] operation.
/// Succeeds only when some memory user of \p N accesses an f64 value (SPE
/// double loads/stores cannot use the 16-bit displacement form — see the
/// caller in SelectAddressRegReg).
/// NOTE(review): N's operands are read unconditionally, so callers must pass
/// a two-operand node; the only visible caller passes an ISD::ADD.
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
                                               SDValue &Index,
                                               SelectionDAG &DAG) const {
  // Scan N's users for a memory access of type f64; only then do we force
  // the [r+r] form.
  for (SDNode *U : N->users()) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(Val: U)) {
      if (Memop->getMemoryVT() == MVT::f64) {
        Base = N.getOperand(i: 0);
        Index = N.getOperand(i: 1);
        return true;
      }
    }
  }
  return false;
}
2541
2542/// isIntS34Immediate - This method tests if value of node given can be
2543/// accurately represented as a sign extension from a 34-bit value. If so,
2544/// this returns true and the immediate.
2545bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2546 if (!isa<ConstantSDNode>(Val: N))
2547 return false;
2548
2549 Imm = cast<ConstantSDNode>(Val: N)->getSExtValue();
2550 return isInt<34>(x: Imm);
2551}
/// SDValue convenience overload of the SDNode* version above.
bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
  return isIntS34Immediate(N: Op.getNode(), Imm);
}
2555
2556/// SelectAddressRegReg - Given the specified addressed, check to see if it
2557/// can be represented as an indexed [r+r] operation. Returns false if it
2558/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2559/// non-zero and N can be represented by a base register plus a signed 16-bit
2560/// displacement, make a more precise judgement by checking (displacement % \p
2561/// EncodingAlignment).
2562bool PPCTargetLowering::SelectAddressRegReg(
2563 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2564 MaybeAlign EncodingAlignment) const {
2565 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2566 // a [pc+imm].
2567 if (SelectAddressPCRel(N, Base))
2568 return false;
2569
2570 int16_t Imm = 0;
2571 if (N.getOpcode() == ISD::ADD) {
2572 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2573 // SPE load/store can only handle 8-bit offsets.
2574 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2575 return true;
2576 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) &&
2577 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
2578 return false; // r+i
2579 if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo)
2580 return false; // r+i
2581
2582 Base = N.getOperand(i: 0);
2583 Index = N.getOperand(i: 1);
2584 return true;
2585 } else if (N.getOpcode() == ISD::OR) {
2586 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) &&
2587 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
2588 return false; // r+i can fold it if we can.
2589
2590 // If this is an or of disjoint bitfields, we can codegen this as an add
2591 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2592 // disjoint.
2593 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2594
2595 if (LHSKnown.Zero.getBoolValue()) {
2596 KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1));
2597 // If all of the bits are known zero on the LHS or RHS, the add won't
2598 // carry.
2599 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2600 Base = N.getOperand(i: 0);
2601 Index = N.getOperand(i: 1);
2602 return true;
2603 }
2604 }
2605 }
2606
2607 return false;
2608}
2609
// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
// \p FrameIdx is the frame index being accessed and \p VT the type of the
// access; only i64 accesses to under-aligned, non-negative frame indices
// set the flag.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Slots already aligned to 4 bytes can always use the [r+imm] form.
  if (MFI.getObjectAlign(ObjectIdx: FrameIdx) >= Align(4))
    return;

  // Record that this function may need indexed (non-r+i) spills so an
  // emergency spill slot is allocated (see the comment block above).
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}
2648
2649/// Returns true if the address N can be represented by a base register plus
2650/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If \p EncodingAlignment is provided, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(
    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);

  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Base&: Disp, Index&: Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    // Case 1a: (add X, imm16) -- fold the immediate into the displacement
    // and use X as the base register.
    if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) &&
        (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
      Disp = DAG.getSignedTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) {
        // Frame indices become target frame indices; an i64 access to an
        // under-aligned slot may also need an emergency spill slot.
        Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
        fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
      } else {
        Base = N.getOperand(i: 0);
      }
      return true; // [r+i]
    } else if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo) {
      // Case 1b: Match LOAD (ADD (X, Lo(G))) -- the low part of a symbol
      // address acts as the displacement.
      assert(!N.getOperand(1).getConstantOperandVal(1) &&
             "Cannot handle constant offsets yet!");
      Disp = N.getOperand(i: 1).getOperand(i: 0); // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(i: 0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    // Case 2: (or X, imm16) treated as an add when the operands are disjoint.
    int16_t imm = 0;
    if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) &&
        (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));

      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) {
          Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
          fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
        } else {
          Base = N.getOperand(i: 0);
        }
        Disp = DAG.getTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
    // Case 3: Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    int16_t Imm;
    if (isIntS16Immediate(N: CN, Imm) &&
        (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm))) {
      Disp = DAG.getTargetConstant(Val: Imm, DL: dl, VT: CN->getValueType(ResNo: 0));
      // R0/X0 reads as the literal value zero in a D-form memory address.
      Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             VT: CN->getValueType(ResNo: 0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(ResNo: 0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(Lhs: *EncodingAlignment, SizeInBytes: CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant(Val: (short)Addr, DL: dl, VT: MVT::i32);

      // The high half compensates for the sign extension of the low 16 bits
      // that happens when the displacement is added back at runtime.
      Base = DAG.getTargetConstant(Val: (Addr - (signed short)Addr) >> 16, DL: dl,
                                   VT: MVT::i32);
      unsigned Opc = CN->getValueType(ResNo: 0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opcode: Opc, dl, VT: CN->getValueType(ResNo: 0), Op1: Base), 0);
      return true;
    }
  }

  // Fallback: [r+0] with the whole address in the base register.
  Disp = DAG.getTargetConstant(Val: 0, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
    Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
    fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}
2755
2756/// Similar to the 16-bit case but for instructions that take a 34-bit
2757/// displacement field (prefixed loads/stores).
bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
                                              SDValue &Base,
                                              SelectionDAG &DAG) const {
  // Only on 64-bit targets.
  if (N.getValueType() != MVT::i64)
    return false;

  SDLoc dl(N);
  int64_t Imm = 0;

  // Case 1: (add X, imm34) -- X is the base, the immediate the displacement.
  if (N.getOpcode() == ISD::ADD) {
    if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm))
      return false;
    Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
      Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
    else
      Base = N.getOperand(i: 0);
    return true;
  }

  // Case 2: (or X, imm34), selectable as an add when the operands are
  // provably disjoint (no carry out of the implied addition).
  if (N.getOpcode() == ISD::OR) {
    if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm))
      return false;
    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
    if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
      return false;
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
      Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
    else
      Base = N.getOperand(i: 0);
    Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
    return true;
  }

  // Case 3: the whole address is a 34-bit constant; pair it with the zero
  // register as the base.
  if (isIntS34Immediate(Op: N, Imm)) { // If the address is a 34-bit const.
    Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
    Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
    return true;
  }

  return false;
}
2804
/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
2807bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2808 SDValue &Index,
2809 SelectionDAG &DAG) const {
2810 // Check to see if we can easily represent this as an [r+r] address. This
2811 // will fail if it thinks that the address is more profitably represented as
2812 // reg+imm, e.g. where imm = 0.
2813 if (SelectAddressRegReg(N, Base, Index, DAG))
2814 return true;
2815
2816 // If the address is the result of an add, we will utilize the fact that the
2817 // address calculation includes an implicit add. However, we can reduce
2818 // register pressure if we do not materialize a constant just for use as the
2819 // index register. We only get rid of the add if it is not an add of a
2820 // value and a 16-bit signed constant and both have a single use.
2821 int16_t imm = 0;
2822 if (N.getOpcode() == ISD::ADD &&
2823 (!isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) ||
2824 !N.getOperand(i: 1).hasOneUse() || !N.getOperand(i: 0).hasOneUse())) {
2825 Base = N.getOperand(i: 0);
2826 Index = N.getOperand(i: 1);
2827 return true;
2828 }
2829
2830 // Otherwise, do it the hard way, using R0 as the base register.
2831 Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2832 VT: N.getValueType());
2833 Index = N;
2834 return true;
2835}
2836
2837template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2838 Ty *PCRelCand = dyn_cast<Ty>(N);
2839 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(TF: PCRelCand->getTargetFlags()));
2840}
2841
2842/// Returns true if this address is a PC Relative address.
2843/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2844/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2845bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2846 // This is a materialize PC Relative node. Always select this as PC Relative.
2847 Base = N;
2848 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2849 return true;
2850 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2851 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2852 isValidPCRelNode<JumpTableSDNode>(N) ||
2853 isValidPCRelNode<BlockAddressSDNode>(N))
2854 return true;
2855 return false;
2856}
2857
2858/// Returns true if we should use a direct load into vector instruction
2859/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2860static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2861
2862 // If there are any other uses other than scalar to vector, then we should
2863 // keep it as a scalar load -> direct move pattern to prevent multiple
2864 // loads.
2865 LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N);
2866 if (!LD)
2867 return false;
2868
2869 EVT MemVT = LD->getMemoryVT();
2870 if (!MemVT.isSimple())
2871 return false;
2872 switch(MemVT.getSimpleVT().SimpleTy) {
2873 case MVT::i64:
2874 break;
2875 case MVT::i32:
2876 if (!ST.hasP8Vector())
2877 return false;
2878 break;
2879 case MVT::i16:
2880 case MVT::i8:
2881 if (!ST.hasP9Vector())
2882 return false;
2883 break;
2884 default:
2885 return false;
2886 }
2887
2888 SDValue LoadedVal(N, 0);
2889 if (!LoadedVal.hasOneUse())
2890 return false;
2891
2892 for (SDUse &Use : LD->uses())
2893 if (Use.getResNo() == 0 &&
2894 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2895 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2896 return false;
2897
2898 return true;
2899}
2900
2901/// getPreIndexedAddressParts - returns true by value, base pointer and
2902/// offset pointer and addressing mode by reference if the node's address
2903/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  // Gather the base pointer, memory type and alignment from the load/store.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  Align Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlign();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlign();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, ST: Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors
  if (VT.isVector())
    return false;

  // Prefer the indexed (r+r) pre-increment form when the address splits into
  // two registers.
  if (SelectAddressRegReg(N: Ptr, Base, Index&: Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored. Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Val: Base) || isa<RegisterSDNode>(Val: Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(Val: N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(N: Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(a&: Base, b&: Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // Otherwise fall back to the displacement (r+imm) pre-increment form.
  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: std::nullopt))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < Align(4))
      return false;

    if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: Align(4)))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
    // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(ResNo: 0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Val: Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
2984
2985//===----------------------------------------------------------------------===//
2986// LowerOperation implementation
2987//===----------------------------------------------------------------------===//
2988
2989/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2990/// and LoOpFlags to the target MO flags.
2991static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2992 unsigned &HiOpFlags, unsigned &LoOpFlags,
2993 const GlobalValue *GV = nullptr) {
2994 HiOpFlags = PPCII::MO_HA;
2995 LoOpFlags = PPCII::MO_LO;
2996
2997 // Don't use the pic base if not in PIC relocation model.
2998 if (IsPIC) {
2999 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3000 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3001 }
3002}
3003
3004static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3005 SelectionDAG &DAG) {
3006 SDLoc DL(HiPart);
3007 EVT PtrVT = HiPart.getValueType();
3008 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: PtrVT);
3009
3010 SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL, VT: PtrVT, N1: HiPart, N2: Zero);
3011 SDValue Lo = DAG.getNode(Opcode: PPCISD::Lo, DL, VT: PtrVT, N1: LoPart, N2: Zero);
3012
3013 // With PIC, the first instruction is actually "GR+hi(&G)".
3014 if (isPIC)
3015 Hi = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
3016 N1: DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL, VT: PtrVT), N2: Hi);
3017
3018 // Generate non-pic code that has direct accesses to the constant pool.
3019 // The address of the global is just (hi(&g)+lo(&g)).
3020 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Hi, N2: Lo);
3021}
3022
3023static void setUsesTOCBasePtr(MachineFunction &MF) {
3024 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3025 FuncInfo->setUsesTOCBasePtr();
3026}
3027
3028static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3029 setUsesTOCBasePtr(DAG.getMachineFunction());
3030}
3031
3032SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3033 SDValue GA) const {
3034 EVT VT = Subtarget.getScalarIntVT();
3035 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(Reg: PPC::X2, VT)
3036 : Subtarget.isAIXABI()
3037 ? DAG.getRegister(Reg: PPC::R2, VT)
3038 : DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT);
3039 SDValue Ops[] = { GA, Reg };
3040 return DAG.getMemIntrinsicNode(
3041 Opcode: PPCISD::TOC_ENTRY, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops, MemVT: VT,
3042 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()), Alignment: std::nullopt,
3043 Flags: MachineMemOperand::MOLoad);
3044}
3045
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Val&: Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    // With PC-relative addressing there is no TOC load: materialize the
    // address directly relative to the PC.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDLoc DL(CP);
      EVT Ty = getPointerTy(DL: DAG.getDataLayout());
      SDValue ConstPool = DAG.getTargetConstantPool(
          C, VT: Ty, Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: PPCII::MO_PCREL_FLAG);
      return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: ConstPool);
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0);
    return getTOCEntry(DAG, dl: SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);

  // 32-bit PIC SVR4 loads the constant-pool address through the GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA =
        DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, dl: SDLoc(CP), GA);
  }

  // Otherwise compose the address from its high and low halves.
  SDValue CPIHi =
      DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOLoFlag);
  return LowerLabelRef(HiPart: CPIHi, LoPart: CPILo, isPIC: IsPIC, DAG);
}
3083
3084// For 64-bit PowerPC, prefer the more compact relative encodings.
3085// This trades 32 bits per jump table entry for one or two instructions
3086// on the jump site.
3087unsigned PPCTargetLowering::getJumpTableEncoding() const {
3088 if (isJumpTableRelative())
3089 return MachineJumpTableInfo::EK_LabelDifference32;
3090
3091 return TargetLowering::getJumpTableEncoding();
3092}
3093
3094bool PPCTargetLowering::isJumpTableRelative() const {
3095 if (UseAbsoluteJumpTables)
3096 return false;
3097 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3098 return true;
3099 return TargetLowering::isJumpTableRelative();
3100}
3101
3102SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3103 SelectionDAG &DAG) const {
3104 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3105 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3106
3107 switch (getTargetMachine().getCodeModel()) {
3108 case CodeModel::Small:
3109 case CodeModel::Medium:
3110 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3111 default:
3112 return DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: SDLoc(),
3113 VT: getPointerTy(DL: DAG.getDataLayout()));
3114 }
3115}
3116
3117const MCExpr *
3118PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3119 unsigned JTI,
3120 MCContext &Ctx) const {
3121 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3122 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3123
3124 switch (getTargetMachine().getCodeModel()) {
3125 case CodeModel::Small:
3126 case CodeModel::Medium:
3127 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3128 default:
3129 return MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx);
3130 }
3131}
3132
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op);

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  // Materialize the jump-table address directly relative to the PC.
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DL: DAG.getDataLayout());
    SDValue GA =
        DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Ty, TargetFlags: PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT);
    return getTOCEntry(DAG, dl: SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);

  // 32-bit PIC SVR4 loads the jump-table address through the GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT,
                                        TargetFlags: PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, dl: SDLoc(GA), GA);
  }

  // Otherwise compose the address from its high and low halves.
  SDValue JTIHi = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOLoFlag);
  return LowerLabelRef(HiPart: JTIHi, LoPart: JTILo, isPIC: IsPIC, DAG);
}
3169
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Val&: Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  // Materialize the block address directly relative to the PC.
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DL: DAG.getDataLayout());
    SDValue GA = DAG.getTargetBlockAddress(BA, VT: Ty, Offset: BASDN->getOffset(),
                                           TargetFlags: PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset());
    return getTOCEntry(DAG, dl: SDLoc(BASDN), GA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent())
    return getTOCEntry(
        DAG, dl: SDLoc(BASDN),
        GA: DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset()));

  // Otherwise compose the address from its high and low halves.
  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOLoFlag);
  return LowerLabelRef(HiPart: TgtBAHi, LoPart: TgtBALo, isPIC: IsPIC, DAG);
}
3207
3208SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3209 SelectionDAG &DAG) const {
3210 if (Subtarget.isAIXABI())
3211 return LowerGlobalTLSAddressAIX(Op, DAG);
3212
3213 return LowerGlobalTLSAddressLinux(Op, DAG);
3214}
3215
/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
/// and then apply the update. On the first call for a function, it scans the
/// function's calls to llvm.threadlocal.address to count distinct
/// local-dynamic TLS globals; if the count is at or below
/// PPCAIXTLSModelOptUseIEForLDLimit, the function is flagged to use
/// initial-exec instead of local-dynamic. \p Model is rewritten in place.
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
                                         SelectionDAG &DAG,
                                         const TargetMachine &TM) {
  // Initialize TLS model opt setting lazily:
  // (1) Use initial-exec for single TLS var references within current function.
  // (2) Use local-dynamic for multiple TLS var references within current
  // function.
  PPCFunctionInfo *FuncInfo =
      DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
    SmallPtrSet<const GlobalValue *, 8> TLSGV;
    // Iterate over all instructions within current function, collect all TLS
    // global variables (global variables taken as the first parameter to
    // Intrinsic::threadlocal_address).
    const Function &Func = DAG.getMachineFunction().getFunction();
    for (const BasicBlock &BB : Func)
      for (const Instruction &I : BB)
        if (I.getOpcode() == Instruction::Call)
          if (const CallInst *CI = dyn_cast<const CallInst>(Val: &I))
            if (Function *CF = CI->getCalledFunction())
              if (CF->isDeclaration() &&
                  CF->getIntrinsicID() == Intrinsic::threadlocal_address)
                if (const GlobalValue *GV =
                        dyn_cast<GlobalValue>(Val: I.getOperand(i: 0))) {
                  // Only local-dynamic globals count toward the limit.
                  TLSModel::Model GVModel = TM.getTLSModel(GV);
                  if (GVModel == TLSModel::LocalDynamic)
                    TLSGV.insert(Ptr: GV);
                }

    unsigned TLSGVCnt = TLSGV.size();
    LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
    if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
      FuncInfo->setAIXFuncUseTLSIEForLD();
    // Mark initialization done so the scan runs at most once per function.
    FuncInfo->setAIXFuncTLSModelOptInitDone();
  }

  if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
    LLVM_DEBUG(
        dbgs() << DAG.getMachineFunction().getName()
               << " function is using the TLS-IE model for TLS-LD access.\n");
    Model = TLSModel::InitialExec;
  }
}
3261
3262SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3263 SelectionDAG &DAG) const {
3264 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
3265
3266 if (DAG.getTarget().useEmulatedTLS())
3267 report_fatal_error(reason: "Emulated TLS is not yet supported on AIX");
3268
3269 SDLoc dl(GA);
3270 const GlobalValue *GV = GA->getGlobal();
3271 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3272 bool Is64Bit = Subtarget.isPPC64();
3273 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3274
3275 // Apply update to the TLS model.
3276 if (Subtarget.hasAIXShLibTLSModelOpt())
3277 updateForAIXShLibTLSModelOpt(Model, DAG, TM: getTargetMachine());
3278
3279 // TLS variables are accessed through TOC entries.
3280 // To support this, set the DAG to use the TOC base pointer.
3281 setUsesTOCBasePtr(DAG);
3282
3283 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3284
3285 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3286 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3287 bool HasAIXSmallTLSGlobalAttr = false;
3288 SDValue VariableOffsetTGA =
3289 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TPREL_FLAG);
3290 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3291 SDValue TLSReg;
3292
3293 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Val: GV))
3294 if (GVar->hasAttribute(Kind: "aix-small-tls"))
3295 HasAIXSmallTLSGlobalAttr = true;
3296
3297 if (Is64Bit) {
3298 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3299 // involves a load of the variable offset (from the TOC), followed by an
3300 // add of the loaded variable offset to R13 (the thread pointer).
3301 // This code sequence looks like:
3302 // ld reg1,var[TC](2)
3303 // add reg2, reg1, r13 // r13 contains the thread pointer
3304 TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
3305
3306 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3307 // global variable attribute, produce a faster access sequence for
3308 // local-exec TLS variables where the offset from the TLS base is encoded
3309 // as an immediate operand.
3310 //
3311 // We only utilize the faster local-exec access sequence when the TLS
3312 // variable has a size within the policy limit. We treat types that are
3313 // not sized or are empty as being over the policy size limit.
3314 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3315 IsTLSLocalExecModel) {
3316 Type *GVType = GV->getValueType();
3317 if (GVType->isSized() && !GVType->isEmptyTy() &&
3318 GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3319 AIXSmallTlsPolicySizeLimit)
3320 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA, N2: TLSReg);
3321 }
3322 } else {
3323 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3324 // involves loading the variable offset from the TOC, generating a call to
3325 // .__get_tpointer to get the thread pointer (which will be in R3), and
3326 // adding the two together:
3327 // lwz reg1,var[TC](2)
3328 // bla .__get_tpointer
3329 // add reg2, reg1, r3
3330 TLSReg = DAG.getNode(Opcode: PPCISD::GET_TPOINTER, DL: dl, VT: PtrVT);
3331
3332 // We do not implement the 32-bit version of the faster access sequence
3333 // for local-exec that is controlled by the -maix-small-local-exec-tls
3334 // option, or the "aix-small-tls" global variable attribute.
3335 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3336 report_fatal_error(reason: "The small-local-exec TLS access sequence is "
3337 "currently only supported on AIX (64-bit mode).");
3338 }
3339 return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: VariableOffset);
3340 }
3341
3342 if (Model == TLSModel::LocalDynamic) {
3343 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3344
3345 // We do not implement the 32-bit version of the faster access sequence
3346 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3347 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3348 report_fatal_error(reason: "The small-local-dynamic TLS access sequence is "
3349 "currently only supported on AIX (64-bit mode).");
3350
3351 // For local-dynamic on AIX, we need to generate one TOC entry for each
3352 // variable offset, and a single module-handle TOC entry for the entire
3353 // file.
3354
3355 SDValue VariableOffsetTGA =
3356 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLD_FLAG);
3357 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3358
3359 Module *M = DAG.getMachineFunction().getFunction().getParent();
3360 GlobalVariable *TLSGV =
3361 dyn_cast_or_null<GlobalVariable>(Val: M->getOrInsertGlobal(
3362 Name: StringRef("_$TLSML"), Ty: PointerType::getUnqual(C&: *DAG.getContext())));
3363 TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3364 assert(TLSGV && "Not able to create GV for _$TLSML.");
3365 SDValue ModuleHandleTGA =
3366 DAG.getTargetGlobalAddress(GV: TLSGV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLDM_FLAG);
3367 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, GA: ModuleHandleTGA);
3368 SDValue ModuleHandle =
3369 DAG.getNode(Opcode: PPCISD::TLSLD_AIX, DL: dl, VT: PtrVT, Operand: ModuleHandleTOC);
3370
3371 // With the -maix-small-local-dynamic-tls option, produce a faster access
3372 // sequence for local-dynamic TLS variables where the offset from the
3373 // module-handle is encoded as an immediate operand.
3374 //
3375 // We only utilize the faster local-dynamic access sequence when the TLS
3376 // variable has a size within the policy limit. We treat types that are
3377 // not sized or are empty as being over the policy size limit.
3378 if (HasAIXSmallLocalDynamicTLS) {
3379 Type *GVType = GV->getValueType();
3380 if (GVType->isSized() && !GVType->isEmptyTy() &&
3381 GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3382 AIXSmallTlsPolicySizeLimit)
3383 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA,
3384 N2: ModuleHandle);
3385 }
3386
3387 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: ModuleHandle, N2: VariableOffset);
3388 }
3389
3390 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3391 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3392 // need to generate two TOC entries, one for the variable offset, one for the
3393 // region handle. The global address for the TOC entry of the region handle is
3394 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3395 // entry of the variable offset is created with MO_TLSGD_FLAG.
3396 SDValue VariableOffsetTGA =
3397 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGD_FLAG);
3398 SDValue RegionHandleTGA =
3399 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGDM_FLAG);
3400 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3401 SDValue RegionHandle = getTOCEntry(DAG, dl, GA: RegionHandleTGA);
3402 return DAG.getNode(Opcode: PPCISD::TLSGD_AIX, DL: dl, VT: PtrVT, N1: VariableOffset,
3403 N2: RegionHandle);
3404}
3405
// Lower an ISD::GlobalTLSAddress for Linux/ELF targets. The code sequence
// emitted depends on the TLS model the target machine selected for this
// global: local-exec, initial-exec, general-dynamic, or local-dynamic.
SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form. Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
  // Emulated TLS lowers to __emutls_get_address calls; handled generically.
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  // The PIC level decides which base-register node the 32-bit paths use.
  PICLevel::Level picLevel = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);

  // Local-exec: the offset from the thread-pointer register (X13 on 64-bit,
  // R2 on 32-bit) is known at link time and materialized directly.
  if (Model == TLSModel::LocalExec) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
      SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_TPREL_PCREL_FLAG);
      SDValue MatAddr =
          DAG.getNode(Opcode: PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
      return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: MatAddr);
    }

    // Non-PC-relative: classic @tprel@ha / @tprel@l pair added to the
    // thread-pointer register.
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(Reg: PPC::X13, VT: MVT::i64)
                             : DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);

    SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL: dl, VT: PtrVT, N1: TGAHi, N2: TLSReg);
    return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: TGALo, N2: Hi);
  }

  // Initial-exec: load the thread-pointer offset from the GOT, then add it
  // to the thread pointer.
  if (Model == TLSModel::InitialExec) {
    bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
    SDValue TGA = DAG.getTargetGlobalAddress(
        GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(
        GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
    SDValue TPOffset;
    if (IsPCRel) {
      // PC-relative: materialize the GOT slot address and load the offset.
      SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
      TPOffset = DAG.getLoad(VT: MVT::i64, dl, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
                             PtrInfo: MachinePointerInfo());
    } else {
      // TOC/GOT based: compute a GOT base first, then do the @got@tprel load.
      SDValue GOTPtr;
      if (is64bit) {
        setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
        GOTPtr =
            DAG.getNode(Opcode: PPCISD::ADDIS_GOT_TPREL_HA, DL: dl, VT: PtrVT, N1: GOTReg, N2: TGA);
      } else {
        // 32-bit GOT base selection depends on PIC mode/level.
        if (!TM.isPositionIndependent())
          GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_GOT, DL: dl, VT: PtrVT);
        else if (picLevel == PICLevel::SmallPIC)
          GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
        else
          GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
      }
      TPOffset = DAG.getNode(Opcode: PPCISD::LD_GOT_TPREL_L, DL: dl, VT: PtrVT, N1: TGA, N2: GOTPtr);
    }
    return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TPOffset, N2: TGATLS);
  }

  // General-dynamic: resolve the address through a runtime call sequence
  // (folded into the ADDI_TLSGD_L_ADDR / PC-relative pseudo).
  if (Model == TLSModel::GeneralDynamic) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_GOT_TLSGD_PCREL_FLAG);
      return DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
      GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSGD_HA, DL: dl, VT: PtrVT,
                           N1: GOTReg, N2: TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
      else
        GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
    }
    // TGA appears twice: once for the addi and once carried for relocation
    // purposes by the combined pseudo.
    return DAG.getNode(Opcode: PPCISD::ADDI_TLSGD_L_ADDR, DL: dl, VT: PtrVT,
                       N1: GOTPtr, N2: TGA, N3: TGA);
  }

  // Local-dynamic: one runtime call per module, then add the variable's
  // DTP-relative offset (@dtprel@ha/@dtprel@l) to the returned base.
  if (Model == TLSModel::LocalDynamic) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
                                               TargetFlags: PPCII::MO_GOT_TLSLD_PCREL_FLAG);
      SDValue MatPCRel =
          DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
      return DAG.getNode(Opcode: PPCISD::PADDI_DTPREL, DL: dl, VT: PtrVT, N1: MatPCRel, N2: TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
      GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSLD_HA, DL: dl, VT: PtrVT,
                           N1: GOTReg, N2: TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
      else
        GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(Opcode: PPCISD::ADDI_TLSLD_L_ADDR, DL: dl,
                                  VT: PtrVT, N1: GOTPtr, N2: TGA, N3: TGA);
    SDValue DtvOffsetHi = DAG.getNode(Opcode: PPCISD::ADDIS_DTPREL_HA, DL: dl,
                                      VT: PtrVT, N1: TLSAddr, N2: TGA);
    return DAG.getNode(Opcode: PPCISD::ADDI_DTPREL_L, DL: dl, VT: PtrVT, N1: DtvOffsetHi, N2: TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
3533
// Lower an ISD::GlobalAddress node. On 64-bit ELF and AIX the address comes
// from the TOC (or is materialized PC-relatively on capable subtargets);
// 32-bit SVR4 uses either a GOT entry (PIC) or a Hi/Lo immediate pair.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Val&: Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      EVT Ty = getPointerTy(DL: DAG.getDataLayout());
      if (isAccessedAsGotIndirect(N: Op)) {
        // The symbol must be accessed through the GOT: materialize the GOT
        // slot address PC-relatively, then load the real address from it.
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
                                                TargetFlags: PPCII::MO_GOT_PCREL_FLAG);
        SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
        SDValue Load = DAG.getLoad(VT: MVT::i64, dl: DL, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
                                   PtrInfo: MachinePointerInfo());
        return Load;
      } else {
        // Direct PC-relative materialization of the symbol address.
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
                                                TargetFlags: PPCII::MO_PCREL_FLAG);
        return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
      }
    }
    // TOC-based access: record that this function needs the TOC base pointer.
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset());
    return getTOCEntry(DAG, dl: DL, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag, GV);

  // 32-bit SVR4 PIC: go through the GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT,
                                            offset: GSDN->getOffset(),
                                            TargetFlags: PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, dl: DL, GA);
  }

  // Non-PIC: build the address from high/low immediate halves.
  SDValue GAHi =
      DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOHiFlag);
  SDValue GALo =
      DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOLoFlag);

  return LowerLabelRef(HiPart: GAHi, LoPart: GALo, isPIC: IsPIC, DAG);
}
3582
// Lower ISD::SETCC / STRICT_FSETCC(S). Softens f128 compares into libcalls,
// legalizes v2i64 equality compares via v4i32, and rewrites integer
// equality compares into xor-against-zero to expose bit-twiddling combines.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  // Strict nodes carry a chain in operand 0, shifting the others by one.
  ISD::CondCode CC =
      cast<CondCodeSDNode>(Val: Op.getOperand(i: IsStrict ? 3 : 2))->get();
  SDValue LHS = Op.getOperand(i: IsStrict ? 1 : 0);
  SDValue RHS = Op.getOperand(i: IsStrict ? 2 : 1);
  SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
  EVT LHSVT = LHS.getValueType();
  SDLoc dl(Op);

  // Soften the setcc with libcall if it is fp128.
  if (LHSVT == MVT::f128) {
    assert(!Subtarget.hasP9Vector() &&
           "SETCC for f128 is already legal under Power9!");
    softenSetCCOperands(DAG, VT: LHSVT, NewLHS&: LHS, NewRHS&: RHS, CCCode&: CC, DL: dl, OldLHS: LHS, OldRHS: RHS, Chain,
                        IsSignaling: Op->getOpcode() == ISD::STRICT_FSETCCS);
    // A null RHS means the softening already produced the final boolean.
    if (RHS.getNode())
      LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS,
                        N3: DAG.getCondCode(Cond: CC));
    if (IsStrict)
      return DAG.getMergeValues(Ops: {LHS, Chain}, dl);
    return LHS;
  }

  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (LHS.getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC != ISD::SETEQ && CC != ISD::SETNE)
        return SDValue();
      // Compare as v4i32, then combine each pair of 32-bit results: swap
      // adjacent lanes and AND (eq) / OR (ne) so both halves of each 64-bit
      // element must agree.
      SDValue SetCC32 = DAG.getSetCC(
          DL: dl, VT: MVT::v4i32, LHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: LHS),
          RHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: RHS), Cond: CC);
      int ShuffV[] = {1, 0, 3, 2};
      SDValue Shuff =
          DAG.getVectorShuffle(VT: MVT::v4i32, dl, N1: SetCC32, N2: SetCC32, Mask: ShuffV);
      return DAG.getBitcast(VT: MVT::v2i64,
                            V: DAG.getNode(Opcode: CC == ISD::SETEQ ? ISD::AND : ISD::OR,
                                        DL: dl, VT: MVT::v4i32, N1: Shuff, N2: SetCC32));
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized. FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnes() || C->isZero())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit. The
  // normal approach here uses sub to do this instead of xor. Using xor exposes
  // the result to other bit-twiddling opportunities.
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: LHSVT, N1: LHS, N2: RHS);
    return DAG.getSetCC(DL: dl, VT, LHS: Sub, RHS: DAG.getConstant(Val: 0, DL: dl, VT: LHSVT), Cond: CC);
  }
  // Everything else goes through the default expansion.
  return SDValue();
}
3658
// Lower ISD::VAARG for the 32-bit SVR4 ABI. The va_list layout (documented
// in LowerVASTART) is: byte 0 = gpr index, byte 1 = fpr index, word at
// offset 4 = overflow_arg_area, word at offset 8 = reg_save_area. The value
// is fetched either from the register save area or the overflow area,
// depending on how many argument registers have already been consumed.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(ResNo: 0);
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  SDValue InChain = Node->getOperand(Num: 0);
  SDValue VAListPtr = Node->getOperand(Num: 1);
  const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: 2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index: the first byte of the va_list.
  SDValue GprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
                                    Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV), MemVT: MVT::i8);
  InChain = GprIndex.getValue(R: 1);

  if (VT == MVT::i64) {
    // i64 arguments occupy an even/odd GPR pair, so the index must be even.
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: GprIndex,
                                 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
    SDValue CC64 = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: GprAnd,
                                RHS: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32), Cond: ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: GprIndex,
                                          N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC64, N2: GprIndexPlusOne,
                           N3: GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
                               N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
                                    Ptr: FprPtr, PtrInfo: MachinePointerInfo(SV), MemVT: MVT::i8);
  InChain = FprIndex.getValue(R: 1);

  // Pointers to the two save areas at fixed offsets inside the va_list.
  SDValue RegSaveAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
                                       N2: DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
                                        N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: OverflowAreaPtr, PtrInfo: MachinePointerInfo());
  InChain = OverflowArea.getValue(R: 1);

  SDValue RegSaveArea =
      DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: RegSaveAreaPtr, PtrInfo: MachinePointerInfo());
  InChain = RegSaveArea.getValue(R: 1);

  // CC is true (use reg_save_area) while the relevant index is still < 8;
  // otherwise all 8 registers are consumed and the overflow area is used.
  SDValue CC = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: VT.isInteger() ? GprIndex : FprIndex,
                            RHS: DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32), Cond: ISD::SETLT);

  // adjustment constant gpr_index * 4/8 (GPR slots are 4 bytes, FPR slots 8).
  SDValue RegConstant = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32,
                                    N1: VT.isInteger() ? GprIndex : FprIndex,
                                    N2: DAG.getConstant(Val: VT.isInteger() ? 4 : 8, DL: dl,
                                                    VT: MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: RegSaveArea,
                               N2: RegConstant);

  // Floating types are 32 bytes into RegSaveArea: 8 GPRs * 4 bytes each.
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OurReg,
                         N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32,
                                   N1: VT.isInteger() ? GprIndex : FprIndex,
                                   N2: DAG.getConstant(Val: VT == MVT::i64 ? 2 : 1, DL: dl,
                                                   VT: MVT::i32));

  // Store the updated index back into the va_list (byte 0 for gpr, byte 1
  // for fpr).
  InChain = DAG.getTruncStore(Chain: InChain, dl, Val: IndexPlus1,
                              Ptr: VT.isInteger() ? VAListPtr : FprPtr,
                              PtrInfo: MachinePointerInfo(SV), SVT: MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: PtrVT, N1: CC, N2: OurReg, N3: OverflowArea);

  // Advance overflow_area by 4/8 when the argument was taken from it
  // (i.e. when the register index was already >= 8).
  SDValue OverflowAreaPlusN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OverflowArea,
                                          N2: DAG.getConstant(Val: VT.isInteger() ? 4 : 8,
                                                          DL: dl, VT: MVT::i32));

  OverflowArea = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC, N2: OverflowArea,
                             N3: OverflowAreaPlusN);

  InChain = DAG.getTruncStore(Chain: InChain, dl, Val: OverflowArea, Ptr: OverflowAreaPtr,
                              PtrInfo: MachinePointerInfo(), SVT: MVT::i32);

  // Finally load the argument value from the selected location.
  return DAG.getLoad(VT, dl, Chain: InChain, Ptr: Result, PtrInfo: MachinePointerInfo());
}
3757
3758SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3759 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3760
3761 // We have to copy the entire va_list struct:
3762 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3763 return DAG.getMemcpy(Chain: Op.getOperand(i: 0), dl: Op, Dst: Op.getOperand(i: 1), Src: Op.getOperand(i: 2),
3764 Size: DAG.getConstant(Val: 12, DL: SDLoc(Op), VT: MVT::i32), Alignment: Align(8),
3765 isVol: false, AlwaysInline: true, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
3766 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
3767}
3768
3769SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3770 SelectionDAG &DAG) const {
3771 return Op.getOperand(i: 0);
3772}
3773
// Lower ISD::INLINEASM / INLINEASM_BR. The node itself is returned
// unchanged; the only purpose of this hook is to scan the asm's operands and
// mark the function as requiring an LR store when the asm defines or
// clobbers LR/LR8.
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();

  assert((Op.getOpcode() == ISD::INLINEASM ||
          Op.getOpcode() == ISD::INLINEASM_BR) &&
         "Expecting Inline ASM node.");

  // If an LR store is already known to be required then there is not point in
  // checking this ASM as well.
  if (MFI.isLRStoreRequired())
    return Op;

  // Inline ASM nodes have an optional last operand that is an incoming Flag of
  // type MVT::Glue. We want to ignore this last operand if that is the case.
  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(i: NumOps - 1).getValueType() == MVT::Glue)
    --NumOps;

  // Check all operands that may contain the LR.
  // Operands come in groups: one flag word describing the group, followed by
  // that group's value operands; i is advanced past both.
  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
    unsigned NumVals = Flags.getNumOperandRegisters();
    ++i; // Skip the ID value.

    switch (Flags.getKind()) {
    default:
      llvm_unreachable("Bad flags!");
    case InlineAsm::Kind::RegUse:
    case InlineAsm::Kind::Imm:
    case InlineAsm::Kind::Mem:
      // Reads and non-register operands cannot clobber LR; skip the group.
      i += NumVals;
      break;
    case InlineAsm::Kind::Clobber:
    case InlineAsm::Kind::RegDef:
    case InlineAsm::Kind::RegDefEarlyClobber: {
      // A write or clobber of LR/LR8 forces the prologue to save LR.
      for (; NumVals; --NumVals, ++i) {
        Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i))->getReg();
        if (Reg != PPC::LR && Reg != PPC::LR8)
          continue;
        MFI.setLRStoreRequired();
        return Op;
      }
      break;
    }
    }
  }

  return Op;
}
3824
// Lower ISD::INIT_TRAMPOLINE. On AIX the trampoline is built inline as a
// function descriptor (entry point, TOC pointer, environment); elsewhere it
// is lowered to a call to the __trampoline_setup runtime helper.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(i: 0);
  SDValue Trmp = Op.getOperand(i: 1); // trampoline
  SDValue FPtr = Op.getOperand(i: 2); // nested function
  SDValue Nest = Op.getOperand(i: 3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());

  if (Subtarget.isAIXABI()) {
    // On AIX we create a trampoline descriptor by combining the
    // entry point and TOC from the global descriptor (FPtr) with the
    // nest argument as the environment pointer.
    uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
    MaybeAlign PointerAlign(PointerSize);
    // Loads from the function descriptor may be marked invariant when the
    // subtarget guarantees descriptors never change.
    auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                        ? (MachineMemOperand::MODereferenceable |
                           MachineMemOperand::MOInvariant)
                        : MachineMemOperand::MONone;

    // Descriptor layout: [0] entry point, [1] TOC pointer, [2] environment.
    uint64_t TOCPointerOffset = 1 * PointerSize;
    uint64_t EnvPointerOffset = 2 * PointerSize;
    SDValue SDTOCPtrOffset = DAG.getConstant(Val: TOCPointerOffset, DL: dl, VT: PtrVT);
    SDValue SDEnvPtrOffset = DAG.getConstant(Val: EnvPointerOffset, DL: dl, VT: PtrVT);

    const Value *TrampolineAddr =
        cast<SrcValueSDNode>(Val: Op.getOperand(i: 4))->getValue();
    const Function *Func =
        cast<Function>(Val: cast<SrcValueSDNode>(Val: Op.getOperand(i: 5))->getValue());

    // One chain per store into the trampoline buffer, merged at the end.
    SDValue OutChains[3];

    // Copy the entry point address from the global descriptor to the
    // trampoline buffer.
    SDValue LoadEntryPoint =
        DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: FPtr, PtrInfo: MachinePointerInfo(Func, 0),
                    Alignment: PointerAlign, MMOFlags);
    SDValue EPLoadChain = LoadEntryPoint.getValue(R: 1);
    OutChains[0] = DAG.getStore(Chain: EPLoadChain, dl, Val: LoadEntryPoint, Ptr: Trmp,
                                PtrInfo: MachinePointerInfo(TrampolineAddr, 0));

    // Copy the TOC pointer from the global descriptor to the trampoline
    // buffer.
    SDValue TOCFromDescriptorPtr =
        DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FPtr, N2: SDTOCPtrOffset);
    SDValue TOCReg = DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: TOCFromDescriptorPtr,
                                 PtrInfo: MachinePointerInfo(Func, TOCPointerOffset),
                                 Alignment: PointerAlign, MMOFlags);
    SDValue TrampolineTOCPointer =
        DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Trmp, N2: SDTOCPtrOffset);
    SDValue TOCLoadChain = TOCReg.getValue(R: 1);
    OutChains[1] =
        DAG.getStore(Chain: TOCLoadChain, dl, Val: TOCReg, Ptr: TrampolineTOCPointer,
                     PtrInfo: MachinePointerInfo(TrampolineAddr, TOCPointerOffset));

    // Store the nest argument into the environment pointer in the trampoline
    // buffer.
    SDValue EnvPointer = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Trmp, N2: SDEnvPtrOffset);
    OutChains[2] =
        DAG.getStore(Chain, dl, Val: Nest, Ptr: EnvPointer,
                     PtrInfo: MachinePointerInfo(TrampolineAddr, EnvPointerOffset));

    // Merge the three store chains into a single output chain.
    SDValue TokenFactor =
        DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OutChains);
    return TokenFactor;
  }

  // Non-AIX: delegate to the runtime helper.
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(C&: *DAG.getContext());

  TargetLowering::ArgListTy Args;
  Args.emplace_back(args&: Trmp, args&: IntPtrTy);
  // TrampSize == (isPPC64 ? 48 : 40);
  Args.emplace_back(
      args: DAG.getConstant(Val: isPPC64 ? 48 : 40, DL: dl, VT: Subtarget.getScalarIntVT()),
      args&: IntPtrTy);
  Args.emplace_back(args&: FPtr, args&: IntPtrTy);
  Args.emplace_back(args&: Nest, args&: IntPtrTy);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CC: CallingConv::C, ResultType: Type::getVoidTy(C&: *DAG.getContext()),
      Target: DAG.getExternalSymbol(Sym: "__trampoline_setup", VT: PtrVT), ArgsList: std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // Only the chain result is needed; the helper returns void.
  return CallResult.second;
}
3914
// Lower ISD::VASTART. On PPC64 and AIX the va_list is a plain pointer to the
// first vararg slot; on 32-bit SVR4 the full four-field va_list structure
// (documented below) is initialized.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
    return DAG.getStore(Chain: Op.getOperand(i: 0), dl, Val: FR, Ptr: Op.getOperand(i: 1),
                        PtrInfo: MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumGPR(), DL: dl, VT: MVT::i32);
  SDValue ArgFPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumFPR(), DL: dl, VT: MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsStackOffset(),
                                            VT: PtrVT);
  SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
                                 VT: PtrVT);

  // Byte distances between the fields being written; nextPtr is advanced by
  // these amounts between stores.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(Val: FrameOffset, DL: dl, VT: PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(Val: StackOffset, DL: dl, VT: PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(Val: FPROffset, DL: dl, VT: PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Chain: Op.getOperand(i: 0), dl, Val: ArgGPR, Ptr: Op.getOperand(i: 1),
                        PtrInfo: MachinePointerInfo(SV), SVT: MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Op.getOperand(i: 1),
                                N2: ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(Chain: firstStore, dl, Val: ArgFPR, Ptr: nextPtr,
                        PtrInfo: MachinePointerInfo(SV, nextOffset), SVT: MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(Chain: secondStore, dl, Val: StackOffsetFI, Ptr: nextPtr,
                                    PtrInfo: MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(Chain: thirdStore, dl, Val: FR, Ptr: nextPtr,
                      PtrInfo: MachinePointerInfo(SV, nextOffset));
}
3998
/// FPR - The set of FP registers (F1-F13, thirteen in total) that should be
/// allocated for arguments on Darwin and AIX.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
4004
4005/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4006/// the stack.
4007static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4008 unsigned PtrByteSize) {
4009 unsigned ArgSize = ArgVT.getStoreSize();
4010 if (Flags.isByVal())
4011 ArgSize = Flags.getByValSize();
4012
4013 // Round up to multiples of the pointer size, except for array members,
4014 // which are always packed.
4015 if (!Flags.isInConsecutiveRegs())
4016 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4017
4018 return ArgSize;
4019}
4020
4021/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4022/// on the stack.
4023static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4024 ISD::ArgFlagsTy Flags,
4025 unsigned PtrByteSize) {
4026 Align Alignment(PtrByteSize);
4027
4028 // Altivec parameters are padded to a 16 byte boundary.
4029 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4030 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4031 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4032 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4033 Alignment = Align(16);
4034
4035 // ByVal parameters are aligned as requested.
4036 if (Flags.isByVal()) {
4037 auto BVAlign = Flags.getNonZeroByValAlign();
4038 if (BVAlign > PtrByteSize) {
4039 if (BVAlign.value() % PtrByteSize != 0)
4040 llvm_unreachable(
4041 "ByVal alignment is not a multiple of the pointer size");
4042
4043 Alignment = BVAlign;
4044 }
4045 }
4046
4047 // Array members are always packed to their original alignment.
4048 if (Flags.isInConsecutiveRegs()) {
4049 // If the array member was split into multiple registers, the first
4050 // needs to be aligned to the size of the full type. (Except for
4051 // ppcf128, which is only aligned as its f64 components.)
4052 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4053 Alignment = Align(OrigVT.getStoreSize());
4054 else
4055 Alignment = Align(ArgVT.getStoreSize());
4056 }
4057
4058 return Alignment;
4059}
4060
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers). ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
/// Note: ArgOffset is always advanced, even when the argument ends up in a
/// register, because a stack slot is reserved for it either way.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize, unsigned LinkageSize,
                                   unsigned ParamAreaSize, unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  Align Alignment =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  // The slot after the last member of a consecutive-register run is padded
  // back out to a pointer-size multiple.
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
4110
4111/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4112/// ensure minimum alignment required for target.
4113static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4114 unsigned NumBytes) {
4115 return alignTo(Size: NumBytes, A: Lowering->getStackAlign());
4116}
4117
4118SDValue PPCTargetLowering::LowerFormalArguments(
4119 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4120 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4121 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4122 if (Subtarget.isAIXABI())
4123 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4124 InVals);
4125 if (Subtarget.is64BitELFABI())
4126 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4127 InVals);
4128 assert(Subtarget.is32BitELFABI());
4129 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4130 InVals);
4131}
4132
/// LowerFormalArguments_32SVR4 - Lower incoming arguments for the 32-bit ELF
/// SVR4 ABI. CC_PPC32_SVR4 assigns each non-byval argument to a register
/// (materialized as a live-in virtual-register copy) or to a parameter-area
/// slot (materialized as a fixed-object load); byval aggregates are assigned
/// separately by CC_PPC32_SVR4_ByVal into the local variable space; for
/// varargs functions the still-unallocated GPR/FPR argument registers are
/// spilled to a frame object so va_arg can walk them.
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(Size: LinkageSize, Alignment: PtrAlign);
  CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      // Select the register class matching the value type and the
      // floating-point/vector features of the subtarget.
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        // An SPE double arrives split across a pair of 32-bit GPRs;
        // reassemble the two halves into a single f64 node.
        assert(i + 1 < e && "No second half of double precision argument");
        Register RegLo = MF.addLiveIn(PReg: VA.getLocReg(), RC);
        Register RegHi = MF.addLiveIn(PReg: ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, Reg: RegLo, VT: MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, Reg: RegHi, VT: MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap (a&: ArgValueLo, b&: ArgValueHi);
        ArgValue = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: ArgValueLo,
                               N2: ArgValueHi);
      } else {
        Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
        // i1 values are promoted to i32 in registers; copy at i32 and
        // truncate back to i1 afterwards.
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      VT: ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: ArgValue);
      }

      InVals.push_back(Elt: ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
      InVals.push_back(
          Elt: DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getStackSize();
  MinReservedArea = std::max(a: MinReservedArea, b: LinkageSize);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = std::size(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = std::size(FPArgRegs);

    // Soft-float and SPE targets pass no arguments in FP registers, so
    // there is nothing to spill for them.
    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(Regs: GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(Regs: FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
        Size: PtrVT.getSizeInBits() / 8, SPOffset: CCInfo.getStackSize(), IsImmutable: true));

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Size: Depth, Alignment: Align(8), isSpillSlot: false));
    SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (MCPhysReg GPArgReg : GPArgRegs) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: GPArgReg);
      if (!VReg)
        VReg = MF.addLiveIn(PReg: GPArgReg, RC: &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
      SDValue Store =
          DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
      MemOps.push_back(Elt: Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(Val: PtrVT.getSizeInBits()/8, DL: dl, VT: PtrVT);
      FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(PReg: FPArgRegs[FPRIndex], RC: &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::f64);
      SDValue Store =
          DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
      MemOps.push_back(Elt: Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(Val: MVT(MVT::f64).getSizeInBits()/8, DL: dl,
                                       VT: PtrVT);
      FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
    }
  }

  // Chain all the vararg register spills together so they are ordered
  // before any use of the argument area.
  if (!MemOps.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);

  return Chain;
}
4377
4378// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4379// value to MVT::i64 and then truncate to the correct register size.
4380SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4381 EVT ObjectVT, SelectionDAG &DAG,
4382 SDValue ArgVal,
4383 const SDLoc &dl) const {
4384 if (Flags.isSExt())
4385 ArgVal = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: MVT::i64, N1: ArgVal,
4386 N2: DAG.getValueType(ObjectVT));
4387 else if (Flags.isZExt())
4388 ArgVal = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: MVT::i64, N1: ArgVal,
4389 N2: DAG.getValueType(ObjectVT));
4390
4391 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ObjectVT, Operand: ArgVal);
4392}
4393
/// LowerFormalArguments_64SVR4 - Lower incoming arguments for the 64-bit ELF
/// ABIs (ELFv1 and ELFv2). A first pass over the arguments decides whether
/// the caller is guaranteed to have allocated a parameter save area; the
/// main loop then maps each argument to GPRs/FPRs/VRs or to its
/// doubleword-aligned slot in the parameter area (byval aggregates get
/// their register portions spilled to a stack object), and for varargs
/// functions the remaining GPRs are spilled after the fixed arguments.
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = std::size(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = std::size(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame. In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (const ISD::InputArg &In : Ins) {
    // The 'nest' parameter is passed in R11 and occupies no slot.
    if (In.Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(ArgVT: In.VT, OrigVT: In.ArgVT, Flags: In.Flags, PtrByteSize,
                               LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
                               AvailableFPRs, AvailableVRs))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      // Keep FuncArg in sync with the IR argument this lowered piece
      // came from (one IR argument may expand to several Ins entries).
      std::advance(i&: FuncArg, n: Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset;
    Align Alignment;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Alignment =
          CalculateStackSlotAlignment(ArgVT: ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(a: GPR_idx, b: Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: ArgOffset, IsImmutable: true);
        SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
        InVals.push_back(Elt: FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: false, isAliased: true);
      else
        FI = MFI.CreateStackObject(Size: ArgSize, Alignment, isSpillSlot: false);
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(Val: PtrByteSize - ObjSize, DL: dl, VT: PtrVT);
          Arg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ArgOff.getValueType(), N1: Arg, N2: ArgOff);
        }
        InVals.push_back(Elt: Arg);

        if (GPR_idx != Num_GPR_Regs) {
          // Spill the register-resident part of the aggregate with a
          // truncating store of exactly ObjSize bytes.
          Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
          EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ObjSize * 8);
          SDValue Store =
              DAG.getTruncStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: Arg,
                                PtrInfo: MachinePointerInfo(&*FuncArg), SVT: ObjType);
          MemOps.push_back(Elt: Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(Elt: FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(Val: j, DL: dl, VT: PtrVT);
          Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Off.getValueType(), N1: Addr, N2: Off);
        }
        // The final doubleword may be partially occupied; store only the
        // bytes that belong to the aggregate.
        unsigned StoreSizeInBits = std::min(a: PtrByteSize, b: (ObjSize - j)) * 8;
        EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreSizeInBits);
        SDValue Store =
            DAG.getTruncStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: Addr,
                              PtrInfo: MachinePointerInfo(&*FuncArg, j), SVT: ObjType);
        MemOps.push_back(Elt: Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        Register VReg = MF.addLiveIn(PReg: PPC::X11, RC: &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(PReg: FPR[FPR_idx],
                              RC: Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(PReg: FPR[FPR_idx], RC: Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Extract the f32 bits from whichever half of the doubleword
          // the ABI placed them in.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgVal,
                                 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
          ArgVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: ArgVal);
        }

        ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ObjectVT, Operand: ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        Register VReg = MF.addLiveIn(PReg: VR[VR_idx], RC: &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
        ++VR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // Scalars smaller than a doubleword are right-justified in their
      // slot on big-endian systems.
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(Size: ObjSize, SPOffset: CurArgOffset, IsImmutable: isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
      ArgVal = DAG.getLoad(VT: ObjectVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo());
    }

    InVals.push_back(Elt: ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(a: ArgOffset, b: LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  // On ELFv2ABI spec, it writes:
  // C programs that are intended to be *portable* across different compilers
  // and architectures must use the header file <stdarg.h> to deal with variable
  // argument lists.
  if (isVarArg && MFI.hasVAStart()) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: Depth, IsImmutable: true));
    SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
      SDValue Store =
          DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
      MemOps.push_back(Elt: Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
      FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
    }
  }

  // Chain all argument-area stores together so they are ordered before
  // any use of the argument values.
  if (!MemOps.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);

  return Chain;
}
4769
4770/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4771/// adjusted to accommodate the arguments for the tailcall.
4772static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4773 unsigned ParamSize) {
4774
4775 if (!isTailCall) return 0;
4776
4777 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4778 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4779 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4780 // Remember only if the new adjustment is bigger.
4781 if (SPDiff < FI->getTailCallSPDelta())
4782 FI->setTailCallSPDelta(SPDiff);
4783
4784 return SPDiff;
4785}
4786
4787static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4788
4789static bool callsShareTOCBase(const Function *Caller,
4790 const GlobalValue *CalleeGV,
4791 const TargetMachine &TM) {
4792 // It does not make sense to call callsShareTOCBase() with a caller that
4793 // is PC Relative since PC Relative callers do not have a TOC.
4794#ifndef NDEBUG
4795 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4796 assert(!STICaller->isUsingPCRelativeCalls() &&
4797 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4798#endif
4799
4800 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4801 // don't have enough information to determine if the caller and callee share
4802 // the same TOC base, so we have to pessimistically assume they don't for
4803 // correctness.
4804 if (!CalleeGV)
4805 return false;
4806
4807 // If the callee is preemptable, then the static linker will use a plt-stub
4808 // which saves the toc to the stack, and needs a nop after the call
4809 // instruction to convert to a toc-restore.
4810 if (!TM.shouldAssumeDSOLocal(GV: CalleeGV))
4811 return false;
4812
4813 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4814 // We may need a TOC restore in the situation where the caller requires a
4815 // valid TOC but the callee is PC Relative and does not.
4816 const Function *F = dyn_cast<Function>(Val: CalleeGV);
4817 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(Val: CalleeGV);
4818
4819 // If we have an Alias we can try to get the function from there.
4820 if (Alias) {
4821 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4822 F = dyn_cast<Function>(Val: GlobalObj);
4823 }
4824
4825 // If we still have no valid function pointer we do not have enough
4826 // information to determine if the callee uses PC Relative calls so we must
4827 // assume that it does.
4828 if (!F)
4829 return false;
4830
4831 // If the callee uses PC Relative we cannot guarantee that the callee won't
4832 // clobber the TOC of the caller and so we must assume that the two
4833 // functions do not share a TOC base.
4834 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(F: *F);
4835 if (STICallee->isUsingPCRelativeCalls())
4836 return false;
4837
4838 // If the GV is not a strong definition then we need to assume it can be
4839 // replaced by another function at link time. The function that replaces
4840 // it may not share the same TOC as the caller since the callee may be
4841 // replaced by a PC Relative version of the same function.
4842 if (!CalleeGV->isStrongDefinitionForLinker())
4843 return false;
4844
4845 // The medium and large code models are expected to provide a sufficiently
4846 // large TOC to provide all data addressing needs of a module with a
4847 // single TOC.
4848 if (CodeModel::Medium == TM.getCodeModel() ||
4849 CodeModel::Large == TM.getCodeModel())
4850 return true;
4851
4852 // Any explicitly-specified sections and section prefixes must also match.
4853 // Also, if we're using -ffunction-sections, then each function is always in
4854 // a different section (the same is true for COMDAT functions).
4855 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4856 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4857 return false;
4858 if (const auto *F = dyn_cast<Function>(Val: CalleeGV)) {
4859 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4860 return false;
4861 }
4862
4863 return true;
4864}
4865
4866static bool
4867needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4868 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4869 assert(Subtarget.is64BitELFABI());
4870
4871 const unsigned PtrByteSize = 8;
4872 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4873
4874 static const MCPhysReg GPR[] = {
4875 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4876 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4877 };
4878 static const MCPhysReg VR[] = {
4879 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4880 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4881 };
4882
4883 const unsigned NumGPRs = std::size(GPR);
4884 const unsigned NumFPRs = 13;
4885 const unsigned NumVRs = std::size(VR);
4886 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4887
4888 unsigned NumBytes = LinkageSize;
4889 unsigned AvailableFPRs = NumFPRs;
4890 unsigned AvailableVRs = NumVRs;
4891
4892 for (const ISD::OutputArg& Param : Outs) {
4893 if (Param.Flags.isNest()) continue;
4894
4895 if (CalculateStackSlotUsed(ArgVT: Param.VT, OrigVT: Param.ArgVT, Flags: Param.Flags, PtrByteSize,
4896 LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
4897 AvailableFPRs, AvailableVRs))
4898 return true;
4899 }
4900 return false;
4901}
4902
4903static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4904 if (CB.arg_size() != CallerFn->arg_size())
4905 return false;
4906
4907 auto CalleeArgIter = CB.arg_begin();
4908 auto CalleeArgEnd = CB.arg_end();
4909 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4910
4911 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4912 const Value* CalleeArg = *CalleeArgIter;
4913 const Value* CallerArg = &(*CallerArgIter);
4914 if (CalleeArg == CallerArg)
4915 continue;
4916
4917 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4918 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4919 // }
4920 // 1st argument of callee is undef and has the same type as caller.
4921 if (CalleeArg->getType() == CallerArg->getType() &&
4922 isa<UndefValue>(Val: CalleeArg))
4923 continue;
4924
4925 return false;
4926 }
4927
4928 return true;
4929}
4930
4931// Returns true if TCO is possible between the callers and callees
4932// calling conventions.
4933static bool
4934areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4935 CallingConv::ID CalleeCC) {
4936 // Tail calls are possible with fastcc and ccc.
4937 auto isTailCallableCC = [] (CallingConv::ID CC){
4938 return CC == CallingConv::C || CC == CallingConv::Fast;
4939 };
4940 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4941 return false;
4942
4943 // We can safely tail call both fastcc and ccc callees from a c calling
4944 // convention caller. If the caller is fastcc, we may have less stack space
4945 // than a non-fastcc caller with the same signature so disable tail-calls in
4946 // that case.
4947 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4948}
4949
/// Decide whether a call lowered for the 64-bit SVR4 (ELF) ABI may be
/// emitted as a tail call (under -tailcallopt) or sibling call.
///
/// \param CalleeGV  Callee global value; null for indirect calls.
/// \param CalleeCC  Calling convention of the callee.
/// \param CallerCC  Calling convention of the calling function.
/// \param CB        Originating call instruction; may be null (PC-relative
///                  tail calls can be synthesized without a CallBase).
/// \param isVarArg  True if the callee is variadic (always rejected).
/// \param Outs      Lowered outgoing arguments of the call.
/// \param Ins       Lowered incoming arguments of the caller.
/// \param CallerFunc  IR function containing the call.
/// \param isCalleeExternalSymbol  True if the callee is an ExternalSymbol
///                  node rather than a global address.
/// \returns true if the call is eligible for tail call optimization.
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
    bool isCalleeExternalSymbol) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // Sibling-call optimization can be disabled entirely unless the user has
  // explicitly asked for guaranteed tail calls.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   struct test {
  //     long int a;
  //     char ary[56];
  //   } gTest;
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     b->a = v.a;
  //     return 0;
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Range: Outs, P: [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(Caller: CallerFunc, CalleeGV, TM: getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(CallerFn: CallerFunc, CB: *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}
5033
5034/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5035/// for tail call optimization. Targets which want to do tail call
5036/// optimization should implement this function.
5037bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5038 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5039 CallingConv::ID CallerCC, bool isVarArg,
5040 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5041 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5042 return false;
5043
5044 // Variable argument functions are not supported.
5045 if (isVarArg)
5046 return false;
5047
5048 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5049 // Functions containing by val parameters are not supported.
5050 if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5051 return false;
5052
5053 // Non-PIC/GOT tail calls are supported.
5054 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5055 return true;
5056
5057 // At the moment we can only do local tail calls (in same module, hidden
5058 // or protected) if we are generating PIC.
5059 if (CalleeGV)
5060 return CalleeGV->hasHiddenVisibility() ||
5061 CalleeGV->hasProtectedVisibility();
5062 }
5063
5064 return false;
5065}
5066
/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
/// Returns the constant node (already shifted right by 2) on success, or
/// nullptr if the value cannot be encoded.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  // Only constant addresses can be encoded directly in the instruction.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  // The target must be word-aligned and fit in 26 signed bits (the LI field
  // stores the address with the two implied low zero bits dropped).
  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
      SignExtend32<26>(X: Addr) != Addr)
    return nullptr; // Top 6 bits have to be sext of immediate.

  // Encode the immediate shifted right by 2; a signed constant preserves the
  // sign of the displacement after the arithmetic shift.
  return DAG
      .getSignedConstant(
          Val: (int)C->getZExtValue() >> 2, DL: SDLoc(Op),
          VT: DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()))
      .getNode();
}
5084
namespace {

/// TailCallArgumentInfo - Bundles an outgoing tail-call argument with the
/// fixed stack slot it will eventually be stored to.
struct TailCallArgumentInfo {
  // The argument value to be stored.
  SDValue Arg;
  // Frame-index node addressing the destination stack slot.
  SDValue FrameIdxOp;
  // Raw frame index of the destination slot.
  int FrameIdx = 0;

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
5096
5097/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5098static void StoreTailCallArgumentsToStackSlot(
5099 SelectionDAG &DAG, SDValue Chain,
5100 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5101 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5102 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5103 SDValue Arg = TailCallArgs[i].Arg;
5104 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5105 int FI = TailCallArgs[i].FrameIdx;
5106 // Store relative to framepointer.
5107 MemOpChains.push_back(Elt: DAG.getStore(
5108 Chain, dl, Val: Arg, Ptr: FIN,
5109 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)));
5110 }
5111}
5112
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
/// NOTE(review): despite the name, only the return address is stored here —
/// the OldFP parameter is currently unused; confirm whether frame-pointer
/// saving was intentionally dropped.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
                                             SDValue OldRetAddr, SDValue OldFP,
                                             int SPDiff, const SDLoc &dl) {
  // Only needed when the callee's frame differs in size from the caller's
  // (SPDiff != 0); otherwise the slot is already correctly placed.
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    int SlotSize = Subtarget.isPPC64() ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(Size: SlotSize,
                                                         SPOffset: NewRetAddrLoc, IsImmutable: true);
    SDValue NewRetAddrFrIdx =
        DAG.getFrameIndex(FI: NewRetAddr, VT: Subtarget.getScalarIntVT());
    // Store the old return address into the relocated save slot.
    Chain = DAG.getStore(Chain, dl, Val: OldRetAddr, Ptr: NewRetAddrFrIdx,
                         PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: NewRetAddr));
  }
  return Chain;
}
5134
5135/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5136/// the position of the argument.
5137static void CalculateTailCallArgDest(
5138 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5139 int SPDiff, unsigned ArgOffset,
5140 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5141 int Offset = ArgOffset + SPDiff;
5142 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5143 int FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
5144 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5145 SDValue FIN = DAG.getFrameIndex(FI, VT);
5146 TailCallArgumentInfo Info;
5147 Info.Arg = Arg;
5148 Info.FrameIdxOp = FIN;
5149 Info.FrameIdx = FI;
5150 TailCallArguments.push_back(Elt: Info);
5151}
5152
/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
/// stack slot. Returns the chain as result and the loaded frame pointers in
/// LROpOut/FPOpout. Used when tail calling.
/// NOTE(review): only LROpOut is populated; FPOpOut is left untouched —
/// presumably the frame-pointer reload is unnecessary here; confirm.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
    SDValue &FPOpOut, const SDLoc &dl) const {
  // Nothing to do when the stack pointer is not being adjusted.
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT: Subtarget.getScalarIntVT(), dl, Chain, Ptr: LROpOut,
                          PtrInfo: MachinePointerInfo());
    // Thread the load's output chain so it cannot be reordered past later
    // stores that may overwrite the slot.
    Chain = SDValue(LROpOut.getNode(), 1);
  }
  return Chain;
}
5168
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
/// \returns the memcpy node, whose chain callers must use from here on.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  // The copy length and alignment both come from the byval parameter flags.
  SDValue SizeNode = DAG.getConstant(Val: Flags.getByValSize(), DL: dl, VT: MVT::i32);
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, Size: SizeNode, Alignment: Flags.getNonZeroByValAlign(), isVol: false, AlwaysInline: false,
      /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
}
5183
5184/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5185/// tail calls.
5186static void LowerMemOpCallTo(
5187 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5188 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5189 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5190 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5191 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
5192 if (!isTailCall) {
5193 if (isVector) {
5194 SDValue StackPtr;
5195 if (isPPC64)
5196 StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
5197 else
5198 StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
5199 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr,
5200 N2: DAG.getConstant(Val: ArgOffset, DL: dl, VT: PtrVT));
5201 }
5202 MemOpChains.push_back(
5203 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
5204 // Calculate and remember argument location.
5205 } else
5206 CalculateTailCallArgDest(DAG, MF, IsPPC64: isPPC64, Arg, SPDiff, ArgOffset,
5207 TailCallArguments);
5208}
5209
/// PrepareTailCall - Emit the deferred stores of tail-call arguments into
/// their final stack slots, store the return address to its relocated slot,
/// and close the call sequence. Updates \p InGlue and \p Chain in place for
/// the subsequent tail-call node.
static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InGlue = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArgs: TailCallArguments,
                                    MemOpChains&: MemOpChains2, dl);
  // Merge all the argument stores into a single chain node.
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, OldRetAddr: LROp, OldFP: FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue: InGlue, DL: dl);
  InGlue = Chain.getValue(R: 1);
}
5232
5233// Is this global address that of a function that can be called by name? (as
5234// opposed to something that must hold a descriptor for an indirect call).
5235static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5236 if (GV) {
5237 if (GV->isThreadLocal())
5238 return false;
5239
5240 return GV->getValueType()->isFunctionTy();
5241 }
5242
5243 return false;
5244}
5245
/// LowerCallResult - Copy the call's results out of their assigned physical
/// registers, apply any extension/truncation required by the value
/// assignment, and append the values to \p InVals. Returns the updated chain.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // Let the calling-convention logic decide which registers hold the results.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  CCRetInfo.AnalyzeCallResult(
      Ins, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    // With SPE, an f64 result is split across two consecutive i32 register
    // locations and must be reassembled with BUILD_SPE64.
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
                                      Glue: InGlue);
      Chain = Lo.getValue(R: 1);
      InGlue = Lo.getValue(R: 2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
                                      Glue: InGlue);
      Chain = Hi.getValue(R: 1);
      InGlue = Hi.getValue(R: 2);
      // Register order of the halves depends on endianness.
      if (!Subtarget.isLittleEndian())
        std::swap (a&: Lo, b&: Hi);
      Val = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue: InGlue);
      Chain = Val.getValue(R: 1);
      InGlue = Val.getValue(R: 2);
    }

    // Undo any promotion the calling convention applied to the value.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: VA.getLocVT(), N1: Val,
                        N2: DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: VA.getLocVT(), N1: Val,
                        N2: DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
      break;
    }

    InVals.push_back(Elt: Val);
  }

  return Chain;
}
5309
5310static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5311 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5312 auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5313 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5314
5315 // PatchPoint calls are not indirect.
5316 if (isPatchPoint)
5317 return false;
5318
5319 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Val: Callee))
5320 return false;
5321
5322 // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5323 // becuase the immediate function pointer points to a descriptor instead of
5324 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5325 // pointer immediate points to the global entry point, while the BLA would
5326 // need to jump to the local entry point (see rL211174).
5327 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5328 isBLACompatibleAddress(Op: Callee, DAG))
5329 return false;
5330
5331 return true;
5332}
5333
5334// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5335static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5336 return Subtarget.isAIXABI() ||
5337 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5338}
5339
/// Pick the PPCISD call opcode for this call: tail-call return, an indirect
/// branch through CTR (with or without TOC restore), a PC-relative call, or
/// a direct call (with or without a TOC-restore nop). When \p IsStrictFPCall
/// is set, the chosen opcode is swapped for its rounding-mode-preserving
/// (_RM) variant.
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
                              const Function &Caller, const SDValue &Callee,
                              const PPCSubtarget &Subtarget,
                              const TargetMachine &TM,
                              bool IsStrictFPCall = false) {
  // Tail calls are emitted as a return-with-branch.
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  unsigned RetOpc = 0;
  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                 : PPCISD::BCTRL;
  } else if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    RetOpc = PPCISD::CALL_NOTOC;
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
    // The ABIs that maintain a TOC pointer across calls need to have a nop
    // immediately following the call instruction if the caller and callee may
    // have different TOC bases. At link time if the linker determines the calls
    // may not share a TOC base, the call is redirected to a trampoline inserted
    // by the linker. The trampoline will (among other things) save the callers
    // TOC pointer at an ABI designated offset in the linkage area and the
    // linker will rewrite the nop to be a load of the TOC pointer from the
    // linkage area into gpr2.
    auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    RetOpc =
        callsShareTOCBase(Caller: &Caller, CalleeGV: GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
  } else
    RetOpc = PPCISD::CALL;
  // Strict FP calls must preserve the rounding mode; map each opcode to its
  // _RM counterpart.
  if (IsStrictFPCall) {
    switch (RetOpc) {
    default:
      llvm_unreachable("Unknown call opcode");
    case PPCISD::BCTRL_LOAD_TOC:
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
      break;
    case PPCISD::BCTRL:
      RetOpc = PPCISD::BCTRL_RM;
      break;
    case PPCISD::CALL_NOTOC:
      RetOpc = PPCISD::CALL_NOTOC_RM;
      break;
    case PPCISD::CALL:
      RetOpc = PPCISD::CALL_RM;
      break;
    case PPCISD::CALL_NOP:
      RetOpc = PPCISD::CALL_NOP_RM;
      break;
    }
  }
  return RetOpc;
}
5402
5403static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5404 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5405 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5406 if (SDNode *Dest = isBLACompatibleAddress(Op: Callee, DAG))
5407 return SDValue(Dest, 0);
5408
5409 // Returns true if the callee is local, and false otherwise.
5410 auto isLocalCallee = [&]() {
5411 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5412 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5413
5414 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5415 !isa_and_nonnull<GlobalIFunc>(Val: GV);
5416 };
5417
5418 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5419 // a static relocation model causes some versions of GNU LD (2.17.50, at
5420 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5421 // built with secure-PLT.
5422 bool UsePlt =
5423 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5424 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5425
5426 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5427 const TargetMachine &TM = Subtarget.getTargetMachine();
5428 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5429 auto *S =
5430 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(Func: GV, TM));
5431
5432 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
5433 return DAG.getMCSymbol(Sym: S, VT: PtrVT);
5434 };
5435
5436 auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5437 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5438 if (isFunctionGlobalAddress(GV)) {
5439 const GlobalValue *GV = cast<GlobalAddressSDNode>(Val: Callee)->getGlobal();
5440
5441 if (Subtarget.isAIXABI()) {
5442 return getAIXFuncEntryPointSymbolSDNode(GV);
5443 }
5444 return DAG.getTargetGlobalAddress(GV, DL: dl, VT: Callee.getValueType(), offset: 0,
5445 TargetFlags: UsePlt ? PPCII::MO_PLT : 0);
5446 }
5447
5448 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val: Callee)) {
5449 const char *SymName = S->getSymbol();
5450 if (Subtarget.isAIXABI()) {
5451 // If there exists a user-declared function whose name is the same as the
5452 // ExternalSymbol's, then we pick up the user-declared version.
5453 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5454 if (const Function *F =
5455 dyn_cast_or_null<Function>(Val: Mod->getNamedValue(Name: SymName)))
5456 return getAIXFuncEntryPointSymbolSDNode(F);
5457
5458 // On AIX, direct function calls reference the symbol for the function's
5459 // entry point, which is named by prepending a "." before the function's
5460 // C-linkage name. A Qualname is returned here because an external
5461 // function entry point is a csect with XTY_ER property.
5462 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5463 auto &Context = DAG.getMachineFunction().getContext();
5464 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5465 Section: (Twine(".") + Twine(SymName)).str(), K: SectionKind::getMetadata(),
5466 CsectProp: XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5467 return Sec->getQualNameSymbol();
5468 };
5469
5470 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5471 }
5472 return DAG.getTargetExternalSymbol(Sym: SymName, VT: Callee.getValueType(),
5473 TargetFlags: UsePlt ? PPCII::MO_PLT : 0);
5474 }
5475
5476 // No transformation needed.
5477 assert(Callee.getNode() && "What no callee?");
5478 return Callee;
5479}
5480
5481static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5482 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5483 "Expected a CALLSEQ_STARTSDNode.");
5484
5485 // The last operand is the chain, except when the node has glue. If the node
5486 // has glue, then the last operand is the glue, and the chain is the second
5487 // last operand.
5488 SDValue LastValue = CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 1);
5489 if (LastValue.getValueType() != MVT::Glue)
5490 return LastValue;
5491
5492 return CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 2);
5493}
5494
5495// Creates the node that moves a functions address into the count register
5496// to prepare for an indirect call instruction.
5497static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5498 SDValue &Glue, SDValue &Chain,
5499 const SDLoc &dl) {
5500 SDValue MTCTROps[] = {Chain, Callee, Glue};
5501 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5502 Chain = DAG.getNode(Opcode: PPCISD::MTCTR, DL: dl, ResultTys: ReturnTypes,
5503 Ops: ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5504 // The glue is the second value produced.
5505 Glue = Chain.getValue(R: 1);
5506}
5507
/// Lower an indirect call through a function descriptor (64-bit SVR4/AIX):
/// load the entry point, TOC anchor and environment pointer from the
/// descriptor, copy the latter two into their ABI registers, and move the
/// entry point into CTR. Updates \p Callee/\p Glue/\p Chain in place.
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  // If descriptors are invariant, mark the loads so they can be freely
  // hoisted/CSE'd.
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                      ? (MachineMemOperand::MODereferenceable |
                         MachineMemOperand::MOInvariant)
                      : MachineMemOperand::MONone;

  // Base the pointer info on the called operand when available for better
  // alias information.
  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.getScalarIntVT();
  const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: Callee, PtrInfo: MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCAnchorOffset, DL: dl);
  SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddTOC,
                  PtrInfo: MPI.getWithOffset(O: TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(Val: EnvPtrOffset, DL: dl);
  SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddPtr,
                  PtrInfo: MPI.getWithOffset(O: EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, Reg: TOCReg, N: TOCPtr, Glue);
  Chain = TOCVal.getValue(R: 0);
  Glue = TOCVal.getValue(R: 1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, Reg: EnvPtrReg, N: LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(R: 0);
    Glue = EnvVal.getValue(R: 1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, Callee&: LoadFuncPtr, Glue, Chain, dl);
}
5599
// Assemble the flat operand list for the final call (or TC_RETURN) node.
// The order of operands is significant and is consumed by the instruction
// selection patterns:
//   chain,
//   callee (direct calls) | [TOC-restore address, env-pointer reg, CTR reg]
//     (indirect calls),
//   stack-pointer delta (tail calls only),
//   argument registers...,
//   TOC pointer register (TOC-based ABIs),
//   CR1EQ (32-bit SVR4 vararg),
//   call-preserved register mask,
//   glue (if present, always last).
static void
buildCallOperands(SmallVectorImpl<SDValue> &Ops,
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = Subtarget.getScalarIntVT();

  // First operand is always the chain.
  Ops.push_back(Elt: Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Elt: Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
      SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: StackPtr, N2: TOCOff);
      Ops.push_back(Elt: AddTOC);
    }

    // Add the register used for the environment pointer.
    // A 'nest' parameter takes the environment pointer register instead, so
    // it is omitted here (see prepareDescriptorIndirectCall).
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getEnvironmentPointerRegister(),
                                     VT: RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(Elt: DAG.getRegister(Reg: IsPPC64 ? PPC::CTR8 : PPC::CTR, VT: RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(Elt: DAG.getConstant(Val: SPDiff, DL: dl, VT: MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (const auto &[Reg, N] : RegsToPass)
    Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
      !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getTOCPointerRegister(), VT: RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(Elt: DAG.getRegister(Reg: PPC::CR1EQ, VT: MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Elt: Glue);
}
5681
/// Final stage of call lowering shared by all PPC ABIs: select the call
/// opcode, materialize the callee (direct or indirect, with or without a
/// function descriptor), build the operand list, and then either emit a
/// TC_RETURN node for tail calls, or a normal call node followed by
/// CALLSEQ_END and the lowering of the returned values into \p InVals.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // TOC-based ABIs (64-bit ELF without PC-relative addressing, and AIX)
  // need the TOC base pointer marked as used for this function.
  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, Caller: DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, TM: DAG.getTarget(), IsStrictFPCall: CB ? CB->isStrictFP() : false);

  // Direct calls only rewrite the callee node; indirect calls may need the
  // full descriptor-load sequence (TOC/environment pointer restore) or just
  // a move of the target address into CTR.
  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, hasNest: CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  SmallVector<SDValue, 8> Ops;
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(Opcode: CallOpc, DL: dl, VT: MVT::Other, Ops);
    DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CFlags.NoMerge);
    return Ret;
  }

  // Non-tail call: the call node produces a chain and a glue value; the glue
  // links the call to the CALLSEQ_END and return-value copies below.
  std::array<EVT, 2> ReturnTypes = {._M_elems: {MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(Opcode: CallOpc, DL: dl, ResultTys: ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CFlags.NoMerge);
  Glue = Chain.getValue(R: 1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                         getTargetMachine().Options.GuaranteedTailCallOpt)
                            ? NumBytes
                            : 0;

  Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: BytesCalleePops, Glue, DL: dl);
  Glue = Chain.getValue(R: 1);

  return LowerCallResult(Chain, InGlue: Glue, CallConv: CFlags.CallConv, isVarArg: CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5751
5752bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5753 CallingConv::ID CalleeCC = CB->getCallingConv();
5754 const Function *CallerFunc = CB->getCaller();
5755 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5756 const Function *CalleeFunc = CB->getCalledFunction();
5757 if (!CalleeFunc)
5758 return false;
5759 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(Val: CalleeFunc);
5760
5761 SmallVector<ISD::OutputArg, 2> Outs;
5762 SmallVector<ISD::InputArg, 2> Ins;
5763
5764 GetReturnInfo(CC: CalleeCC, ReturnType: CalleeFunc->getReturnType(),
5765 attr: CalleeFunc->getAttributes(), Outs, TLI: *this,
5766 DL: CalleeFunc->getDataLayout());
5767
5768 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5769 isVarArg: CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5770 isCalleeExternalSymbol: false /*isCalleeExternalSymbol*/);
5771}
5772
5773bool PPCTargetLowering::isEligibleForTCO(
5774 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5775 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5776 const SmallVectorImpl<ISD::OutputArg> &Outs,
5777 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5778 bool isCalleeExternalSymbol) const {
5779 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5780 return false;
5781
5782 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5783 return IsEligibleForTailCallOptimization_64SVR4(
5784 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5785 isCalleeExternalSymbol);
5786 else
5787 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5788 isVarArg, Ins);
5789}
5790
/// Common entry point for lowering an outgoing call: decides tail-call
/// eligibility, builds the CallFlags bundle, and dispatches to the
/// ABI-specific lowering (AIX, 64-bit SVR4 or 32-bit SVR4).
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  // Note: isTailCall aliases CLI.IsTailCall and is refined in place below.
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  if (isTailCall) {
    MachineFunction &MF = DAG.getMachineFunction();
    CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
    auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
    bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Val: Callee);

    // Downgrade the request if this call is not actually eligible for TCO.
    isTailCall =
        isEligibleForTCO(CalleeGV: GV, CalleeCC: CallConv, CallerCC, CB, isVarArg, Outs, Ins,
                         CallerFunc: &(MF.getFunction()), isCalleeExternalSymbol: IsCalleeExternalSymbol);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  // musttail demands a tail call; failing to produce one is a hard error.
  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error(reason: "failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Val: Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Op: Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest
      Subtarget.is64BitELFABI() &&
          any_of(Range&: Outs, P: [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
      CLI.NoMerge);

  // Dispatch to the ABI-specific lowering.
  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  assert(Subtarget.isSVR4ABI());
  if (Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5868
/// Lower an outgoing call under the 32-bit SVR4 (ELF) ABI: assign argument
/// locations (with special handling for varargs and by-value aggregates),
/// adjust the stack, copy arguments into registers or the parameter area,
/// signal floating-point varargs via CR bit 6, and hand off to FinishCall().
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  // 32-bit ABI: pointers are 4 bytes.
  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Size: Subtarget.getFrameLowering()->getLinkageSize(),
                       Alignment: PtrAlign);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (!ArgFlags.isVarArg()) {
        Result = CC_PPC32_SVR4(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full, ArgFlags,
                               OrigTy: Outs[i].OrigTy, State&: CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full,
                                      ArgFlags, OrigTy: Outs[i].OrigTy, State&: CCInfo);
      }

      // A nonzero Result means the calling convention function could not
      // assign this argument — that is a backend bug, not user error.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << ArgVT << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4);
  }

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getStackSize();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: IsTailCall, ParamSize: NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
      PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
                           N1: StackPtr, N2: PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
                                  Chain: CallSeqStart.getNode()->getOperand(Num: 0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: NumBytes, OutSize: 0,
                                                     DL: SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
                             To: NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Opcode: Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        DL: dl, VT: MVT::i32, Operand: Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      // With SPE, an f64 value is split into two i32 halves, each carried in
      // its own GPR; this consumes two consecutive ArgLocs entries (++i).
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
                                   N2: DAG.getIntPtrConstant(Val: IsLE ? 0 : 1, DL: dl));
        RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y: SVal.getValue(R: 0)));
        SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
                           N2: DAG.getIntPtrConstant(Val: IsLE ? 1 : 0, DL: dl));
        RegsToPass.push_back(Elt: std::make_pair(x: ArgLocs[++i].getLocReg(),
                                             y: SVal.getValue(R: 0)));
      } else
        RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
        PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
                             N1: StackPtr, N2: PtrOff);

        MemOpChains.push_back(
            Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, IsPPC64: false, Arg, SPDiff, ArgOffset: LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (const auto &[Reg, N] : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
    InGlue = Chain.getValue(R: 1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
    SDValue Ops[] = { Chain, InGlue };

    Chain = DAG.getNode(Opcode: seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, DL: dl,
                        VTList: VTs, Ops: ArrayRef(Ops, InGlue.getNode() ? 2 : 1));

    InGlue = Chain.getValue(R: 1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6101
6102// Copy an argument into memory, being careful to do this outside the
6103// call sequence for the call to which the argument belongs.
6104SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6105 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6106 SelectionDAG &DAG, const SDLoc &dl) const {
6107 SDValue MemcpyCall = CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
6108 Chain: CallSeqStart.getNode()->getOperand(Num: 0),
6109 Flags, DAG, dl);
6110 // The MEMCPY must go outside the CALLSEQ_START..END.
6111 int64_t FrameSize = CallSeqStart.getConstantOperandVal(i: 1);
6112 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: FrameSize, OutSize: 0,
6113 DL: SDLoc(MemcpyCall));
6114 DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
6115 To: NewCallSeqStart.getNode());
6116 return NewCallSeqStart;
6117}
6118
6119SDValue PPCTargetLowering::LowerCall_64SVR4(
6120 SDValue Chain, SDValue Callee, CallFlags CFlags,
6121 const SmallVectorImpl<ISD::OutputArg> &Outs,
6122 const SmallVectorImpl<SDValue> &OutVals,
6123 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6124 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6125 const CallBase *CB) const {
6126 bool isELFv2ABI = Subtarget.isELFv2ABI();
6127 bool isLittleEndian = Subtarget.isLittleEndian();
6128 unsigned NumOps = Outs.size();
6129 bool IsSibCall = false;
6130 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6131
6132 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
6133 unsigned PtrByteSize = 8;
6134
6135 MachineFunction &MF = DAG.getMachineFunction();
6136
6137 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6138 IsSibCall = true;
6139
6140 // Mark this function as potentially containing a function that contains a
6141 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6142 // and restoring the callers stack pointer in this functions epilog. This is
6143 // done because by tail calling the called function might overwrite the value
6144 // in this function's (MF) stack pointer stack slot 0(SP).
6145 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6146 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6147
6148 assert(!(IsFastCall && CFlags.IsVarArg) &&
6149 "fastcc not supported on varargs functions");
6150
6151 // Count how many bytes are to be pushed on the stack, including the linkage
6152 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6153 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6154 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6155 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6156 unsigned NumBytes = LinkageSize;
6157 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6158
6159 static const MCPhysReg GPR[] = {
6160 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6161 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6162 };
6163 static const MCPhysReg VR[] = {
6164 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6165 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6166 };
6167
6168 const unsigned NumGPRs = std::size(GPR);
6169 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6170 const unsigned NumVRs = std::size(VR);
6171
6172 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6173 // can be passed to the callee in registers.
6174 // For the fast calling convention, there is another check below.
6175 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6176 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6177 if (!HasParameterArea) {
6178 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6179 unsigned AvailableFPRs = NumFPRs;
6180 unsigned AvailableVRs = NumVRs;
6181 unsigned NumBytesTmp = NumBytes;
6182 for (unsigned i = 0; i != NumOps; ++i) {
6183 if (Outs[i].Flags.isNest()) continue;
6184 if (CalculateStackSlotUsed(ArgVT: Outs[i].VT, OrigVT: Outs[i].ArgVT, Flags: Outs[i].Flags,
6185 PtrByteSize, LinkageSize, ParamAreaSize,
6186 ArgOffset&: NumBytesTmp, AvailableFPRs, AvailableVRs))
6187 HasParameterArea = true;
6188 }
6189 }
6190
6191 // When using the fast calling convention, we don't provide backing for
6192 // arguments that will be in registers.
6193 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6194
6195 // Avoid allocating parameter area for fastcc functions if all the arguments
6196 // can be passed in the registers.
6197 if (IsFastCall)
6198 HasParameterArea = false;
6199
6200 // Add up all the space actually used.
6201 for (unsigned i = 0; i != NumOps; ++i) {
6202 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6203 EVT ArgVT = Outs[i].VT;
6204 EVT OrigVT = Outs[i].ArgVT;
6205
6206 if (Flags.isNest())
6207 continue;
6208
6209 if (IsFastCall) {
6210 if (Flags.isByVal()) {
6211 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6212 if (NumGPRsUsed > NumGPRs)
6213 HasParameterArea = true;
6214 } else {
6215 switch (ArgVT.getSimpleVT().SimpleTy) {
6216 default: llvm_unreachable("Unexpected ValueType for argument!");
6217 case MVT::i1:
6218 case MVT::i32:
6219 case MVT::i64:
6220 if (++NumGPRsUsed <= NumGPRs)
6221 continue;
6222 break;
6223 case MVT::v4i32:
6224 case MVT::v8i16:
6225 case MVT::v16i8:
6226 case MVT::v2f64:
6227 case MVT::v2i64:
6228 case MVT::v1i128:
6229 case MVT::f128:
6230 if (++NumVRsUsed <= NumVRs)
6231 continue;
6232 break;
6233 case MVT::v4f32:
6234 if (++NumVRsUsed <= NumVRs)
6235 continue;
6236 break;
6237 case MVT::f32:
6238 case MVT::f64:
6239 if (++NumFPRsUsed <= NumFPRs)
6240 continue;
6241 break;
6242 }
6243 HasParameterArea = true;
6244 }
6245 }
6246
6247 /* Respect alignment of argument on the stack. */
6248 auto Alignement =
6249 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6250 NumBytes = alignTo(Size: NumBytes, A: Alignement);
6251
6252 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6253 if (Flags.isInConsecutiveRegsLast())
6254 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6255 }
6256
6257 unsigned NumBytesActuallyUsed = NumBytes;
6258
6259 // In the old ELFv1 ABI,
6260 // the prolog code of the callee may store up to 8 GPR argument registers to
6261 // the stack, allowing va_start to index over them in memory if its varargs.
6262 // Because we cannot tell if this is needed on the caller side, we have to
6263 // conservatively assume that it is needed. As such, make sure we have at
6264 // least enough stack space for the caller to store the 8 GPRs.
6265 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6266 // really requires memory operands, e.g. a vararg function.
6267 if (HasParameterArea)
6268 NumBytes = std::max(a: NumBytes, b: LinkageSize + 8 * PtrByteSize);
6269 else
6270 NumBytes = LinkageSize;
6271
6272 // Tail call needs the stack to be aligned.
6273 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6274 NumBytes = EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes);
6275
6276 int SPDiff = 0;
6277
6278 // Calculate by how many bytes the stack has to be adjusted in case of tail
6279 // call optimization.
6280 if (!IsSibCall)
6281 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: CFlags.IsTailCall, ParamSize: NumBytes);
6282
6283 // To protect arguments on the stack from being clobbered in a tail call,
6284 // force all the loads to happen before doing any other lowering.
6285 if (CFlags.IsTailCall)
6286 Chain = DAG.getStackArgumentTokenFactor(Chain);
6287
6288 // Adjust the stack pointer for the new arguments...
6289 // These operations are automatically eliminated by the prolog/epilog pass
6290 if (!IsSibCall)
6291 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
6292 SDValue CallSeqStart = Chain;
6293
6294 // Load the return address and frame pointer so it can be move somewhere else
6295 // later.
6296 SDValue LROp, FPOp;
6297 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);
6298
6299 // Set up a copy of the stack pointer for use loading and storing any
6300 // arguments that may not fit in the registers available for argument
6301 // passing.
6302 SDValue StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
6303
6304 // Figure out which arguments are going to go in registers, and which in
6305 // memory. Also, if this is a vararg function, floating point operations
6306 // must be stored to our stack, and loaded into integer regs as well, if
6307 // any integer regs are available for argument passing.
6308 unsigned ArgOffset = LinkageSize;
6309
6310 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6311 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6312
6313 SmallVector<SDValue, 8> MemOpChains;
6314 for (unsigned i = 0; i != NumOps; ++i) {
6315 SDValue Arg = OutVals[i];
6316 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6317 EVT ArgVT = Outs[i].VT;
6318 EVT OrigVT = Outs[i].ArgVT;
6319
6320 // PtrOff will be used to store the current argument to the stack if a
6321 // register cannot be found for it.
6322 SDValue PtrOff;
6323
6324 // We re-align the argument offset for each argument, except when using the
6325 // fast calling convention, when we need to make sure we do that only when
6326 // we'll actually use a stack slot.
6327 auto ComputePtrOff = [&]() {
6328 /* Respect alignment of argument on the stack. */
6329 auto Alignment =
6330 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6331 ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
6332
6333 PtrOff = DAG.getConstant(Val: ArgOffset, DL: dl, VT: StackPtr.getValueType());
6334
6335 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6336 };
6337
6338 if (!IsFastCall) {
6339 ComputePtrOff();
6340
6341 /* Compute GPR index associated with argument offset. */
6342 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6343 GPR_idx = std::min(a: GPR_idx, b: NumGPRs);
6344 }
6345
6346 // Promote integers to 64-bit values.
6347 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6348 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6349 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6350 Arg = DAG.getNode(Opcode: ExtOp, DL: dl, VT: MVT::i64, Operand: Arg);
6351 }
6352
6353 // FIXME memcpy is used way more than necessary. Correctness first.
6354 // Note: "by value" is code for passing a structure by value, not
6355 // basic types.
6356 if (Flags.isByVal()) {
6357 // Note: Size includes alignment padding, so
6358 // struct x { short a; char b; }
6359 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6360 // These are the proper values we need for right-justifying the
6361 // aggregate in a parameter register.
6362 unsigned Size = Flags.getByValSize();
6363
6364 // An empty aggregate parameter takes up no storage and no
6365 // registers.
6366 if (Size == 0)
6367 continue;
6368
6369 if (IsFastCall)
6370 ComputePtrOff();
6371
6372 // All aggregates smaller than 8 bytes must be passed right-justified.
6373 if (Size==1 || Size==2 || Size==4) {
6374 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6375 if (GPR_idx != NumGPRs) {
6376 SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: Arg,
6377 PtrInfo: MachinePointerInfo(), MemVT: VT);
6378 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6379 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6380
6381 ArgOffset += PtrByteSize;
6382 continue;
6383 }
6384 }
6385
6386 if (GPR_idx == NumGPRs && Size < 8) {
6387 SDValue AddPtr = PtrOff;
6388 if (!isLittleEndian) {
6389 SDValue Const = DAG.getConstant(Val: PtrByteSize - Size, DL: dl,
6390 VT: PtrOff.getValueType());
6391 AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6392 }
6393 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6394 CallSeqStart,
6395 Flags, DAG, dl);
6396 ArgOffset += PtrByteSize;
6397 continue;
6398 }
6399 // Copy the object to parameter save area if it can not be entirely passed
6400 // by registers.
6401 // FIXME: we only need to copy the parts which need to be passed in
6402 // parameter save area. For the parts passed by registers, we don't need
6403 // to copy them to the stack although we need to allocate space for them
6404 // in parameter save area.
6405 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6406 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6407 CallSeqStart,
6408 Flags, DAG, dl);
6409
6410 // When a register is available, pass a small aggregate right-justified.
6411 if (Size < 8 && GPR_idx != NumGPRs) {
6412 // The easiest way to get this right-justified in a register
6413 // is to copy the structure into the rightmost portion of a
6414 // local variable slot, then load the whole slot into the
6415 // register.
6416 // FIXME: The memcpy seems to produce pretty awful code for
6417 // small aggregates, particularly for packed ones.
6418 // FIXME: It would be preferable to use the slot in the
6419 // parameter save area instead of a new local variable.
6420 SDValue AddPtr = PtrOff;
6421 if (!isLittleEndian) {
6422 SDValue Const = DAG.getConstant(Val: 8 - Size, DL: dl, VT: PtrOff.getValueType());
6423 AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6424 }
6425 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6426 CallSeqStart,
6427 Flags, DAG, dl);
6428
6429 // Load the slot into the register.
6430 SDValue Load =
6431 DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6432 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6433 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6434
6435 // Done with this argument.
6436 ArgOffset += PtrByteSize;
6437 continue;
6438 }
6439
6440 // For aggregates larger than PtrByteSize, copy the pieces of the
6441 // object that fit into registers from the parameter save area.
6442 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6443 SDValue Const = DAG.getConstant(Val: j, DL: dl, VT: PtrOff.getValueType());
6444 SDValue AddArg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Arg, N2: Const);
6445 if (GPR_idx != NumGPRs) {
6446 unsigned LoadSizeInBits = std::min(a: PtrByteSize, b: (Size - j)) * 8;
6447 EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadSizeInBits);
6448 SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: AddArg,
6449 PtrInfo: MachinePointerInfo(), MemVT: ObjType);
6450
6451 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6452 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6453 ArgOffset += PtrByteSize;
6454 } else {
6455 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6456 break;
6457 }
6458 }
6459 continue;
6460 }
6461
6462 switch (Arg.getSimpleValueType().SimpleTy) {
6463 default: llvm_unreachable("Unexpected ValueType for argument!");
6464 case MVT::i1:
6465 case MVT::i32:
6466 case MVT::i64:
6467 if (Flags.isNest()) {
6468 // The 'nest' parameter, if any, is passed in R11.
6469 RegsToPass.push_back(Elt: std::make_pair(x: PPC::X11, y&: Arg));
6470 break;
6471 }
6472
6473 // These can be scalar arguments or elements of an integer array type
6474 // passed directly. Clang may use those instead of "byval" aggregate
6475 // types to avoid forcing arguments to memory unnecessarily.
6476 if (GPR_idx != NumGPRs) {
6477 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Arg));
6478 } else {
6479 if (IsFastCall)
6480 ComputePtrOff();
6481
6482 assert(HasParameterArea &&
6483 "Parameter area must exist to pass an argument in memory.");
6484 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6485 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6486 TailCallArguments, dl);
6487 if (IsFastCall)
6488 ArgOffset += PtrByteSize;
6489 }
6490 if (!IsFastCall)
6491 ArgOffset += PtrByteSize;
6492 break;
6493 case MVT::f32:
6494 case MVT::f64: {
6495 // These can be scalar arguments or elements of a float array type
6496 // passed directly. The latter are used to implement ELFv2 homogenous
6497 // float aggregates.
6498
6499 // Named arguments go into FPRs first, and once they overflow, the
6500 // remaining arguments go into GPRs and then the parameter save area.
6501 // Unnamed arguments for vararg functions always go to GPRs and
6502 // then the parameter save area. For now, put all arguments to vararg
6503 // routines always in both locations (FPR *and* GPR or stack slot).
6504 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6505 bool NeededLoad = false;
6506
6507 // First load the argument into the next available FPR.
6508 if (FPR_idx != NumFPRs)
6509 RegsToPass.push_back(Elt: std::make_pair(x: FPR[FPR_idx++], y&: Arg));
6510
6511 // Next, load the argument into GPR or stack slot if needed.
6512 if (!NeedGPROrStack)
6513 ;
6514 else if (GPR_idx != NumGPRs && !IsFastCall) {
6515 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6516 // once we support fp <-> gpr moves.
6517
6518 // In the non-vararg case, this can only ever happen in the
6519 // presence of f32 array types, since otherwise we never run
6520 // out of FPRs before running out of GPRs.
6521 SDValue ArgVal;
6522
6523 // Double values are always passed in a single GPR.
6524 if (Arg.getValueType() != MVT::f32) {
6525 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: Arg);
6526
6527 // Non-array float values are extended and passed in a GPR.
6528 } else if (!Flags.isInConsecutiveRegs()) {
6529 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6530 ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6531
6532 // If we have an array of floats, we collect every odd element
6533 // together with its predecessor into one GPR.
6534 } else if (ArgOffset % PtrByteSize != 0) {
6535 SDValue Lo, Hi;
6536 Lo = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: OutVals[i - 1]);
6537 Hi = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6538 if (!isLittleEndian)
6539 std::swap(a&: Lo, b&: Hi);
6540 ArgVal = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
6541
6542 // The final element, if even, goes into the first half of a GPR.
6543 } else if (Flags.isInConsecutiveRegsLast()) {
6544 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6545 ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6546 if (!isLittleEndian)
6547 ArgVal = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: ArgVal,
6548 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
6549
        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
6552 } else
6553 ArgVal = SDValue();
6554
6555 if (ArgVal.getNode())
6556 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: ArgVal));
6557 } else {
6558 if (IsFastCall)
6559 ComputePtrOff();
6560
6561 // Single-precision floating-point values are mapped to the
6562 // second (rightmost) word of the stack doubleword.
6563 if (Arg.getValueType() == MVT::f32 &&
6564 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6565 SDValue ConstFour = DAG.getConstant(Val: 4, DL: dl, VT: PtrOff.getValueType());
6566 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: ConstFour);
6567 }
6568
6569 assert(HasParameterArea &&
6570 "Parameter area must exist to pass an argument in memory.");
6571 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6572 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6573 TailCallArguments, dl);
6574
6575 NeededLoad = true;
6576 }
6577 // When passing an array of floats, the array occupies consecutive
6578 // space in the argument area; only round up to the next doubleword
6579 // at the end of the array. Otherwise, each float takes 8 bytes.
6580 if (!IsFastCall || NeededLoad) {
6581 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6582 Flags.isInConsecutiveRegs()) ? 4 : 8;
6583 if (Flags.isInConsecutiveRegsLast())
6584 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6585 }
6586 break;
6587 }
6588 case MVT::v4f32:
6589 case MVT::v4i32:
6590 case MVT::v8i16:
6591 case MVT::v16i8:
6592 case MVT::v2f64:
6593 case MVT::v2i64:
6594 case MVT::v1i128:
6595 case MVT::f128:
6596 // These can be scalar arguments or elements of a vector array type
6597 // passed directly. The latter are used to implement ELFv2 homogenous
6598 // vector aggregates.
6599
6600 // For a varargs call, named arguments go into VRs or on the stack as
6601 // usual; unnamed arguments always go to the stack or the corresponding
6602 // GPRs when within range. For now, we always put the value in both
6603 // locations (or even all three).
6604 if (CFlags.IsVarArg) {
6605 assert(HasParameterArea &&
6606 "Parameter area must exist if we have a varargs call.");
6607 // We could elide this store in the case where the object fits
6608 // entirely in R registers. Maybe later.
6609 SDValue Store =
6610 DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6611 MemOpChains.push_back(Elt: Store);
6612 if (VR_idx != NumVRs) {
6613 SDValue Load =
6614 DAG.getLoad(VT: MVT::v4f32, dl, Chain: Store, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6615 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6616 RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Load));
6617 }
6618 ArgOffset += 16;
6619 for (unsigned i=0; i<16; i+=PtrByteSize) {
6620 if (GPR_idx == NumGPRs)
6621 break;
6622 SDValue Ix = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
6623 N2: DAG.getConstant(Val: i, DL: dl, VT: PtrVT));
6624 SDValue Load =
6625 DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Ix, PtrInfo: MachinePointerInfo());
6626 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6627 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6628 }
6629 break;
6630 }
6631
6632 // Non-varargs Altivec params go into VRs or on the stack.
6633 if (VR_idx != NumVRs) {
6634 RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Arg));
6635 } else {
6636 if (IsFastCall)
6637 ComputePtrOff();
6638
6639 assert(HasParameterArea &&
6640 "Parameter area must exist to pass an argument in memory.");
6641 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6642 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: true, MemOpChains,
6643 TailCallArguments, dl);
6644 if (IsFastCall)
6645 ArgOffset += 16;
6646 }
6647
6648 if (!IsFastCall)
6649 ArgOffset += 16;
6650 break;
6651 }
6652 }
6653
6654 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6655 "mismatch in size of parameter area");
6656 (void)NumBytesActuallyUsed;
6657
6658 if (!MemOpChains.empty())
6659 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
6660
6661 // Check if this is an indirect call (MTCTR/BCTRL).
6662 // See prepareDescriptorIndirectCall and buildCallOperands for more
6663 // information about calls through function pointers in the 64-bit SVR4 ABI.
6664 if (CFlags.IsIndirect) {
6665 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6666 // caller in the TOC save area.
6667 if (isTOCSaveRestoreRequired(Subtarget)) {
6668 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6669 // Load r2 into a virtual register and store it to the TOC save area.
6670 setUsesTOCBasePtr(DAG);
6671 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: PPC::X2, VT: MVT::i64);
6672 // TOC save area offset.
6673 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6674 SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
6675 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6676 Chain = DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: AddPtr,
6677 PtrInfo: MachinePointerInfo::getStack(
6678 MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
6679 }
6680 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6681 // This does not mean the MTCTR instruction must use R12; it's easier
6682 // to model this as an extra parameter, so do that.
6683 if (isELFv2ABI && !CFlags.IsPatchPoint)
6684 RegsToPass.push_back(Elt: std::make_pair(x: (unsigned)PPC::X12, y&: Callee));
6685 }
6686
6687 // Build a sequence of copy-to-reg nodes chained together with token chain
6688 // and flag operands which copy the outgoing args into the appropriate regs.
6689 SDValue InGlue;
6690 for (const auto &[Reg, N] : RegsToPass) {
6691 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
6692 InGlue = Chain.getValue(R: 1);
6693 }
6694
6695 if (CFlags.IsTailCall && !IsSibCall)
6696 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6697 TailCallArguments);
6698
6699 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
6700 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6701}
6702
6703// Returns true when the shadow of a general purpose argument register
6704// in the parameter save area is aligned to at least 'RequiredAlign'.
6705static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6706 assert(RequiredAlign.value() <= 16 &&
6707 "Required alignment greater than stack alignment.");
6708 switch (Reg) {
6709 default:
6710 report_fatal_error(reason: "called on invalid register.");
6711 case PPC::R5:
6712 case PPC::R9:
6713 case PPC::X3:
6714 case PPC::X5:
6715 case PPC::X7:
6716 case PPC::X9:
6717 // These registers are 16 byte aligned which is the most strict aligment
6718 // we can support.
6719 return true;
6720 case PPC::R3:
6721 case PPC::R7:
6722 case PPC::X4:
6723 case PPC::X6:
6724 case PPC::X8:
6725 case PPC::X10:
6726 // The shadow of these registers in the PSA is 8 byte aligned.
6727 return RequiredAlign <= 8;
6728 case PPC::R4:
6729 case PPC::R6:
6730 case PPC::R8:
6731 case PPC::R10:
6732 return RequiredAlign <= 4;
6733 }
6734}
6735
/// Custom calling-convention assignment function for the AIX ABI.
///
/// Assigns the argument value \p ValNo to registers and/or a slot in the
/// parameter save area (PSA) following the AIX subroutine linkage convention,
/// recording the resulting locations in \p State. Returns false when the
/// argument has been fully assigned, true otherwise (per the CCAssignFn
/// contract, a true return means the argument could not be handled).
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                   Type *OrigTy, CCState &State) {
  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
      State.getMachineFunction().getSubtarget());
  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrSize = IsPPC64 ? 8 : 4;
  const Align PtrAlign(PtrSize);
  const Align StackAlign(16);
  const MVT RegVT = Subtarget.getScalarIntVT();

  if (ValVT == MVT::f128)
    report_fatal_error(reason: "f128 is unimplemented on AIX.");

  static const MCPhysReg GPR_32[] = {// 32-bit registers.
                                     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                     PPC::R7, PPC::R8, PPC::R9, PPC::R10};
  static const MCPhysReg GPR_64[] = {// 64-bit registers.
                                     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                     PPC::X7, PPC::X8, PPC::X9, PPC::X10};

  static const MCPhysReg VR[] = {// Vector registers.
                                 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
                                 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
                                 PPC::V10, PPC::V11, PPC::V12, PPC::V13};

  const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;

  // The 'nest' parameter, if present, is passed in R11 (X11 on PPC64) and
  // consumes no PSA space.
  if (ArgFlags.isNest()) {
    MCRegister EnvReg = State.AllocateReg(Reg: IsPPC64 ? PPC::X11 : PPC::R11);
    if (!EnvReg)
      report_fatal_error(reason: "More then one nest argument.");
    State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: EnvReg, LocVT: RegVT, HTP: LocInfo));
    return false;
  }

  if (ArgFlags.isByVal()) {
    const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
    if (ByValAlign > StackAlign)
      report_fatal_error(reason: "Pass-by-value arguments with alignment greater than "
                         "16 are not supported.");

    const unsigned ByValSize = ArgFlags.getByValSize();
    // The aggregate is laid out at the stricter of its own alignment and
    // pointer alignment.
    const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;

    // An empty aggregate parameter takes up no storage and no registers,
    // but needs a MemLoc for a stack slot for the formal arguments side.
    if (ByValSize == 0) {
      State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
                                        Offset: State.getStackSize(), LocVT: RegVT, HTP: LocInfo));
      return false;
    }

    // Shadow allocate any registers that are not properly aligned.
    unsigned NextReg = State.getFirstUnallocated(Regs: GPRs);
    while (NextReg != GPRs.size() &&
           !isGPRShadowAligned(Reg: GPRs[NextReg], RequiredAlign: ObjAlign)) {
      // Shadow allocate the next register since its alignment is not strict
      // enough.
      MCRegister Reg = State.AllocateReg(Regs: GPRs);
      // Allocate the stack space shadowed by said register.
      State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
      assert(Reg && "Alocating register unexpectedly failed.");
      (void)Reg;
      NextReg = State.getFirstUnallocated(Regs: GPRs);
    }

    // Assign consecutive GPRs to pointer-size pieces of the aggregate; once
    // GPRs run out, the remainder is passed in the PSA starting at Offset.
    const unsigned StackSize = alignTo(Size: ByValSize, A: ObjAlign);
    unsigned Offset = State.AllocateStack(Size: StackSize, Alignment: ObjAlign);
    for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
      if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
        State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
      else {
        State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
                                          Offset, LocVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
                                          HTP: LocInfo));
        break;
      }
    }
    return false;
  }

  // Arguments always reserve parameter save area.
  switch (ValVT.SimpleTy) {
  default:
    report_fatal_error(reason: "Unhandled value type for argument.");
  case MVT::i64:
    // i64 arguments should have been split to i32 for PPC32.
    assert(IsPPC64 && "PPC32 should have split i64 values.");
    [[fallthrough]];
  case MVT::i1:
  case MVT::i32: {
    const unsigned Offset = State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
    // AIX integer arguments are always passed in register width.
    if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
      LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                  : CCValAssign::LocInfo::ZExt;
    if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
      State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
    else
      State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT: RegVT, HTP: LocInfo));

    return false;
  }
  case MVT::f32:
  case MVT::f64: {
    // Parameter save area (PSA) is reserved even if the float passes in fpr.
    const unsigned StoreSize = LocVT.getStoreSize();
    // Floats are always 4-byte aligned in the PSA on AIX.
    // This includes f64 in 64-bit mode for ABI compatibility.
    const unsigned Offset =
        State.AllocateStack(Size: IsPPC64 ? 8 : StoreSize, Alignment: Align(4));
    MCRegister FReg = State.AllocateReg(Regs: FPR);
    if (FReg)
      State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: FReg, LocVT, HTP: LocInfo));

    // Reserve and initialize GPRs or initialize the PSA as required.
    for (unsigned I = 0; I < StoreSize; I += PtrSize) {
      if (MCRegister Reg = State.AllocateReg(Regs: GPRs)) {
        assert(FReg && "An FPR should be available when a GPR is reserved.");
        if (State.isVarArg()) {
          // Successfully reserved GPRs are only initialized for vararg calls.
          // Custom handling is required for:
          //   f64 in PPC32 needs to be split into 2 GPRs.
          //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
          State.addLoc(
              V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
        }
      } else {
        // If there are insufficient GPRs, the PSA needs to be initialized.
        // Initialization occurs even if an FPR was initialized for
        // compatibility with the AIX XL compiler. The full memory for the
        // argument will be initialized even if a prior word is saved in GPR.
        // A custom memLoc is used when the argument also passes in FPR so
        // that the callee handling can skip over it easily.
        State.addLoc(
            V: FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
                                              HTP: LocInfo)
                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
        break;
      }
    }

    return false;
  }
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
  case MVT::v2i64:
  case MVT::v2f64:
  case MVT::v1i128: {
    const unsigned VecSize = 16;
    const Align VecAlign(VecSize);

    if (!State.isVarArg()) {
      // If there are vector registers remaining we don't consume any stack
      // space.
      if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
        State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
        return false;
      }
      // Vectors passed on the stack do not shadow GPRs or FPRs even though they
      // might be allocated in the portion of the PSA that is shadowed by the
      // GPRs.
      const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
      State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
      return false;
    }

    unsigned NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
    // Burn any underaligned registers and their shadowed stack space until
    // we reach the required alignment.
    while (NextRegIndex != GPRs.size() &&
           !isGPRShadowAligned(Reg: GPRs[NextRegIndex], RequiredAlign: VecAlign)) {
      // Shadow allocate register and its stack shadow.
      MCRegister Reg = State.AllocateReg(Regs: GPRs);
      State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
      assert(Reg && "Allocating register unexpectedly failed.");
      (void)Reg;
      NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
    }

    // Vectors that are passed as fixed arguments are handled differently.
    // They are passed in VRs if any are available (unlike arguments passed
    // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
    // functions)
    if (!ArgFlags.isVarArg()) {
      if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
        State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
        // Shadow allocate GPRs and stack space even though we pass in a VR.
        for (unsigned I = 0; I != VecSize; I += PtrSize)
          State.AllocateReg(Regs: GPRs);
        State.AllocateStack(Size: VecSize, Alignment: VecAlign);
        return false;
      }
      // No vector registers remain so pass on the stack.
      const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
      State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
      return false;
    }

    // If all GPRS are consumed then we pass the argument fully on the stack.
    if (NextRegIndex == GPRs.size()) {
      const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
      State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
      return false;
    }

    // Corner case for 32-bit codegen. We have 2 registers to pass the first
    // half of the argument, and then need to pass the remaining half on the
    // stack.
    if (GPRs[NextRegIndex] == PPC::R9) {
      const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
      State.addLoc(
          V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));

      const MCRegister FirstReg = State.AllocateReg(Reg: PPC::R9);
      const MCRegister SecondReg = State.AllocateReg(Reg: PPC::R10);
      assert(FirstReg && SecondReg &&
             "Allocating R9 or R10 unexpectedly failed.");
      State.addLoc(
          V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: FirstReg, LocVT: RegVT, HTP: LocInfo));
      State.addLoc(
          V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: SecondReg, LocVT: RegVT, HTP: LocInfo));
      return false;
    }

    // We have enough GPRs to fully pass the vector argument, and we have
    // already consumed any underaligned registers. Start with the custom
    // MemLoc and then the custom RegLocs.
    const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
    State.addLoc(
        V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
    for (unsigned I = 0; I != VecSize; I += PtrSize) {
      const MCRegister Reg = State.AllocateReg(Regs: GPRs);
      assert(Reg && "Failed to allocated register for vararg vector argument");
      State.addLoc(
          V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
    }
    return false;
  }
  }
  // No location was assigned; report failure to the generic CC analysis.
  return true;
}
6980
6981// So far, this function is only used by LowerFormalArguments_AIX()
6982static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6983 bool IsPPC64,
6984 bool HasP8Vector,
6985 bool HasVSX) {
6986 assert((IsPPC64 || SVT != MVT::i64) &&
6987 "i64 should have been split for 32-bit codegen.");
6988
6989 switch (SVT) {
6990 default:
6991 report_fatal_error(reason: "Unexpected value type for formal argument");
6992 case MVT::i1:
6993 case MVT::i32:
6994 case MVT::i64:
6995 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6996 case MVT::f32:
6997 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6998 case MVT::f64:
6999 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7000 case MVT::v4f32:
7001 case MVT::v4i32:
7002 case MVT::v8i16:
7003 case MVT::v16i8:
7004 case MVT::v2i64:
7005 case MVT::v2f64:
7006 case MVT::v1i128:
7007 return &PPC::VRRCRegClass;
7008 }
7009}
7010
7011static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7012 SelectionDAG &DAG, SDValue ArgValue,
7013 MVT LocVT, const SDLoc &dl) {
7014 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7015 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7016
7017 if (Flags.isSExt())
7018 ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: LocVT, N1: ArgValue,
7019 N2: DAG.getValueType(ValVT));
7020 else if (Flags.isZExt())
7021 ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: LocVT, N1: ArgValue,
7022 N2: DAG.getValueType(ValVT));
7023
7024 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ValVT, Operand: ArgValue);
7025}
7026
7027static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7028 const unsigned LASize = FL->getLinkageSize();
7029
7030 if (PPC::GPRCRegClass.contains(Reg)) {
7031 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7032 "Reg must be a valid argument register!");
7033 return LASize + 4 * (Reg - PPC::R3);
7034 }
7035
7036 if (PPC::G8RCRegClass.contains(Reg)) {
7037 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7038 "Reg must be a valid argument register!");
7039 return LASize + 8 * (Reg - PPC::X3);
7040 }
7041
7042 llvm_unreachable("Only general purpose registers expected.");
7043}
7044
7045// AIX ABI Stack Frame Layout:
7046//
7047// Low Memory +--------------------------------------------+
7048// SP +---> | Back chain | ---+
7049// | +--------------------------------------------+ |
7050// | | Saved Condition Register | |
7051// | +--------------------------------------------+ |
7052// | | Saved Linkage Register | |
7053// | +--------------------------------------------+ | Linkage Area
7054// | | Reserved for compilers | |
7055// | +--------------------------------------------+ |
7056// | | Reserved for binders | |
7057// | +--------------------------------------------+ |
7058// | | Saved TOC pointer | ---+
7059// | +--------------------------------------------+
7060// | | Parameter save area |
7061// | +--------------------------------------------+
7062// | | Alloca space |
7063// | +--------------------------------------------+
7064// | | Local variable space |
7065// | +--------------------------------------------+
7066// | | Float/int conversion temporary |
7067// | +--------------------------------------------+
7068// | | Save area for AltiVec registers |
7069// | +--------------------------------------------+
7070// | | AltiVec alignment padding |
7071// | +--------------------------------------------+
7072// | | Save area for VRSAVE register |
7073// | +--------------------------------------------+
7074// | | Save area for General Purpose registers |
7075// | +--------------------------------------------+
7076// | | Save area for Floating Point registers |
7077// | +--------------------------------------------+
7078// +---- | Back chain |
7079// High Memory +--------------------------------------------+
7080//
7081// Specifications:
7082// AIX 7.2 Assembler Language Reference
7083// Subroutine linkage convention
7084
// Lower incoming formal arguments under the AIX ABI: bind register arguments
// to virtual registers, create fixed stack objects for arguments in the
// caller's parameter save area, record parameter-type info for the
// traceback table, and set up the va_list frame index for variadic
// functions. Returns the (possibly token-factored) chain.
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (getTargetMachine().Options.GuaranteedTailCallOpt)
    report_fatal_error(reason: "Tail call support is unimplemented on AIX.");

  if (useSoftFloat())
    report_fatal_error(reason: "Soft float support is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  const EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
  // Reserve space for the linkage area on the stack.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(PtrByteSize));
  // Running cursor into the parameter save area, used only when the
  // "save-reg-params" attribute asks us to spill register parameters to
  // their home slots (e.g. for the debugger).
  uint64_t SaveStackPos = CCInfo.getStackSize();
  bool SaveParams = MF.getFunction().hasFnAttribute(Kind: "save-reg-params");
  CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_AIX);

  SmallVector<SDValue, 8> MemOps;

  // The index is advanced inside the loop body because one IR argument may
  // consume several CCValAssign entries (custom vector locations, and
  // by-value aggregates split across multiple registers).
  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
    CCValAssign &VA = ArgLocs[I++];
    MVT LocVT = VA.getLocVT();
    MVT ValVT = VA.getValVT();
    ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;

    EVT ArgVT = Ins[VA.getValNo()].ArgVT;
    bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
    // For compatibility with the AIX XL compiler, the float args in the
    // parameter save area are initialized even if the argument is available
    // in register. The caller is required to initialize both the register
    // and memory, however, the callee can choose to expect it in either.
    // The memloc is dismissed here because the argument is retrieved from
    // the register.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
      continue;

    if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
      const TargetRegisterClass *RegClass = getRegClassForSVT(
          SVT: LocVT.SimpleTy, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(), HasVSX: Subtarget.hasVSX());
      // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
      MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
      const Register VReg = MF.addLiveIn(PReg: VA.getLocReg(), RC: RegClass);
      SDValue Parm = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: SaveVT);
      int FI = MFI.CreateFixedObject(Size: SaveVT.getStoreSize(), SPOffset: SaveStackPos, IsImmutable: true);
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
      SDValue StoreReg = DAG.getStore(Chain, dl, Val: Parm, Ptr: FIN,
                                      PtrInfo: MachinePointerInfo(), Alignment: Align(PtrByteSize));
      SaveStackPos = alignTo(Value: SaveStackPos + SaveVT.getStoreSize(), Align: PtrByteSize);
      MemOps.push_back(Elt: StoreReg);
    }

    // Arguments that already live in memory (or byvals) only advance the
    // save-area cursor; no store needs to be emitted for them.
    if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
      unsigned StoreSize =
          Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
      SaveStackPos = alignTo(Value: SaveStackPos + StoreSize, Align: PtrByteSize);
    }

    // Shared helper: load this argument's value from its fixed stack slot in
    // the parameter save area and append it to InVals.
    auto HandleMemLoc = [&]() {
      const unsigned LocSize = LocVT.getStoreSize();
      const unsigned ValSize = ValVT.getStoreSize();
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      int CurArgOffset = VA.getLocMemOffset();
      // Objects are right-justified because AIX is big-endian.
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      // Potential tail calls could cause overwriting of argument stack slots.
      const bool IsImmutable =
          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
            (CallConv == CallingConv::Fast));
      int FI = MFI.CreateFixedObject(Size: ValSize, SPOffset: CurArgOffset, IsImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
      SDValue ArgValue =
          DAG.getLoad(VT: ValVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo());

      // While the ABI specifies the argument type is (sign or zero) extended
      // out to register width, not all code is compliant. We truncate and
      // re-extend to be more forgiving of these callers when the argument type
      // is smaller than register width.
      if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
          ValVT.isInteger() &&
          ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
        // It is possible to have either real integer values
        // or integers that were not originally integers.
        // In the latter case, these could have came from structs,
        // and these integers would not have an extend on the parameter.
        // Since these types of integers do not have an extend specified
        // in the first place, the type of extend that we do should not matter.
        EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
                                 ? MVT::i8
                                 : ArgVT;
        SDValue ArgValueTrunc =
            DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: TruncatedArgVT, Operand: ArgValue);
        SDValue ArgValueExt =
            ArgSignExt ? DAG.getSExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT)
                       : DAG.getZExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT);
        InVals.push_back(Elt: ArgValueExt);
      } else {
        InVals.push_back(Elt: ArgValue);
      }
    };

    // Vector arguments to VaArg functions are passed both on the stack, and
    // in any available GPRs. Load the value from the stack and add the GPRs
    // as live ins.
    if (VA.isMemLoc() && VA.needsCustom()) {
      assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
      assert(isVarArg && "Only use custom memloc for vararg.");
      // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
      // matching custom RegLocs.
      const unsigned OriginalValNo = VA.getValNo();
      (void)OriginalValNo;

      // Consumes the next CCValAssign, which must be a custom RegLoc for the
      // same argument, and marks its register live-in (the value itself is
      // taken from the stack slot by HandleMemLoc above).
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Missing custom RegLoc.");
        VA = ArgLocs[I++];
        assert(VA.getValVT().isVector() &&
               "Unexpected Val type for custom RegLoc.");
        assert(VA.getValNo() == OriginalValNo &&
               "ValNo mismatch between custom MemLoc and RegLoc.");
        MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
        MF.addLiveIn(PReg: VA.getLocReg(),
                     RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
                                        HasVSX: Subtarget.hasVSX()));
      };

      HandleMemLoc();
      // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
      // we passed the vector in R5, R6, R7 and R8.
      if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    // Record the parameter kind for the traceback table / function info.
    if (VA.isRegLoc()) {
      if (VA.getValVT().isScalarInteger())
        FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
      else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
        switch (VA.getValVT().SimpleTy) {
        default:
          report_fatal_error(reason: "Unhandled value type for argument.");
        case MVT::f32:
          FuncInfo->appendParameterType(Type: PPCFunctionInfo::ShortFloatingPoint);
          break;
        case MVT::f64:
          FuncInfo->appendParameterType(Type: PPCFunctionInfo::LongFloatingPoint);
          break;
        }
      } else if (VA.getValVT().isVector()) {
        switch (VA.getValVT().SimpleTy) {
        default:
          report_fatal_error(reason: "Unhandled value type for argument.");
        case MVT::v16i8:
          FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorChar);
          break;
        case MVT::v8i16:
          FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorShort);
          break;
        case MVT::v4i32:
        case MVT::v2i64:
        case MVT::v1i128:
          FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorInt);
          break;
        case MVT::v4f32:
        case MVT::v2f64:
          FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorFloat);
          break;
        }
      }
    }

    // By-value argument fully in memory: hand the caller-owned slot to the
    // function body as a frame index.
    if (Flags.isByVal() && VA.isMemLoc()) {
      const unsigned Size =
          alignTo(Value: Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
                  Align: PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          Size, SPOffset: VA.getLocMemOffset(), /* IsImmutable */ false,
          /* IsAliased */ isAliased: true);
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
      InVals.push_back(Elt: FIN);

      continue;
    }

    if (Flags.isByVal()) {
      assert(VA.isRegLoc() && "MemLocs should already be handled.");

      const MCPhysReg ArgReg = VA.getLocReg();
      const PPCFrameLowering *FL = Subtarget.getFrameLowering();

      const unsigned StackSize = alignTo(Value: Flags.getByValSize(), Align: PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          Size: StackSize, SPOffset: mapArgRegToOffsetAIX(Reg: ArgReg, FL), /* IsImmutable */ false,
          /* IsAliased */ isAliased: true);
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
      InVals.push_back(Elt: FIN);

      // Add live ins for all the RegLocs for the same ByVal.
      const TargetRegisterClass *RegClass =
          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
                                               unsigned Offset) {
        const Register VReg = MF.addLiveIn(PReg: PhysReg, RC: RegClass);
        // Since the callers side has left justified the aggregate in the
        // register, we can simply store the entire register into the stack
        // slot.
        SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
        // The store to the fixedstack object is needed becuase accessing a
        // field of the ByVal will use a gep and load. Ideally we will optimize
        // to extracting the value from the register directly, and elide the
        // stores when the arguments address is not taken, but that will need to
        // be future work.
        SDValue Store = DAG.getStore(
            Chain: CopyFrom.getValue(R: 1), dl, Val: CopyFrom,
            Ptr: DAG.getObjectPtrOffset(SL: dl, Ptr: FIN, Offset: TypeSize::getFixed(ExactSize: Offset)),
            PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset));

        MemOps.push_back(Elt: Store);
      };

      unsigned Offset = 0;
      HandleRegLoc(VA.getLocReg(), Offset);
      Offset += PtrByteSize;
      // NOTE(review): ArgLocs[I] is read here without an I != End guard; this
      // relies on CC_AIX always emitting the remaining RegLocs (and a trailing
      // MemLoc, if any) for the by-value argument -- confirm.
      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
           Offset += PtrByteSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "RegLocs should be for ByVal argument.");

        const CCValAssign RL = ArgLocs[I++];
        HandleRegLoc(RL.getLocReg(), Offset);
        FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
      }

      if (Offset != StackSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
        // Consume the MemLoc.The InVal has already been emitted, so nothing
        // more needs to be done.
        ++I;
      }

      continue;
    }

    // Plain register argument: copy it out of its physical register.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      MVT::SimpleValueType SVT = ValVT.SimpleTy;
      Register VReg =
          MF.addLiveIn(PReg: VA.getLocReg(),
                       RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
                                          HasVSX: Subtarget.hasVSX()));
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
      if (ValVT.isScalarInteger() &&
          (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
        ArgValue =
            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
      }
      InVals.push_back(Elt: ArgValue);
      continue;
    }
    if (VA.isMemLoc()) {
      HandleMemLoc();
      continue;
    }
  }

  // On AIX a minimum of 8 words is saved to the parameter save area.
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  // Area that is at least reserved in the caller of this function.
  unsigned CallerReservedArea = std::max<unsigned>(
      a: CCInfo.getStackSize(), b: LinkageSize + MinParameterSaveArea);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  CallerReservedArea =
      EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: CallerReservedArea);
  FuncInfo->setMinReservedArea(CallerReservedArea);

  if (isVarArg) {
    int VAListIndex = 0;
    // If any of the optional arguments are passed in register then the fixed
    // stack object we spill into is not immutable. Create a fixed stack object
    // that overlaps the remainder of the parameter save area.
    if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
      unsigned FixedStackSize =
          LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
      VAListIndex =
          MFI.CreateFixedObject(Size: FixedStackSize, SPOffset: CCInfo.getStackSize(),
                                /* IsImmutable */ false, /* IsAliased */ isAliased: true);
    } else {
      // All the arguments passed through ellipses are on the stack. Create a
      // dummy fixed stack object the same size as a pointer since we don't
      // know the actual size.
      VAListIndex =
          MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: CCInfo.getStackSize(),
                                /* IsImmutable */ true, /* IsAliased */ isAliased: true);
    }

    FuncInfo->setVarArgsFrameIndex(VAListIndex);
    SDValue FIN = DAG.getFrameIndex(FI: VAListIndex, VT: PtrVT);

    static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};

    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
    const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned
             GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
             Offset = 0;
         GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {

      const Register VReg =
          IsPPC64 ? MF.addLiveIn(PReg: GPR_64[GPRIndex], RC: &PPC::G8RCRegClass)
                  : MF.addLiveIn(PReg: GPR_32[GPRIndex], RC: &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
      MachinePointerInfo MPI =
          MachinePointerInfo::getFixedStack(MF, FI: VAListIndex, Offset);
      SDValue Store = DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MPI);
      MemOps.push_back(Elt: Store);
      // Increment the address for the next argument to store.
      SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
      FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
    }
  }

  // Tie all the spill stores together so they are kept before any use.
  if (!MemOps.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);

  return Chain;
}
7454
7455SDValue PPCTargetLowering::LowerCall_AIX(
7456 SDValue Chain, SDValue Callee, CallFlags CFlags,
7457 const SmallVectorImpl<ISD::OutputArg> &Outs,
7458 const SmallVectorImpl<SDValue> &OutVals,
7459 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7460 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7461 const CallBase *CB) const {
7462 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7463 // AIX ABI stack frame layout.
7464
7465 assert((CFlags.CallConv == CallingConv::C ||
7466 CFlags.CallConv == CallingConv::Cold ||
7467 CFlags.CallConv == CallingConv::Fast) &&
7468 "Unexpected calling convention!");
7469
7470 if (CFlags.IsPatchPoint)
7471 report_fatal_error(reason: "This call type is unimplemented on AIX.");
7472
7473 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7474
7475 MachineFunction &MF = DAG.getMachineFunction();
7476 SmallVector<CCValAssign, 16> ArgLocs;
7477 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7478 *DAG.getContext());
7479
7480 // Reserve space for the linkage save area (LSA) on the stack.
7481 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7482 // [SP][CR][LR][2 x reserved][TOC].
7483 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7484 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7485 const bool IsPPC64 = Subtarget.isPPC64();
7486 const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7487 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7488 CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(PtrByteSize));
7489 CCInfo.AnalyzeCallOperands(Outs, Fn: CC_AIX);
7490
7491 // The prolog code of the callee may store up to 8 GPR argument registers to
7492 // the stack, allowing va_start to index over them in memory if the callee
7493 // is variadic.
7494 // Because we cannot tell if this is needed on the caller side, we have to
7495 // conservatively assume that it is needed. As such, make sure we have at
7496 // least enough stack space for the caller to store the 8 GPRs.
7497 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7498 const unsigned NumBytes = std::max<unsigned>(
7499 a: LinkageSize + MinParameterSaveAreaSize, b: CCInfo.getStackSize());
7500
7501 // Adjust the stack pointer for the new arguments...
7502 // These operations are automatically eliminated by the prolog/epilog pass.
7503 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
7504 SDValue CallSeqStart = Chain;
7505
7506 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7507 SmallVector<SDValue, 8> MemOpChains;
7508
7509 // Set up a copy of the stack pointer for loading and storing any
7510 // arguments that may not fit in the registers available for argument
7511 // passing.
7512 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(Reg: PPC::X1, VT: MVT::i64)
7513 : DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
7514
7515 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7516 const unsigned ValNo = ArgLocs[I].getValNo();
7517 SDValue Arg = OutVals[ValNo];
7518 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7519
7520 if (Flags.isByVal()) {
7521 const unsigned ByValSize = Flags.getByValSize();
7522
7523 // Nothing to do for zero-sized ByVals on the caller side.
7524 if (!ByValSize) {
7525 ++I;
7526 continue;
7527 }
7528
7529 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7530 return DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: PtrVT, Chain,
7531 Ptr: (LoadOffset != 0)
7532 ? DAG.getObjectPtrOffset(
7533 SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7534 : Arg,
7535 PtrInfo: MachinePointerInfo(), MemVT: VT);
7536 };
7537
7538 unsigned LoadOffset = 0;
7539
7540 // Initialize registers, which are fully occupied by the by-val argument.
7541 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7542 SDValue Load = GetLoad(PtrVT, LoadOffset);
7543 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7544 LoadOffset += PtrByteSize;
7545 const CCValAssign &ByValVA = ArgLocs[I++];
7546 assert(ByValVA.getValNo() == ValNo &&
7547 "Unexpected location for pass-by-value argument.");
7548 RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: Load));
7549 }
7550
7551 if (LoadOffset == ByValSize)
7552 continue;
7553
7554 // There must be one more loc to handle the remainder.
7555 assert(ArgLocs[I].getValNo() == ValNo &&
7556 "Expected additional location for by-value argument.");
7557
7558 if (ArgLocs[I].isMemLoc()) {
7559 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7560 const CCValAssign &ByValVA = ArgLocs[I++];
7561 ISD::ArgFlagsTy MemcpyFlags = Flags;
7562 // Only memcpy the bytes that don't pass in register.
7563 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7564 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7565 Arg: (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7566 SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7567 : Arg,
7568 PtrOff: DAG.getObjectPtrOffset(
7569 SL: dl, Ptr: StackPtr, Offset: TypeSize::getFixed(ExactSize: ByValVA.getLocMemOffset())),
7570 CallSeqStart, Flags: MemcpyFlags, DAG, dl);
7571 continue;
7572 }
7573
7574 // Initialize the final register residue.
7575 // Any residue that occupies the final by-val arg register must be
7576 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7577 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7578 // 2 and 1 byte loads.
7579 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7580 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7581 "Unexpected register residue for by-value argument.");
7582 SDValue ResidueVal;
7583 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7584 const unsigned N = llvm::bit_floor(Value: ResidueBytes - Bytes);
7585 const MVT VT =
7586 N == 1 ? MVT::i8
7587 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7588 SDValue Load = GetLoad(VT, LoadOffset);
7589 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7590 LoadOffset += N;
7591 Bytes += N;
7592
7593 // By-val arguments are passed left-justfied in register.
7594 // Every load here needs to be shifted, otherwise a full register load
7595 // should have been used.
7596 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7597 "Unexpected load emitted during handling of pass-by-value "
7598 "argument.");
7599 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7600 EVT ShiftAmountTy =
7601 getShiftAmountTy(LHSTy: Load->getValueType(ResNo: 0), DL: DAG.getDataLayout());
7602 SDValue SHLAmt = DAG.getConstant(Val: NumSHLBits, DL: dl, VT: ShiftAmountTy);
7603 SDValue ShiftedLoad =
7604 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: Load.getValueType(), N1: Load, N2: SHLAmt);
7605 ResidueVal = ResidueVal ? DAG.getNode(Opcode: ISD::OR, DL: dl, VT: PtrVT, N1: ResidueVal,
7606 N2: ShiftedLoad)
7607 : ShiftedLoad;
7608 }
7609
7610 const CCValAssign &ByValVA = ArgLocs[I++];
7611 RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: ResidueVal));
7612 continue;
7613 }
7614
7615 CCValAssign &VA = ArgLocs[I++];
7616 const MVT LocVT = VA.getLocVT();
7617 const MVT ValVT = VA.getValVT();
7618
7619 switch (VA.getLocInfo()) {
7620 default:
7621 report_fatal_error(reason: "Unexpected argument extension type.");
7622 case CCValAssign::Full:
7623 break;
7624 case CCValAssign::ZExt:
7625 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7626 break;
7627 case CCValAssign::SExt:
7628 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7629 break;
7630 }
7631
7632 if (VA.isRegLoc() && !VA.needsCustom()) {
7633 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
7634 continue;
7635 }
7636
7637 // Vector arguments passed to VarArg functions need custom handling when
7638 // they are passed (at least partially) in GPRs.
7639 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7640 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7641 // Store value to its stack slot.
7642 SDValue PtrOff =
7643 DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7644 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7645 SDValue Store =
7646 DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
7647 MemOpChains.push_back(Elt: Store);
7648 const unsigned OriginalValNo = VA.getValNo();
7649 // Then load the GPRs from the stack
7650 unsigned LoadOffset = 0;
7651 auto HandleCustomVecRegLoc = [&]() {
7652 assert(I != E && "Unexpected end of CCvalAssigns.");
7653 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7654 "Expected custom RegLoc.");
7655 CCValAssign RegVA = ArgLocs[I++];
7656 assert(RegVA.getValNo() == OriginalValNo &&
7657 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7658 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
7659 N2: DAG.getConstant(Val: LoadOffset, DL: dl, VT: PtrVT));
7660 SDValue Load = DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Add, PtrInfo: MachinePointerInfo());
7661 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7662 RegsToPass.push_back(Elt: std::make_pair(x: RegVA.getLocReg(), y&: Load));
7663 LoadOffset += PtrByteSize;
7664 };
7665
7666 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7667 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7668 // R10.
7669 HandleCustomVecRegLoc();
7670 HandleCustomVecRegLoc();
7671
7672 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7673 ArgLocs[I].getValNo() == OriginalValNo) {
7674 assert(!IsPPC64 &&
7675 "Only 2 custom RegLocs expected for 64-bit codegen.");
7676 HandleCustomVecRegLoc();
7677 HandleCustomVecRegLoc();
7678 }
7679
7680 continue;
7681 }
7682
7683 if (VA.isMemLoc()) {
7684 SDValue PtrOff =
7685 DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7686 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7687 MemOpChains.push_back(
7688 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff,
7689 PtrInfo: MachinePointerInfo::getStack(MF, Offset: VA.getLocMemOffset()),
7690 Alignment: Subtarget.getFrameLowering()->getStackAlign()));
7691
7692 continue;
7693 }
7694
7695 if (!ValVT.isFloatingPoint())
7696 report_fatal_error(
7697 reason: "Unexpected register handling for calling convention.");
7698
7699 // Custom handling is used for GPR initializations for vararg float
7700 // arguments.
7701 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7702 LocVT.isInteger() &&
7703 "Custom register handling only expected for VarArg.");
7704
7705 SDValue ArgAsInt =
7706 DAG.getBitcast(VT: MVT::getIntegerVT(BitWidth: ValVT.getSizeInBits()), V: Arg);
7707
7708 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7709 // f32 in 32-bit GPR
7710 // f64 in 64-bit GPR
7711 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgAsInt));
7712 else if (Arg.getValueType().getFixedSizeInBits() <
7713 LocVT.getFixedSizeInBits())
7714 // f32 in 64-bit GPR.
7715 RegsToPass.push_back(Elt: std::make_pair(
7716 x: VA.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: LocVT)));
7717 else {
7718 // f64 in two 32-bit GPRs
7719 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7720 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7721 "Unexpected custom register for argument!");
7722 CCValAssign &GPR1 = VA;
7723 SDValue MSWAsI64 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgAsInt,
7724 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i8));
7725 RegsToPass.push_back(Elt: std::make_pair(
7726 x: GPR1.getLocReg(), y: DAG.getZExtOrTrunc(Op: MSWAsI64, DL: dl, VT: MVT::i32)));
7727
7728 if (I != E) {
7729 // If only 1 GPR was available, there will only be one custom GPR and
7730 // the argument will also pass in memory.
7731 CCValAssign &PeekArg = ArgLocs[I];
7732 if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7733 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7734 CCValAssign &GPR2 = ArgLocs[I++];
7735 RegsToPass.push_back(Elt: std::make_pair(
7736 x: GPR2.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: MVT::i32)));
7737 }
7738 }
7739 }
7740 }
7741
7742 if (!MemOpChains.empty())
7743 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
7744
7745 // For indirect calls, we need to save the TOC base to the stack for
7746 // restoration after the call.
7747 if (CFlags.IsIndirect) {
7748 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7749 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7750 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7751 const MVT PtrVT = Subtarget.getScalarIntVT();
7752 const unsigned TOCSaveOffset =
7753 Subtarget.getFrameLowering()->getTOCSaveOffset();
7754
7755 setUsesTOCBasePtr(DAG);
7756 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: TOCBaseReg, VT: PtrVT);
7757 SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
7758 SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: PtrVT);
7759 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7760 Chain = DAG.getStore(
7761 Chain: Val.getValue(R: 1), dl, Val, Ptr: AddPtr,
7762 PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
7763 }
7764
7765 // Build a sequence of copy-to-reg nodes chained together with token chain
7766 // and flag operands which copy the outgoing args into the appropriate regs.
7767 SDValue InGlue;
7768 for (auto Reg : RegsToPass) {
7769 Chain = DAG.getCopyToReg(Chain, dl, Reg: Reg.first, N: Reg.second, Glue: InGlue);
7770 InGlue = Chain.getValue(R: 1);
7771 }
7772
7773 const int SPDiff = 0;
7774 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
7775 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7776}
7777
7778bool
7779PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7780 MachineFunction &MF, bool isVarArg,
7781 const SmallVectorImpl<ISD::OutputArg> &Outs,
7782 LLVMContext &Context,
7783 const Type *RetTy) const {
7784 SmallVector<CCValAssign, 16> RVLocs;
7785 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7786 return CCInfo.CheckReturn(
7787 Outs, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7788 ? RetCC_PPC_Cold
7789 : RetCC_PPC);
7790}
7791
// Lower an IR return: copy each return value into its ABI-assigned physical
// register (splitting an f64 into two i32 GPR copies for SPE targets) and
// emit the PPCISD::RET_GLUE node.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  // SVR4 cold calls use a dedicated return convention; all others share
  // the common PPC one.
  CCInfo.AnalyzeReturn(Outs,
                       Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                               ? RetCC_PPC_Cold
                               : RetCC_PPC);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  // RealResIdx indexes OutVals and can lag i, since the SPE f64 case below
  // consumes two RVLocs (via ++i) for a single OutVals entry.
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    // Extend the value to its location type if the convention requires it.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      // Extract the half that belongs to this RVLoc's register first
      // (endianness decides which i32 is the high part).
      SDValue SVal =
          DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
                      N2: DAG.getIntPtrConstant(Val: isLittleEndian ? 0 : 1, DL: dl));
      Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
      RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
      SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
                         N2: DAG.getIntPtrConstant(Val: isLittleEndian ? 1 : 0, DL: dl));
      Glue = Chain.getValue(R: 1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: Arg, Glue);
    // Glue the copies together so they stay adjacent to the return.
    Glue = Chain.getValue(R: 1);
    RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Elt: Glue);

  return DAG.getNode(Opcode: PPCISD::RET_GLUE, DL: dl, VT: MVT::Other, Ops: RetOps);
}
7856
7857SDValue
7858PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7859 SelectionDAG &DAG) const {
7860 SDLoc dl(Op);
7861
7862 // Get the correct type for integers.
7863 EVT IntVT = Op.getValueType();
7864
7865 // Get the inputs.
7866 SDValue Chain = Op.getOperand(i: 0);
7867 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7868 // Build a DYNAREAOFFSET node.
7869 SDValue Ops[2] = {Chain, FPSIdx};
7870 SDVTList VTs = DAG.getVTList(VT: IntVT);
7871 return DAG.getNode(Opcode: PPCISD::DYNAREAOFFSET, DL: dl, VTList: VTs, Ops);
7872}
7873
7874SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7875 SelectionDAG &DAG) const {
7876 // When we pop the dynamic allocation we need to restore the SP link.
7877 SDLoc dl(Op);
7878
7879 // Get the correct type for pointers.
7880 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7881
7882 // Construct the stack pointer operand.
7883 bool isPPC64 = Subtarget.isPPC64();
7884 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7885 SDValue StackPtr = DAG.getRegister(Reg: SP, VT: PtrVT);
7886
7887 // Get the operands for the STACKRESTORE.
7888 SDValue Chain = Op.getOperand(i: 0);
7889 SDValue SaveSP = Op.getOperand(i: 1);
7890
7891 // Load the old link SP.
7892 SDValue LoadLinkSP =
7893 DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: StackPtr, PtrInfo: MachinePointerInfo());
7894
7895 // Restore the stack pointer.
7896 Chain = DAG.getCopyToReg(Chain: LoadLinkSP.getValue(R: 1), dl, Reg: SP, N: SaveSP);
7897
7898 // Store the old link SP.
7899 return DAG.getStore(Chain, dl, Val: LoadLinkSP, Ptr: StackPtr, PtrInfo: MachinePointerInfo());
7900}
7901
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());

  // Get the current return address save index; it is created lazily below on
  // first use.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the return address (LR) save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: LROffset, IsImmutable: false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(FI: RASI, VT: PtrVT);
}
7923
7924SDValue
7925PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7926 MachineFunction &MF = DAG.getMachineFunction();
7927 bool isPPC64 = Subtarget.isPPC64();
7928 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
7929
7930 // Get current frame pointer save index. The users of this index will be
7931 // primarily DYNALLOC instructions.
7932 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7933 int FPSI = FI->getFramePointerSaveIndex();
7934
7935 // If the frame pointer save index hasn't been defined yet.
7936 if (!FPSI) {
7937 // Find out what the fix offset of the frame pointer save area.
7938 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7939 // Allocate the frame index for frame pointer save area.
7940 FPSI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: FPOffset, IsImmutable: true);
7941 // Save the result.
7942 FI->setFramePointerSaveIndex(FPSI);
7943 }
7944 return DAG.getFrameIndex(FI: FPSI, VT: PtrVT);
7945}
7946
7947SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7948 SelectionDAG &DAG) const {
7949 MachineFunction &MF = DAG.getMachineFunction();
7950 // Get the inputs.
7951 SDValue Chain = Op.getOperand(i: 0);
7952 SDValue Size = Op.getOperand(i: 1);
7953 SDLoc dl(Op);
7954
7955 // Get the correct type for pointers.
7956 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7957 // Negate the size.
7958 SDValue NegSize = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: PtrVT,
7959 N1: DAG.getConstant(Val: 0, DL: dl, VT: PtrVT), N2: Size);
7960 // Construct a node for the frame pointer save index.
7961 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7962 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7963 SDVTList VTs = DAG.getVTList(VT1: PtrVT, VT2: MVT::Other);
7964 if (hasInlineStackProbe(MF))
7965 return DAG.getNode(Opcode: PPCISD::PROBED_ALLOCA, DL: dl, VTList: VTs, Ops);
7966 return DAG.getNode(Opcode: PPCISD::DYNALLOC, DL: dl, VTList: VTs, Ops);
7967}
7968
7969SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7970 SelectionDAG &DAG) const {
7971 MachineFunction &MF = DAG.getMachineFunction();
7972
7973 bool isPPC64 = Subtarget.isPPC64();
7974 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7975
7976 int FI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64 ? 8 : 4, SPOffset: 0, IsImmutable: false);
7977 return DAG.getFrameIndex(FI, VT: PtrVT);
7978}
7979
7980SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7981 SelectionDAG &DAG) const {
7982 SDLoc DL(Op);
7983 return DAG.getNode(Opcode: PPCISD::EH_SJLJ_SETJMP, DL,
7984 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
7985 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
7986}
7987
7988SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7989 SelectionDAG &DAG) const {
7990 SDLoc DL(Op);
7991 return DAG.getNode(Opcode: PPCISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other,
7992 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
7993}
7994
7995SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7996 if (Op.getValueType().isVector())
7997 return LowerVectorLoad(Op, DAG);
7998
7999 assert(Op.getValueType() == MVT::i1 &&
8000 "Custom lowering only for i1 loads");
8001
8002 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8003
8004 SDLoc dl(Op);
8005 LoadSDNode *LD = cast<LoadSDNode>(Val&: Op);
8006
8007 SDValue Chain = LD->getChain();
8008 SDValue BasePtr = LD->getBasePtr();
8009 MachineMemOperand *MMO = LD->getMemOperand();
8010
8011 SDValue NewLD =
8012 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: getPointerTy(DL: DAG.getDataLayout()), Chain,
8013 Ptr: BasePtr, MemVT: MVT::i8, MMO);
8014 SDValue Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewLD);
8015
8016 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8017 return DAG.getMergeValues(Ops, dl);
8018}
8019
8020SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8021 if (Op.getOperand(i: 1).getValueType().isVector())
8022 return LowerVectorStore(Op, DAG);
8023
8024 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8025 "Custom lowering only for i1 stores");
8026
8027 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8028
8029 SDLoc dl(Op);
8030 StoreSDNode *ST = cast<StoreSDNode>(Val&: Op);
8031
8032 SDValue Chain = ST->getChain();
8033 SDValue BasePtr = ST->getBasePtr();
8034 SDValue Value = ST->getValue();
8035 MachineMemOperand *MMO = ST->getMemOperand();
8036
8037 Value = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
8038 Operand: Value);
8039 return DAG.getTruncStore(Chain, dl, Val: Value, Ptr: BasePtr, SVT: MVT::i8, MMO);
8040}
8041
8042// FIXME: Remove this once the ANDI glue bug is fixed:
8043SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8044 assert(Op.getValueType() == MVT::i1 &&
8045 "Custom lowering only for i1 results");
8046
8047 SDLoc DL(Op);
8048 return DAG.getNode(Opcode: PPCISD::ANDI_rec_1_GT_BIT, DL, VT: MVT::i1, Operand: Op.getOperand(i: 0));
8049}
8050
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {

  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size). At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  // operation.

  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where x denotes
  // undefined):
  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  EVT TrgVT = Op.getValueType();
  assert(TrgVT.isVector() && "Vector type expected.");
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  // Bail out unless this truncate was marked Custom for this type and both
  // the target type and its element size are powers of two that fit in a
  // single 128-bit vector register.
  if (!isOperationCustom(Op: Op.getOpcode(), VT: TrgVT) ||
      TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(Value: TrgNumElts) ||
      !llvm::has_single_bit<uint32_t>(Value: EltVT.getSizeInBits()))
    return SDValue();

  SDValue N1 = Op.getOperand(i: 0);
  EVT SrcVT = N1.getValueType();
  unsigned SrcSize = SrcVT.getSizeInBits();
  // The source must be at most two vector registers wide, with power-of-two
  // element count and element size.
  if (SrcSize > 256 || !isPowerOf2_32(Value: SrcVT.getVectorNumElements()) ||
      !llvm::has_single_bit<uint32_t>(
          Value: SrcVT.getVectorElementType().getSizeInBits()))
    return SDValue();
  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
    return SDValue();

  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);

  SDLoc DL(Op);
  SDValue Op1, Op2;
  if (SrcSize == 256) {
    // A 256-bit source is split into its two 128-bit halves, which become
    // the two shuffle inputs.
    EVT VecIdxTy = getVectorIdxTy(DL: DAG.getDataLayout());
    EVT SplitVT =
        N1.getValueType().getHalfNumVectorElementsVT(Context&: *DAG.getContext());
    unsigned SplitNumElts = SplitVT.getVectorNumElements();
    Op1 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
                      N2: DAG.getConstant(Val: 0, DL, VT: VecIdxTy));
    Op2 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
                      N2: DAG.getConstant(Val: SplitNumElts, DL, VT: VecIdxTy));
  }
  else {
    // Otherwise widen the source up to a full 128-bit register if needed and
    // use undef as the unused second shuffle input.
    Op1 = SrcSize == 128 ? N1 : widenVec(DAG, Vec: N1, dl: DL);
    Op2 = DAG.getUNDEF(VT: WideVT);
  }

  // First list the elements we want to keep.
  // Each kept lane is one sub-element of a group of SizeMult source
  // elements: the first element of each group on little-endian, the last on
  // big-endian (matching the byte that holds the truncated value).
  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(Elt: i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(Elt: i * SizeMult - 1);

  // Populate the remaining elements with undefs.
  // NOTE(review): these lanes are don't-care for the truncate result; the
  // fixed index points into Op2, which is undef in the narrow-source case.
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    // ShuffV.push_back(i + WideNumElts);
    ShuffV.push_back(Elt: WideNumElts + 1);

  Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op1);
  Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op2);
  return DAG.getVectorShuffle(VT: WideVT, dl: DL, N1: Op1, N2: Op2, Mask: ShuffV);
}
8131
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible. fsel is natively "setge": it keeps its second operand when the
/// first compares >= 0.0 and its third otherwise (see the swap comments
/// below), so each condition code is rewritten in terms of a possibly
/// negated operand or subtraction compared against zero.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 4))->get();
  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(i: 0).getValueType();
  SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
  SDValue TV = Op.getOperand(i: 2), FV = Op.getOperand(i: 3);
  SDLoc dl(Op);

  // Without power9-vector, we don't have native instruction for f128 comparison.
  // Following transformation to libcall is needed for setcc:
  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
    SDValue Z = DAG.getSetCC(
        DL: dl, VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: CmpVT),
        LHS, RHS, Cond: CC);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: Z.getValueType());
    return DAG.getSelectCC(DL: dl, LHS: Z, RHS: Zero, True: TV, False: FV, Cond: ISD::SETNE);
  }

  // Not FP, or using SPE? Not a fsel.
  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
      Subtarget.hasSPE())
    return Op;

  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(Opcode: PPCISD::XSMAXC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(Opcode: PPCISD::XSMINC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if (!Flags.hasNoInfs() || !Flags.hasNoNaNs() || ResVT == MVT::f128)
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(Op: RHS))
    switch (CC) {
    default: break; // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(a&: TV, b&: FV);
      [[fallthrough]];
    case ISD::SETEQ:
      // Equality needs two fsels: keep TV only when both LHS >= 0 and
      // -LHS >= 0 hold, i.e. LHS == 0.
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
      Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
      if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
        Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
      return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
                         N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: Sel1, N3: FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(a&: TV, b&: FV);  // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
      return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(a&: TV, b&: FV);  // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOLE:
    case ISD::SETLE:
      // LE against zero is handled by negating the operand so the natively-ge
      // fsel tests -LHS >= 0.
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
      return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
                         N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: TV, N3: FV);
    }

  // General case: materialize LHS - RHS (or RHS - LHS) and select on the
  // sign of the difference via fsel; this is only valid because the
  // no-NaNs/no-infs flags were checked above.
  SDValue Cmp;
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(a&: TV, b&: FV);
    [[fallthrough]];
  case ISD::SETEQ:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
    if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
      Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
                       N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: Cmp), N2: Sel1, N3: FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
    return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
  }
  return Op;
}
8262
8263static unsigned getPPCStrictOpcode(unsigned Opc) {
8264 switch (Opc) {
8265 default:
8266 llvm_unreachable("No strict version of this opcode!");
8267 case PPCISD::FCTIDZ:
8268 return PPCISD::STRICT_FCTIDZ;
8269 case PPCISD::FCTIWZ:
8270 return PPCISD::STRICT_FCTIWZ;
8271 case PPCISD::FCTIDUZ:
8272 return PPCISD::STRICT_FCTIDUZ;
8273 case PPCISD::FCTIWUZ:
8274 return PPCISD::STRICT_FCTIWUZ;
8275 case PPCISD::FCFID:
8276 return PPCISD::STRICT_FCFID;
8277 case PPCISD::FCFIDU:
8278 return PPCISD::STRICT_FCFIDU;
8279 case PPCISD::FCFIDS:
8280 return PPCISD::STRICT_FCFIDS;
8281 case PPCISD::FCFIDUS:
8282 return PPCISD::STRICT_FCFIDUS;
8283 }
8284}
8285
/// Build the PPC fcti*z-family node that converts a floating-point value to
/// an integer result. For strict FP nodes the chain is threaded through and
/// the returned value carries the output chain as its second result.
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget) {
  SDLoc dl(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // For strict nodes, source is the second operand.
  SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
  MVT DestTy = Op.getSimpleValueType();
  assert(Src.getValueType().isFloatingPoint() &&
         (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
          DestTy == MVT::i64) &&
         "Invalid FP_TO_INT types");
  // f32 sources are first extended to f64; the conversion nodes below
  // operate on f64 (or f128) inputs.
  if (Src.getValueType() == MVT::f32) {
    if (IsStrict) {
      Src =
          DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL: dl,
                      VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
      Chain = Src.getValue(R: 1);
    } else
      Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
  }
  // With P9 vector support, small integer destinations are widened to the
  // subtarget's native scalar integer width before conversion.
  if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
    DestTy = Subtarget.getScalarIntVT();
  unsigned Opc = ISD::DELETED_NODE;
  switch (DestTy.SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT there is no unsigned word form; fall back to the signed
    // doubleword conversion.
    Opc = IsSigned ? PPCISD::FCTIWZ
                   : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
    break;
  case MVT::i64:
    assert((IsSigned || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
  }
  // The converted value is produced with an FP-typed result (f64, or f128
  // for quad-precision sources); callers move or store it as needed.
  EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
  SDValue Conv;
  if (IsStrict) {
    Opc = getPPCStrictOpcode(Opc);
    Conv = DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src},
                       Flags);
  } else {
    Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: ConvTy, Operand: Src);
  }
  return Conv;
}
8339
// Convert the FP value to an integer and spill it to a stack slot; instead of
// emitting the final load here, hand the chain/pointer/alignment back in RLI
// so the caller can materialize (and possibly reuse) the load itself.
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  bool IsStrict = Op->isStrictFPOpcode();

  // Convert the FP value to an int value through memory.
  // With STFIWX (and a conversion that can produce the word directly) only
  // the 4-byte integer word is stored; otherwise the whole f64 is spilled.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (IsSigned || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(VT: i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(Val&: FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain = IsStrict ? Tmp.getValue(R: 1) : DAG.getEntryNode();
  Align Alignment(DAG.getEVTAlign(MemoryVT: Tmp.getValueType()));
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    Alignment = Align(4);
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Alignment);
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(Opcode: PPCISD::STFIWX, dl,
                                    VTList: DAG.getVTList(VT: MVT::Other), Ops, MemVT: MVT::i32, MMO);
  } else
    Chain = DAG.getStore(Chain, dl, Val: Tmp, Ptr: FIPtr, PtrInfo: MPI, Alignment);

  // Result is a load from the stack slot. If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack &&
      !Subtarget.isLittleEndian()) {
    FIPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: FIPtr.getValueType(), N1: FIPtr,
                        N2: DAG.getConstant(Val: 4, DL: dl, VT: FIPtr.getValueType()));
    MPI = MPI.getWithOffset(O: 4);
  }

  // Hand back everything the caller needs to build the load.
  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
  RLI.Alignment = Alignment;
}
8384
8385/// Custom lowers floating point to integer conversions to use
8386/// the direct move instructions available in ISA 2.07 to avoid the
8387/// need for load/store combinations.
8388SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8389 SelectionDAG &DAG,
8390 const SDLoc &dl) const {
8391 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8392 SDValue Mov = DAG.getNode(Opcode: PPCISD::MFVSR, DL: dl, VT: Op.getValueType(), Operand: Conv);
8393 if (Op->isStrictFPOpcode())
8394 return DAG.getMergeValues(Ops: {Mov, Conv.getValue(R: 1)}, dl);
8395 else
8396 return Mov;
8397}
8398
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (SrcVT == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (SrcVT == MVT::ppcf128) {
    if (DstVT == MVT::i32) {
      // TODO: Conservatively pass only nofpexcept flag here. Need to check and
      // set other fast-math flags to FP operations in both strict and
      // non-strict cases. (FP_TO_SINT, FSUB)
      SDNodeFlags Flags;
      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

      if (IsSigned) {
        // Split the ppcf128 into its two f64 halves.
        SDValue Lo, Hi;
        std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Src, DL: dl, LoVT: MVT::f64, HiVT: MVT::f64);

        // Add the two halves of the long double in round-to-zero mode, and use
        // a smaller FP_TO_SINT.
        if (IsStrict) {
          SDValue Res = DAG.getNode(Opcode: PPCISD::STRICT_FADDRTZ, DL: dl,
                                    VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                    Ops: {Op.getOperand(i: 0), Lo, Hi}, Flags);
          return DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
                             VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
                             Ops: {Res.getValue(R: 1), Res}, Flags);
        } else {
          SDValue Res = DAG.getNode(Opcode: PPCISD::FADDRTZ, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
          return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Res);
        }
      } else {
        // Unsigned case: 2^31 as a ppcf128 constant, and 0x80000000 as the
        // integer bias applied when the input is at least 2^31.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
        SDValue SignMask = DAG.getConstant(Val: 0x80000000, DL: dl, VT: DstVT);
        if (IsStrict) {
          // Sel = Src < 0x80000000
          // FltOfs = select Sel, 0.0, 0x80000000
          // IntOfs = select Sel, 0, 0x80000000
          // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
          SDValue Chain = Op.getOperand(i: 0);
          EVT SetCCVT =
              getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: SrcVT);
          EVT DstSetCCVT =
              getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: DstVT);
          SDValue Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
                                     Chain, IsSignaling: true);
          Chain = Sel.getValue(R: 1);

          SDValue FltOfs = DAG.getSelect(
              DL: dl, VT: SrcVT, Cond: Sel, LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
          Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);

          SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl,
                                    VTList: DAG.getVTList(VT1: SrcVT, VT2: MVT::Other),
                                    Ops: {Chain, Src, FltOfs}, Flags);
          Chain = Val.getValue(R: 1);
          SDValue SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
                                     VTList: DAG.getVTList(VT1: DstVT, VT2: MVT::Other),
                                     Ops: {Chain, Val}, Flags);
          Chain = SInt.getValue(R: 1);
          SDValue IntOfs = DAG.getSelect(
              DL: dl, VT: DstVT, Cond: Sel, LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), RHS: SignMask);
          SDValue Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
          return DAG.getMergeValues(Ops: {Result, Chain}, dl);
        } else {
          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
          // FIXME: generated code sucks.
          SDValue True = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: MVT::ppcf128, N1: Src, N2: Cst);
          True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: True);
          True = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: True, N2: SignMask);
          SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Src);
          return DAG.getSelectCC(DL: dl, LHS: Src, RHS: Cst, True, False, Cond: ISD::SETGE);
        }
      }
    }

    // Other ppcf128 destination types are not custom-handled here.
    return SDValue();
  }

  // With direct moves (ISA 2.07) the store/load round trip can be skipped.
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Otherwise convert through a stack slot and load the integer result back.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(VT: Op.getValueType(), dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
                     Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
}
8498
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (this last part is handled by makeEquivalentMemoryOrdering).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  // Conservatively skip reusing for constrained FP nodes.
  if (Op->isStrictFPOpcode())
    return false;

  SDLoc dl(Op);
  // FP_TO_UINT qualifies only when the available conversions can handle it
  // (FPCVT present, or a 32-bit result).
  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
                       (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
  if (ET == ISD::NON_EXTLOAD &&
      (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op: Op.getOpcode(),
                               VT: Op.getOperand(i: 0).getValueType())) {

    // FP-to-int operands are converted through a stack slot; RLI then points
    // at that slot for the caller to reload from.
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  // Otherwise the operand must itself be a plain (non-volatile,
  // non-temporal) load with the expected extension type and memory VT.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Val&: Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  // If the result of the load is an illegal type, then we can't build a
  // valid chain for reuse since the legalised loads and token factor node that
  // ties the legalised loads together uses a different output chain then the
  // illegal load.
  if (!isTypeLegal(VT: LD->getValueType(ResNo: 0)))
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    // Fold the pre-increment offset into the address we hand back.
    RLI.Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RLI.Ptr.getValueType(), N1: RLI.Ptr,
                          N2: LD->getOffset());
  }

  // Copy out everything the caller needs to recreate an equivalent load.
  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
8560
8561/// Analyze profitability of direct move
8562/// prefer float load to int load plus direct move
8563/// when there is no integer use of int load
8564bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8565 SDNode *Origin = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0).getNode();
8566 if (Origin->getOpcode() != ISD::LOAD)
8567 return true;
8568
8569 // If there is no LXSIBZX/LXSIHZX, like Power8,
8570 // prefer direct move if the memory size is 1 or 2 bytes.
8571 MachineMemOperand *MMO = cast<LoadSDNode>(Val: Origin)->getMemOperand();
8572 if (!Subtarget.hasP9Vector() &&
8573 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8574 return true;
8575
8576 for (SDUse &Use : Origin->uses()) {
8577
8578 // Only look at the users of the loaded value.
8579 if (Use.getResNo() != 0)
8580 continue;
8581
8582 SDNode *User = Use.getUser();
8583 if (User->getOpcode() != ISD::SINT_TO_FP &&
8584 User->getOpcode() != ISD::UINT_TO_FP &&
8585 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8586 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8587 return true;
8588 }
8589
8590 return false;
8591}
8592
8593static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8594 const PPCSubtarget &Subtarget,
8595 SDValue Chain = SDValue()) {
8596 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8597 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8598 SDLoc dl(Op);
8599
8600 // TODO: Any other flags to propagate?
8601 SDNodeFlags Flags;
8602 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8603
8604 // If we have FCFIDS, then use it when converting to single-precision.
8605 // Otherwise, convert to double-precision and then round.
8606 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8607 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8608 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8609 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8610 if (Op->isStrictFPOpcode()) {
8611 if (!Chain)
8612 Chain = Op.getOperand(i: 0);
8613 return DAG.getNode(Opcode: getPPCStrictOpcode(Opc: ConvOpc), DL: dl,
8614 VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
8615 } else
8616 return DAG.getNode(Opcode: ConvOpc, DL: dl, VT: ConvTy, Operand: Src);
8617}
8618
/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  // The integer source is operand 1 for strict nodes (operand 0 is the
  // chain).
  SDValue Src = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0);
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
                Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  // An unsigned i32 must be zero-extended into the VSR (MTVSRZ); signed or
  // full-width sources use the sign-extending MTVSRA.
  unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
  SDValue Mov = DAG.getNode(Opcode: MovOpc, DL: dl, VT: MVT::f64, Operand: Src);
  // The moved bits are then converted with the appropriate FCFID variant.
  return convertIntToFP(Op, Src: Mov, DAG, Subtarget);
}
8638
8639static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8640
8641 EVT VecVT = Vec.getValueType();
8642 assert(VecVT.isVector() && "Expected a vector type.");
8643 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8644
8645 EVT EltVT = VecVT.getVectorElementType();
8646 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8647 EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);
8648
8649 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8650 SmallVector<SDValue, 16> Ops(NumConcat);
8651 Ops[0] = Vec;
8652 SDValue UndefVec = DAG.getUNDEF(VT: VecVT);
8653 for (unsigned i = 1; i < NumConcat; ++i)
8654 Ops[i] = UndefVec;
8655
8656 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: WideVT, Ops);
8657}
8658
/// Lower an int-to-fp conversion whose integer source vector is narrower
/// than 128 bits: widen the source, shuffle its elements into the lanes a
/// full-width conversion reads, extend within each lane, and convert.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  // For strict nodes operand 0 is the chain; the value is operand 1.
  SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Vec: Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  // The conversion consumes i32 lanes for a v4f32 result and i64 lanes for
  // a v2f64 result.
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Start with a mask selecting only elements of the second shuffle operand
  // (undef for signed, zero for unsigned); the source elements are then
  // punched into the positions the conversion reads.
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(Elt: i + WideNumElts);

  // Each result lane occupies Stride source-sized elements; place source
  // element i at the low end (LE) or high end (BE) of its lane.
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // For unsigned conversions the zero fill already provides the
  // zero-extension of each element within its lane.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(VT: WideVT) : DAG.getConstant(Val: 0, DL: dl, VT: WideVT);
  SDValue Arrange = DAG.getVectorShuffle(VT: WideVT, dl, N1: Wide, N2: ShuffleSrc2, Mask: ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(VT: IntermediateVT, V: Arrange);
    // Sign-extend each narrow element in place within its wide lane.
    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT.getVectorElementType(),
                               NumElements: IntermediateVT.getVectorNumElements());

    Extend = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: IntermediateVT, N1: Arrange,
                         N2: DAG.getValueType(ExtVT));
  } else
    Extend = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntermediateVT, Operand: Arrange);

  // Re-emit the conversion on the now full-width, legal source type.
  if (IsStrict)
    return DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other),
                       Ops: {Op.getOperand(i: 0), Extend}, Flags);

  return DAG.getNode(Opcode: Opc, DL: dl, VT: Op.getValueType(), Operand: Extend);
}
8718
/// Custom lower [SU]INT_TO_FP and their strict variants for scalar f32/f64
/// results, choosing among direct moves, FP loads of the integer value
/// (LFIWAX/LFIWZX), or a store/reload sequence, followed by an FCFID-family
/// conversion.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  // Strict nodes carry the chain in operand 0 and the value in operand 1.
  SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(i: 0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  // Narrow vector sources take the widen-shuffle-convert path.
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op: Op.getOpcode(), VT: InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // An i1 converts exactly: select between 1.0 and 0.0.
  if (Src.getValueType() == MVT::i1) {
    SDValue Sel = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: Op.getValueType(), N1: Src,
                              N2: DAG.getConstantFP(Val: 1.0, DL: dl, VT: Op.getValueType()),
                              N3: DAG.getConstantFP(Val: 0.0, DL: dl, VT: Op.getValueType()));
    if (IsStrict)
      return DAG.getMergeValues(Ops: {Sel, Chain}, dl);
    else
      return Sel;
  }

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if afn is in effect, accept double
    // rounding to avoid the extra overhead.
    // FIXME: Currently INT_TO_FP can't support fast math flags because
    // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
    // false.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
        !Op->getFlags().hasApproximateFuncs()) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64,
                                  N1: SINT, N2: DAG.getConstant(Val: 2047, DL: dl, VT: MVT::i64));
      Round = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
                          N1: Round, N2: DAG.getConstant(Val: 2047, DL: dl, VT: MVT::i64));
      Round = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i64, N1: Round, N2: SINT);
      Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64, N1: Round,
                          N2: DAG.getSignedConstant(Val: -2048, DL: dl, VT: MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: MVT::i64,
                                 N1: SINT, N2: DAG.getConstant(Val: 53, DL: dl, VT: MVT::i32));
      Cond = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
                         N1: Cond, N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i64));
      Cond = DAG.getSetCC(
          DL: dl,
          VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: MVT::i64),
          LHS: Cond, RHS: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i64), Cond: ISD::SETUGT);

      SINT = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i64, N1: Cond, N2: Round, N3: SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    // The value is already in memory as a 64-bit integer; reload it as f64.
    if (canReuseLoadAddress(Op: SINT, MemVT: MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(VT: MVT::f64, dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
                         Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
    // Reuse a 32-bit sign-extending load through LFIWAX.
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                  BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWAX, dl,
                                     VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                     Ops, MemVT: MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
    // Reuse a 32-bit zero-extending load through LFIWZX.
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                  BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWZX, dl,
                                     VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                     Ops, MemVT: MVT::i32, MMO);
      if (RLI.ResChain)
        DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
    // The value is an extension of an i32: spill the narrow value and reload
    // it with the matching extending FP load.
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(i: 0).getValueType() == MVT::i32) {
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(Size: 4, Alignment: Align(4), isSpillSlot: false);
      SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Val: SINT.getOperand(i: 0), Ptr: FIdx,
                                   PtrInfo: MachinePointerInfo::getFixedStack(
                                       MF&: DAG.getMachineFunction(), FI: FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
          MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                  BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(Opcode: SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                     Ops, MemVT: MVT::i32, MMO);
      Chain = Bits.getValue(R: 1);
    } else
      // Otherwise move the i64 bits directly into an FP register.
      Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::f64, Operand: SINT);

    SDValue FP = convertIntToFP(Op, Src: Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(R: 1);

    // Without FCFIDS the conversion produced f64; round it to f32 now.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(
            Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
            Ops: {Chain, FP, DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)},
            Flags);
      else
        FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
                         N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    // Reuse an existing in-memory copy of the value if possible; otherwise
    // spill it to a 4-byte stack slot.
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op: Src, MemVT: MVT::i32, RLI, DAG))) {
      int FrameIdx = MFI.CreateStackObject(Size: 4, Alignment: Align(4), isSpillSlot: false);
      SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Val: Src, Ptr: FIdx,
                                   PtrInfo: MachinePointerInfo::getFixedStack(
                                       MF&: DAG.getMachineFunction(), FI: FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
        MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    // LFIWAX sign-extends, LFIWZX zero-extends, matching the signedness of
    // the conversion.
    Ld = DAG.getMemIntrinsicNode(Opcode: IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops,
                                 MemVT: MVT::i32, MMO);
    Chain = Ld.getValue(R: 1);
    if (ReusingLoad && RLI.ResChain) {
      DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Ld.getValue(R: 1));
    }
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
    SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);

    SDValue Ext64 = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i64, Operand: Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Val: Ext64, Ptr: FIdx,
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        VT: MVT::f64, dl, Chain, Ptr: FIdx,
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
    Chain = Ld.getValue(R: 1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Src: Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(R: 1);
  // Without FCFIDS the conversion produced f64; round it to f32 now.
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(
          Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
          Ops: {Chain, FP, DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)}, Flags);
    else
      FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
                       N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
  }
  return FP;
}
8983
/// Lower SET_ROUNDING: write the requested LLVM rounding mode (operand 1;
/// 0=toward zero, 1=nearest, 2=+inf, 3=-inf) into the FPSCR RN field.
SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
  SDValue Chain = Op.getOperand(i: 0);

  // If requested mode is constant, just use simpler mtfsb/mffscrni
  if (auto *CVal = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))) {
    uint64_t Mode = CVal->getZExtValue();
    assert(Mode < 4 && "Unsupported rounding mode!");
    // Translate the LLVM encoding to the PPC RN encoding
    // (00=nearest, 01=zero, 10=+inf, 11=-inf).
    unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
    if (Subtarget.isISA3_0())
      return SDValue(
          DAG.getMachineNode(
              Opcode: PPC::MFFSCRNI, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
              Ops: {DAG.getConstant(Val: InternalRnd, DL: Dl, VT: MVT::i32, isTarget: true), Chain}),
          1);
    // Pre-ISA3.0: set FPSCR bits 30 and 31 (the RN field, IBM numbering)
    // one at a time with mtfsb0/mtfsb1.
    SDNode *SetHi = DAG.getMachineNode(
        Opcode: (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
        Ops: {DAG.getConstant(Val: 30, DL: Dl, VT: MVT::i32, isTarget: true), Chain});
    SDNode *SetLo = DAG.getMachineNode(
        Opcode: (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
        Ops: {DAG.getConstant(Val: 31, DL: Dl, VT: MVT::i32, isTarget: true), SDValue(SetHi, 0)});
    return SDValue(SetLo, 0);
  }

  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
  SDValue One = DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32);
  SDValue SrcFlag = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: Op.getOperand(i: 1),
                                N2: DAG.getConstant(Val: 3, DL: Dl, VT: MVT::i32));
  SDValue DstFlag = DAG.getNode(
      Opcode: ISD::XOR, DL: Dl, VT: MVT::i32, N1: SrcFlag,
      N2: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32,
                  N1: DAG.getNOT(DL: Dl,
                             Val: DAG.getNode(Opcode: ISD::SRL, DL: Dl, VT: MVT::i32, N1: SrcFlag, N2: One),
                             VT: MVT::i32),
                  N2: One));
  // For Power9, there's faster mffscrn, and we don't need to read FPSCR
  SDValue MFFS;
  if (!Subtarget.isISA3_0()) {
    MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: Dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
    Chain = MFFS.getValue(R: 1);
  }
  // Build the f64 image of the new FPSCR value (only the RN bits matter on
  // the ISA 3.0 path, since mffscrn reads nothing else).
  SDValue NewFPSCR;
  if (Subtarget.isPPC64()) {
    if (Subtarget.isISA3_0()) {
      NewFPSCR = DAG.getAnyExtOrTrunc(Op: DstFlag, DL: Dl, VT: MVT::i64);
    } else {
      // Set the last two bits (rounding mode) of bitcasted FPSCR.
      SDNode *InsertRN = DAG.getMachineNode(
          Opcode: PPC::RLDIMI, dl: Dl, VT: MVT::i64,
          Ops: {DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::i64, Operand: MFFS),
               DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: Dl, VT: MVT::i64, Operand: DstFlag),
               DAG.getTargetConstant(Val: 0, DL: Dl, VT: MVT::i32),
               DAG.getTargetConstant(Val: 62, DL: Dl, VT: MVT::i32)});
      NewFPSCR = SDValue(InsertRN, 0);
    }
    NewFPSCR = DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::f64, Operand: NewFPSCR);
  } else {
    // In 32-bit mode, store f64, load and update the lower half.
    int SSFI = MF.getFrameInfo().CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
    SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
    // Address of the 32-bit word that holds the RN bits of the f64 image.
    SDValue Addr = Subtarget.isLittleEndian()
                       ? StackSlot
                       : DAG.getNode(Opcode: ISD::ADD, DL: Dl, VT: PtrVT, N1: StackSlot,
                                     N2: DAG.getConstant(Val: 4, DL: Dl, VT: PtrVT));
    if (Subtarget.isISA3_0()) {
      Chain = DAG.getStore(Chain, dl: Dl, Val: DstFlag, Ptr: Addr, PtrInfo: MachinePointerInfo());
    } else {
      Chain = DAG.getStore(Chain, dl: Dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
      SDValue Tmp =
          DAG.getLoad(VT: MVT::i32, dl: Dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo());
      Chain = Tmp.getValue(R: 1);
      // Insert the two RN bits into bit positions 30-31 of the loaded word.
      Tmp = SDValue(DAG.getMachineNode(
                        Opcode: PPC::RLWIMI, dl: Dl, VT: MVT::i32,
                        Ops: {Tmp, DstFlag, DAG.getTargetConstant(Val: 0, DL: Dl, VT: MVT::i32),
                             DAG.getTargetConstant(Val: 30, DL: Dl, VT: MVT::i32),
                             DAG.getTargetConstant(Val: 31, DL: Dl, VT: MVT::i32)}),
                    0);
      Chain = DAG.getStore(Chain, dl: Dl, Val: Tmp, Ptr: Addr, PtrInfo: MachinePointerInfo());
    }
    NewFPSCR =
        DAG.getLoad(VT: MVT::f64, dl: Dl, Chain, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
    Chain = NewFPSCR.getValue(R: 1);
  }
  // Commit the new value: mffscrn on ISA 3.0, otherwise mtfsf of all fields.
  if (Subtarget.isISA3_0())
    return SDValue(DAG.getMachineNode(Opcode: PPC::MFFSCRN, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
                                      Ops: {NewFPSCR, Chain}),
                   1);
  SDValue Zero = DAG.getConstant(Val: 0, DL: Dl, VT: MVT::i32, isTarget: true);
  SDNode *MTFSF = DAG.getMachineNode(
      Opcode: PPC::MTFSF, dl: Dl, VT: MVT::Other,
      Ops: {DAG.getConstant(Val: 255, DL: Dl, VT: MVT::i32, isTarget: true), NewFPSCR, Zero, Zero, Chain});
  return SDValue(MTFSF, 0);
}
9080
/// Lower GET_ROUNDING: read the FPSCR RN field and convert it to the LLVM
/// rounding-mode encoding.
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  GET_ROUNDING, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(i: 0);
  SDValue MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
  Chain = MFFS.getValue(R: 1);

  SDValue CWD;
  if (isTypeLegal(VT: MVT::i64)) {
    // With legal i64, bitcast the FPSCR image and truncate to the low word.
    CWD = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32,
                      Operand: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: MFFS));
  } else {
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo().CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
    SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
    Chain = DAG.getStore(Chain, dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo());

    // Load FP Control Word from low 32 bits of stack slot.
    assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
           "Stack slot adjustment is valid only on big endian subtargets!");
    SDValue Four = DAG.getConstant(Val: 4, DL: dl, VT: PtrVT);
    SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackSlot, N2: Four);
    CWD = DAG.getLoad(VT: MVT::i32, dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo());
    Chain = CWD.getValue(R: 1);
  }

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                N1: CWD, N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32));
  SDValue CWD2 =
    DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
                N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                            N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32,
                                        N1: CWD, N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32)),
                            N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32)),
                N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));

  SDValue RetVal =
    DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: CWD1, N2: CWD2);

  // Adjust the computed i32 to the requested result width.
  RetVal =
      DAG.getNode(Opcode: (VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
                  DL: dl, VT, Operand: RetVal);

  return DAG.getMergeValues(Ops: {RetVal, Chain}, dl);
}
9152
9153SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9154 EVT VT = Op.getValueType();
9155 uint64_t BitWidth = VT.getSizeInBits();
9156 SDLoc dl(Op);
9157 assert(Op.getNumOperands() == 3 &&
9158 VT == Op.getOperand(1).getValueType() &&
9159 "Unexpected SHL!");
9160
9161 // Expand into a bunch of logical ops. Note that these ops
9162 // depend on the PPC behavior for oversized shift amounts.
9163 SDValue Lo = Op.getOperand(i: 0);
9164 SDValue Hi = Op.getOperand(i: 1);
9165 SDValue Amt = Op.getOperand(i: 2);
9166 EVT AmtVT = Amt.getValueType();
9167
9168 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9169 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9170 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Amt);
9171 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Tmp1);
9172 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR , DL: dl, VT, N1: Tmp2, N2: Tmp3);
9173 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9174 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9175 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Tmp5);
9176 SDValue OutHi = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9177 SDValue OutLo = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Amt);
9178 SDValue OutOps[] = { OutLo, OutHi };
9179 return DAG.getMergeValues(Ops: OutOps, dl);
9180}
9181
9182SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9183 EVT VT = Op.getValueType();
9184 SDLoc dl(Op);
9185 uint64_t BitWidth = VT.getSizeInBits();
9186 assert(Op.getNumOperands() == 3 &&
9187 VT == Op.getOperand(1).getValueType() &&
9188 "Unexpected SRL!");
9189
9190 // Expand into a bunch of logical ops. Note that these ops
9191 // depend on the PPC behavior for oversized shift amounts.
9192 SDValue Lo = Op.getOperand(i: 0);
9193 SDValue Hi = Op.getOperand(i: 1);
9194 SDValue Amt = Op.getOperand(i: 2);
9195 EVT AmtVT = Amt.getValueType();
9196
9197 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9198 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9199 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9200 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9201 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9202 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9203 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9204 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Tmp5);
9205 SDValue OutLo = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9206 SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Amt);
9207 SDValue OutOps[] = { OutLo, OutHi };
9208 return DAG.getMergeValues(Ops: OutOps, dl);
9209}
9210
9211SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9212 SDLoc dl(Op);
9213 EVT VT = Op.getValueType();
9214 uint64_t BitWidth = VT.getSizeInBits();
9215 assert(Op.getNumOperands() == 3 &&
9216 VT == Op.getOperand(1).getValueType() &&
9217 "Unexpected SRA!");
9218
9219 // Expand into a bunch of logical ops, followed by a select_cc.
9220 SDValue Lo = Op.getOperand(i: 0);
9221 SDValue Hi = Op.getOperand(i: 1);
9222 SDValue Amt = Op.getOperand(i: 2);
9223 EVT AmtVT = Amt.getValueType();
9224
9225 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9226 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9227 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9228 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9229 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9230 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9231 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9232 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Tmp5);
9233 SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Amt);
9234 SDValue OutLo = DAG.getSelectCC(DL: dl, LHS: Tmp5, RHS: DAG.getConstant(Val: 0, DL: dl, VT: AmtVT),
9235 True: Tmp4, False: Tmp6, Cond: ISD::SETLE);
9236 SDValue OutOps[] = { OutLo, OutHi };
9237 return DAG.getMergeValues(Ops: OutOps, dl);
9238}
9239
9240SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9241 SelectionDAG &DAG) const {
9242 SDLoc dl(Op);
9243 EVT VT = Op.getValueType();
9244 unsigned BitWidth = VT.getSizeInBits();
9245
9246 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9247 SDValue X = Op.getOperand(i: 0);
9248 SDValue Y = Op.getOperand(i: 1);
9249 SDValue Z = Op.getOperand(i: 2);
9250 EVT AmtVT = Z.getValueType();
9251
9252 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9253 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9254 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9255 // on PowerPC shift by BW being well defined.
9256 Z = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Z,
9257 N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT: AmtVT));
9258 SDValue SubZ =
9259 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT, N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Z);
9260 X = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: X, N2: IsFSHL ? Z : SubZ);
9261 Y = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Y, N2: IsFSHL ? SubZ : Z);
9262 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: X, N2: Y);
9263}
9264
9265//===----------------------------------------------------------------------===//
9266// Vector related lowering.
9267//
9268
/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
/// element size of SplatSize. Cast the result to VT.
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
                                      SelectionDAG &DAG, const SDLoc &dl) {
  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  // SplatSize is 1, 2, or 4 bytes; index SplatSize-1 selects the matching
  // canonical type (index 2, which would be a 3-byte splat, is unused).
  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
    SplatSize = 1;
    Val = 0xFF;
  }

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  // Explicitly truncate APInt here, as this API is used with a mix of
  // signed and unsigned values.
  return DAG.getBitcast(
      VT: ReqVT,
      V: DAG.getConstant(Val: APInt(64, Val).trunc(width: SplatSize * 8), DL: dl, VT: CanonicalVT));
}
9294
9295/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9296/// specified intrinsic ID.
9297static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9298 const SDLoc &dl, EVT DestVT = MVT::Other) {
9299 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9300 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9301 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op);
9302}
9303
9304/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9305/// specified intrinsic ID.
9306static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9307 SelectionDAG &DAG, const SDLoc &dl,
9308 EVT DestVT = MVT::Other) {
9309 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9310 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9311 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: LHS, N3: RHS);
9312}
9313
9314/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9315/// specified intrinsic ID.
9316static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9317 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9318 EVT DestVT = MVT::Other) {
9319 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9320 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9321 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op0, N3: Op1, N4: Op2);
9322}
9323
9324/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9325/// amount. The result has the specified value type.
9326static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9327 SelectionDAG &DAG, const SDLoc &dl) {
9328 // Force LHS/RHS to be the right type.
9329 LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: LHS);
9330 RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: RHS);
9331
9332 int Ops[16];
9333 for (unsigned i = 0; i != 16; ++i)
9334 Ops[i] = i + Amt;
9335 SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: LHS, N2: RHS, Mask: Ops);
9336 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
9337}
9338
9339/// Do we have an efficient pattern in a .td file for this node?
9340///
9341/// \param V - pointer to the BuildVectorSDNode being matched
9342/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9343///
9344/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9345/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9346/// the opposite is true (expansion is beneficial) are:
9347/// - The node builds a vector out of integers that are not 32 or 64-bits
9348/// - The node builds a vector out of constants
9349/// - The node is a "load-and-splat"
9350/// In all other cases, we will choose to keep the BUILD_VECTOR.
9351static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9352 bool HasDirectMove,
9353 bool HasP8Vector) {
9354 EVT VecVT = V->getValueType(ResNo: 0);
9355 bool RightType = VecVT == MVT::v2f64 ||
9356 (HasP8Vector && VecVT == MVT::v4f32) ||
9357 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9358 if (!RightType)
9359 return false;
9360
9361 bool IsSplat = true;
9362 bool IsLoad = false;
9363 SDValue Op0 = V->getOperand(Num: 0);
9364
9365 // This function is called in a block that confirms the node is not a constant
9366 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9367 // different constants.
9368 if (V->isConstant())
9369 return false;
9370 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9371 if (V->getOperand(Num: i).isUndef())
9372 return false;
9373 // We want to expand nodes that represent load-and-splat even if the
9374 // loaded value is a floating point truncation or conversion to int.
9375 if (V->getOperand(Num: i).getOpcode() == ISD::LOAD ||
9376 (V->getOperand(Num: i).getOpcode() == ISD::FP_ROUND &&
9377 V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD) ||
9378 (V->getOperand(Num: i).getOpcode() == ISD::FP_TO_SINT &&
9379 V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD) ||
9380 (V->getOperand(Num: i).getOpcode() == ISD::FP_TO_UINT &&
9381 V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD))
9382 IsLoad = true;
9383 // If the operands are different or the input is not a load and has more
9384 // uses than just this BV node, then it isn't a splat.
9385 if (V->getOperand(Num: i) != Op0 ||
9386 (!IsLoad && !V->isOnlyUserOf(N: V->getOperand(Num: i).getNode())))
9387 IsSplat = false;
9388 }
9389 return !(IsSplat && IsLoad);
9390}
9391
9392// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9393SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9394
9395 SDLoc dl(Op);
9396 SDValue Op0 = Op->getOperand(Num: 0);
9397
9398 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9399 (Op.getValueType() != MVT::f128))
9400 return SDValue();
9401
9402 SDValue Lo = Op0.getOperand(i: 0);
9403 SDValue Hi = Op0.getOperand(i: 1);
9404 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9405 return SDValue();
9406
9407 if (!Subtarget.isLittleEndian())
9408 std::swap(a&: Lo, b&: Hi);
9409
9410 return DAG.getNode(Opcode: PPCISD::BUILD_FP128, DL: dl, VT: MVT::f128, N1: Lo, N2: Hi);
9411}
9412
9413static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9414 const SDValue *InputLoad = &Op;
9415 while (InputLoad->getOpcode() == ISD::BITCAST)
9416 InputLoad = &InputLoad->getOperand(i: 0);
9417 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9418 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9419 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9420 InputLoad = &InputLoad->getOperand(i: 0);
9421 }
9422 if (InputLoad->getOpcode() != ISD::LOAD)
9423 return nullptr;
9424 LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
9425 return ISD::isNormalLoad(N: LD) ? InputLoad : nullptr;
9426}
9427
9428// Convert the argument APFloat to a single precision APFloat if there is no
9429// loss in information during the conversion to single precision APFloat and the
9430// resulting number is not a denormal number. Return true if successful.
9431bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9432 APFloat APFloatToConvert = ArgAPFloat;
9433 bool LosesInfo = true;
9434 APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9435 losesInfo: &LosesInfo);
9436 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9437 if (Success)
9438 ArgAPFloat = APFloatToConvert;
9439 return Success;
9440}
9441
9442// Bitcast the argument APInt to a double and convert it to a single precision
9443// APFloat, bitcast the APFloat to an APInt and assign it to the original
9444// argument if there is no loss in information during the conversion from
9445// double to single precision APFloat and the resulting number is not a denormal
9446// number. Return true if successful.
9447bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9448 double DpValue = ArgAPInt.bitsToDouble();
9449 APFloat APFloatDp(DpValue);
9450 bool Success = convertToNonDenormSingle(ArgAPFloat&: APFloatDp);
9451 if (Success)
9452 ArgAPInt = APFloatDp.bitcastToAPInt();
9453 return Success;
9454}
9455
9456// Nondestructive check for convertTonNonDenormSingle.
9457bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9458 // Only convert if it loses info, since XXSPLTIDP should
9459 // handle the other case.
9460 APFloat APFloatToConvert = ArgAPFloat;
9461 bool LosesInfo = true;
9462 APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9463 losesInfo: &LosesInfo);
9464
9465 return (!LosesInfo && !APFloatToConvert.isDenormal());
9466}
9467
9468static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9469 unsigned &Opcode) {
9470 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Val: Op.getOperand(i: 0));
9471 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(N: InputNode))
9472 return false;
9473
9474 EVT Ty = Op->getValueType(ResNo: 0);
9475 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9476 // as we cannot handle extending loads for these types.
9477 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9478 ISD::isNON_EXTLoad(N: InputNode))
9479 return true;
9480
9481 EVT MemVT = InputNode->getMemoryVT();
9482 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9483 // memory VT is the same vector element VT type.
9484 // The loads feeding into the v8i16 and v16i8 types will be extending because
9485 // scalar i8/i16 are not legal types.
9486 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(N: InputNode) &&
9487 (MemVT == Ty.getVectorElementType()))
9488 return true;
9489
9490 if (Ty == MVT::v2i64) {
9491 // Check the extend type, when the input type is i32, and the output vector
9492 // type is v2i64.
9493 if (MemVT == MVT::i32) {
9494 if (ISD::isZEXTLoad(N: InputNode))
9495 Opcode = PPCISD::ZEXT_LD_SPLAT;
9496 if (ISD::isSEXTLoad(N: InputNode))
9497 Opcode = PPCISD::SEXT_LD_SPLAT;
9498 }
9499 return true;
9500 }
9501 return false;
9502}
9503
9504bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9505 bool IsLittleEndian) {
9506 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9507
9508 BitMask.clearAllBits();
9509 EVT VT = BVN.getValueType(ResNo: 0);
9510 unsigned VTSize = VT.getSizeInBits();
9511 APInt ConstValue(VTSize, 0);
9512
9513 unsigned EltWidth = VT.getScalarSizeInBits();
9514
9515 unsigned BitPos = 0;
9516 for (auto OpVal : BVN.op_values()) {
9517 auto *CN = dyn_cast<ConstantSDNode>(Val&: OpVal);
9518
9519 if (!CN)
9520 return false;
9521 // The elements in a vector register are ordered in reverse byte order
9522 // between little-endian and big-endian modes.
9523 ConstValue.insertBits(SubBits: CN->getAPIntValue().zextOrTrunc(width: EltWidth),
9524 bitPosition: IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9525 BitPos += EltWidth;
9526 }
9527
9528 for (unsigned J = 0; J < 16; ++J) {
9529 APInt ExtractValue = ConstValue.extractBits(numBits: 8, bitPosition: J * 8);
9530 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9531 return false;
9532 if (ExtractValue == 0xFF)
9533 BitMask.setBit(J);
9534 }
9535 return true;
9536}
9537
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // On Power10, try to materialize a 0x00/0xFF-byte-pattern vector with a
  // single MTVSRBMI, and recognize other special values that have dedicated
  // VSX instruction sequences.
  if (Subtarget.hasP10Vector()) {
    APInt BitMask(32, 0);
    // If the value of the vector is all zeros or all ones,
    // we do not convert it to MTVSRBMI.
    // The xxleqv instruction sets a vector with all ones.
    // The xxlxor instruction sets a vector with all zeros.
    if (isValidMtVsrBmi(BitMask, BVN&: *BVN, IsLittleEndian: Subtarget.isLittleEndian()) &&
        BitMask != 0 && BitMask != 0xffff) {
      SDValue SDConstant = DAG.getTargetConstant(Val: BitMask, DL: dl, VT: MVT::i32);
      MachineSDNode *MSDNode =
          DAG.getMachineNode(Opcode: PPC::MTVSRBMI, dl, VT: MVT::v16i8, Op1: SDConstant);
      SDValue SDV = SDValue(MSDNode, 0);
      EVT DVT = BVN->getValueType(ResNo: 0);
      EVT SVT = SDV.getValueType();
      // MTVSRBMI produces v16i8; bitcast to the requested vector type if
      // it differs.
      if (SVT != DVT) {
        SDV = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: DVT, Operand: SDV);
      }
      return SDV;
    }
    // Recognize build vector patterns to emit VSX vector instructions
    // instead of loading value from memory.
    if (SDValue VecPat = combineBVLoadsSpecialValue(Operand: Op, DAG))
      return VecPat;
  }
  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool BVNIsConstantSplat =
      BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
                           HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian());

  // If it is a splat of a double, check if we can shrink it to a 32 bit
  // non-denormal float which when converted back to double gives us the same
  // double. This is to exploit the XXSPLTIDP instruction.
  // If we lose precision, we use XXSPLTI32DX.
  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
      Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
    // Check the type first to short-circuit so we don't modify APSplatBits if
    // this block isn't executed.
    if ((Op->getValueType(ResNo: 0) == MVT::v2f64) &&
        convertToNonDenormSingle(ArgAPInt&: APSplatBits)) {
      // APSplatBits now holds the 32-bit single-precision image; XXSPLTIDP
      // expands it back to a double splat.
      SDValue SplatNode = DAG.getNode(
          Opcode: PPCISD::XXSPLTI_SP_TO_DP, DL: dl, VT: MVT::v2f64,
          Operand: DAG.getTargetConstant(Val: APSplatBits.getZExtValue(), DL: dl, VT: MVT::i32));
      return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
    } else {
      // We may lose precision, so we have to use XXSPLTI32DX.
      // Splice the two 32-bit halves of the 64-bit splat value into the
      // even (Hi) and odd (Lo) word lanes.
      uint32_t Hi = Hi_32(Value: APSplatBits.getZExtValue());
      uint32_t Lo = Lo_32(Value: APSplatBits.getZExtValue());
      SDValue SplatNode = DAG.getUNDEF(VT: MVT::v2i64);

      if (!Hi || !Lo)
        // If either load is 0, then we should generate XXLXOR to set to 0.
        SplatNode = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::v2i64);

      if (Hi)
        SplatNode = DAG.getNode(
            Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
            N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
            N3: DAG.getTargetConstant(Val: Hi, DL: dl, VT: MVT::i32));

      if (Lo)
        SplatNode =
            DAG.getNode(Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
                        N2: DAG.getTargetConstant(Val: 1, DL: dl, VT: MVT::i32),
                        N3: DAG.getTargetConstant(Val: Lo, DL: dl, VT: MVT::i32));

      return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
    }
  }

  // A 64-bit constant splat can still be handled with the immediate-splat
  // paths below when the value fits the VSPLTI/XXSPLTIB signed-immediate
  // range ([-16,15], or [-128,127] with P9 vector support).
  bool IsSplat64 = false;
  uint64_t SplatBits = 0;
  int32_t SextVal = 0;
  if (BVNIsConstantSplat && SplatBitSize <= 64) {
    SplatBits = APSplatBits.getZExtValue();
    if (SplatBitSize <= 32) {
      SextVal = SignExtend32(X: SplatBits, B: SplatBitSize);
    } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
      int64_t Splat64Val = static_cast<int64_t>(SplatBits);
      bool P9Vector = Subtarget.hasP9Vector();
      int32_t Hi = P9Vector ? 127 : 15;
      int32_t Lo = P9Vector ? -128 : -16;
      IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
      SextVal = static_cast<int32_t>(SplatBits);
    }
  }

  // Not an immediate-encodable constant splat: try load-and-splat, or keep
  // the BUILD_VECTOR when VSX has an efficient pattern for it.
  if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
    unsigned NewOpcode = PPCISD::LD_SPLAT;

    // Handle load-and-splat patterns as we have instructions that will do this
    // in one go.
    if (DAG.isSplatValue(V: Op, AllowUndefs: true) &&
        isValidSplatLoad(Subtarget, Op, Opcode&: NewOpcode)) {
      const SDValue *InputLoad = &Op.getOperand(i: 0);
      LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);

      // If the input load is an extending load, it will be an i32 -> i64
      // extending load and isValidSplatLoad() will update NewOpcode.
      unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
      unsigned ElementSize =
          MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);

      assert(((ElementSize == 2 * MemorySize)
                  ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
                     NewOpcode == PPCISD::SEXT_LD_SPLAT)
                  : (NewOpcode == PPCISD::LD_SPLAT)) &&
             "Unmatched element size and opcode!\n");

      // Checking for a single use of this load, we have to check for vector
      // width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value.
      unsigned NumUsesOfInputLD = 128 / ElementSize;
      for (SDValue BVInOp : Op->ops())
        if (BVInOp.isUndef())
          NumUsesOfInputLD--;

      // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
      // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
      // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
      // 15", but function IsValidSplatLoad() now will only return true when
      // the data at index 0 is not nullptr. So we will not get into trouble for
      // these cases.
      //
      // case 1 - lfiwzx/lfiwax
      // 1.1: load result is i32 and is sign/zero extend to i64;
      // 1.2: build a v2i64 vector type with above loaded value;
      // 1.3: the vector has only one value at index 0, others are all undef;
      // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
      if (NumUsesOfInputLD == 1 &&
          (Op->getValueType(ResNo: 0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
           !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
           Subtarget.hasLFIWAX()))
        return SDValue();

      // case 2 - lxvr[hb]x
      // 2.1: load result is at most i16;
      // 2.2: build a vector with above loaded value;
      // 2.3: the vector has only one value at index 0, others are all undef;
      // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
      if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
          Subtarget.isISA3_1() && ElementSize <= 16)
        return SDValue();

      assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
      if (InputLoad->getNode()->hasNUsesOfValue(NUses: NumUsesOfInputLD, Value: 0) &&
          Subtarget.hasVSX()) {
        SDValue Ops[] = {
          LD->getChain(),    // Chain
          LD->getBasePtr(),  // Ptr
          DAG.getValueType(Op.getValueType())  // VT
        };
        SDValue LdSplt = DAG.getMemIntrinsicNode(
            Opcode: NewOpcode, dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other), Ops,
            MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
        // Replace all uses of the output chain of the original load with the
        // output chain of the new load.
        DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: 1),
                                      To: LdSplt.getValue(R: 1));
        return LdSplt;
      }
    }

    // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
    // 32-bits can be lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
        haveEfficientBuildVectorPattern(V: BVN, HasDirectMove: Subtarget.hasDirectMove(),
                                        HasP8Vector: Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  uint64_t SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(Val: 0, DL: dl, VT: MVT::v4i32);
      Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Z);
    }
    return Op;
  }

  // We have XXSPLTIW for constant splats four bytes wide.
  // Given vector length is a multiple of 4, 2-byte splats can be replaced
  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
  // turned into a 4-byte splat of 0xABABABAB.
  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
    return getCanonicalConstSplat(Val: SplatBits | (SplatBits << 16), SplatSize: SplatSize * 2,
                                  VT: Op.getValueType(), DAG, dl);

  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
    return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
                                  dl);

  // We have XXSPLTIB for constant splats one byte wide.
  if (Subtarget.hasP9Vector() && SplatSize == 1)
    return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
                                  dl);

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
  if (SextVal >= -16 && SextVal <= 15) {
    // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
    // generate a splat word with extend for size 8.
    unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
    SDValue Res =
        getCanonicalConstSplat(Val: SextVal, SplatSize: UseSize, VT: Op.getValueType(), DAG, dl);
    if (SplatSize != 8)
      return Res;
    // For the 64-bit case, sign-extend the splatted words to doublewords.
    return BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vupklsw, Op: Res, DAG, dl);
  }

  // Two instruction sequences.

  // On P9, splat a byte with XXSPLTIB and sign-extend it to the target
  // element width with the appropriate vector-extend intrinsic.
  if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
    SDValue C = DAG.getConstant(Val: (unsigned char)SextVal, DL: dl, VT: MVT::i32);
    SmallVector<SDValue, 16> Ops(16, C);
    SDValue BV = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops);
    unsigned IID;
    EVT VT;
    switch (SplatSize) {
    default:
      llvm_unreachable("Unexpected type for vector constant.");
    case 2:
      IID = Intrinsic::ppc_altivec_vupklsb;
      VT = MVT::v8i16;
      break;
    case 4:
      IID = Intrinsic::ppc_altivec_vextsb2w;
      VT = MVT::v4i32;
      break;
    case 8:
      IID = Intrinsic::ppc_altivec_vextsb2d;
      VT = MVT::v2i64;
      break;
    }
    SDValue Extend = BuildIntrinsicOp(IID, Op: BV, DAG, dl, DestVT: VT);
    return DAG.getBitcast(VT: Op->getValueType(ResNo: 0), V: Extend);
  }
  // 64-bit splats should have been fully handled by the paths above.
  assert(!IsSplat64 && "Unhandled 64-bit splat pattern");

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getSignedConstant(Val: SextVal, DL: dl, VT: MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(Val: SplatSize, DL: dl, VT: MVT::i32);
    SDValue RetVal = DAG.getNode(Opcode: PPCISD::VADD_SPLAT, DL: dl, VT, N1: Elt, N2: EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = getCanonicalConstSplat(Val: -1, SplatSize: 4, VT: MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: OnesV,
                                   RHS: OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::v4i32, N1: Res, N2: OnesV);
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
    }
  }

  // No efficient lowering found; let the default expansion handle it.
  return SDValue();
}
9909
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle. A PFEntry packs an opcode
/// in bits [29:26] and two 13-bit operand IDs (indices into
/// PerfectShuffleTable) in bits [25:13] and [12:0]; the function recurses on
/// the operand IDs and combines the results with the decoded operation.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  // Recursion base case: an OP_COPY's ID encodes the four mask elements as
  // base-9 digits — <0,1,2,3> is the LHS identity, <4,5,6,7> the RHS one.
  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize both operands before combining them.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Byte-level shuffle mask that mimics the decoded word operation.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    // Interleave the high words of the two operands.
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    // Interleave the low words of the two operands.
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    // Splat word 0 of the LHS operand.
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  // The vsldoi cases are emitted directly rather than through ShufIdxs.
  case OP_VSLDOI4:
    return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 4, VT: OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 8, VT: OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 12, VT: OpLHS.getValueType(), DAG, dl);
  }
  // Emit the shuffle on the byte-vector form and cast back to the original
  // value type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpLHS);
  OpRHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpRHS);
  SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OpLHS, N2: OpRHS, Mask: ShufIdxs);
  return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
}
9986
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue. The shuffle must be equivalent to inserting a single byte of one
/// vector into the other (optionally after rotating the source with VECSHL).
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(Num: 0);
  SDValue V2 = N->getOperand(Num: 1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8,  7,  6,  5,  4,  3,  2,  1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2,  3,  4,  5,  6,  7,  8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we want the rest of the Mask
      // to be from V2 [16,31] and vice versa.  Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(a&: V1, b&: V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source so the byte to insert lands in the slot VINSERTB
    // reads from, then insert it at the target byte.
    SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
                              N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: Shl,
                       N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
  }
  return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: V2,
                     N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
}
10087
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
///
/// VINSERTH inserts one half-word from one vector into another while leaving
/// the remaining elements untouched. This routine recognizes shuffle masks
/// with that shape (all elements in original order except a single inserted
/// half-word) and emits VECINSERT, optionally preceded by a VECSHL to rotate
/// the desired half-word into the fixed source element the instruction reads.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, Width: 2, StepLen: 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(Num: 0);
  SDValue V2 = N->getOperand(Num: 1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  // Indexed by the (half-word) mask value of the inserted element, modulo 8.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Expected packed masks when all elements come from V1 ([0,7]) or from
  // V2 ([8,15]) respectively, one 4-bit nibble per half-word element.
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  // (isNByteElemShuffleMask above guarantees byte pairs are consecutive, so
  // sampling every other byte index and dividing by 2 yields the half-word
  // mask. Element 0 lands in the most significant nibble.)
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(Idx: i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa. Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // Mask covering every nibble except the candidate insertion position.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      // With a single input, no shift is emitted, so the inserted element
      // must already be the fixed half-word VINSERTH reads (element 4 on LE,
      // 3 on BE).
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        // The inserted element came from V1, so swap operands to make it the
        // source (second) operand of the insert.
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(a&: V1, b&: V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
                              N3: DAG.getConstant(Val: 2 * ShiftElts, DL: dl, VT: MVT::i32));
    SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: Shl);
    SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
                              N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
  }
  SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V2);
  SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
                            N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
  return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
}
10199
/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
/// return the default SDValue.
///
/// XXSPLTI32DX splats an immediate 32-bit constant into either the even or
/// the odd words of a vector, leaving the other two words unchanged. We match
/// a shuffle where one input is a constant splat (of 32 bits or narrower) and
/// the mask selects that constant into exactly the even or odd word lanes.
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
                                              SelectionDAG &DAG) const {
  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
  // to v16i8. Peek through the bitcasts to get the actual operands.
  SDValue LHS = peekThroughBitcasts(V: SVN->getOperand(Num: 0));
  SDValue RHS = peekThroughBitcasts(V: SVN->getOperand(Num: 1));

  auto ShuffleMask = SVN->getMask();
  SDValue VecShuffle(SVN, 0);
  SDLoc DL(SVN);

  // Check that we have a four byte shuffle.
  if (!isNByteElemShuffleMask(N: SVN, Width: 4, StepLen: 1))
    return SDValue();

  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
  // Commuting the shuffle also commutes the mask, so re-read it afterwards.
  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
    std::swap(a&: LHS, b&: RHS);
    VecShuffle = peekThroughBitcasts(V: DAG.getCommutedVectorShuffle(SV: *SVN));
    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(Val&: VecShuffle);
    if (!CommutedSV)
      return SDValue();
    ShuffleMask = CommutedSV->getMask();
  }

  // Ensure that the RHS is a vector of constants.
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: RHS.getNode());
  if (!BVN)
    return SDValue();

  // Check if RHS is a splat of 4-bytes (or smaller).
  APInt APSplatValue, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(SplatValue&: APSplatValue, SplatUndef&: APSplatUndef, SplatBitSize,
                            HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
  // The instruction splats a constant C into two words of the source vector
  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
  // Thus we check that the shuffle mask is the equivalent of
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
  // within each word are consecutive, so we only need to check the first byte.
  SDValue Index;
  bool IsLE = Subtarget.isLittleEndian();
  // Words 0 and 2 are unchanged (come from LHS) while words 1 and 3 take the
  // splatted constant (bytes > 15, i.e. from RHS), or vice versa. The IX
  // immediate is flipped on LE because the instruction numbers words in BE
  // order.
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
    Index = DAG.getTargetConstant(Val: IsLE ? 0 : 1, DL, VT: MVT::i32);
  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
           (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
            ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
    Index = DAG.getTargetConstant(Val: IsLE ? 1 : 0, DL, VT: MVT::i32);
  else
    return SDValue();

  // If the splat is narrower than 32-bits, we need to get the 32-bit value
  // for XXSPLTI32DX.
  // Replicate the narrow splat value until it fills all 32 bits.
  unsigned SplatVal = APSplatValue.getZExtValue();
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);

  SDValue SplatNode = DAG.getNode(
      Opcode: PPCISD::XXSPLTI32DX, DL, VT: MVT::v2i64, N1: DAG.getBitcast(VT: MVT::v2i64, V: LHS),
      N2: Index, N3: DAG.getTargetConstant(Val: SplatVal, DL, VT: MVT::i32));
  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: SplatNode);
}
10273
/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
/// i.e (or (shl x, C1), (srl x, 128-C1)).
SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
  assert(Op.getValueType() == MVT::v1i128 &&
         "Only set v1i128 as custom, other type shouldn't reach here!");
  SDLoc dl(Op);
  SDValue N0 = peekThroughBitcasts(V: Op.getOperand(i: 0));
  SDValue N1 = peekThroughBitcasts(V: Op.getOperand(i: 1));
  // NOTE(review): reads the first operand of N1 as a constant — presumably N1
  // is a single-element build vector holding the rotate amount; confirm that
  // callers guarantee this shape.
  unsigned SHLAmt = N1.getConstantOperandVal(i: 0);
  if (SHLAmt % 8 == 0) {
    // Whole-byte rotation: express it as a byte rotation of the v16i8 mask
    // (identity mask 0..15 rotated left by SHLAmt/8 bytes).
    std::array<int, 16> Mask;
    std::iota(first: Mask.begin(), last: Mask.end(), value: 0);
    std::rotate(first: Mask.begin(), middle: Mask.begin() + SHLAmt / 8, last: Mask.end());
    if (SDValue Shuffle =
            DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: DAG.getBitcast(VT: MVT::v16i8, V: N0),
                                 N2: DAG.getUNDEF(VT: MVT::v16i8), Mask))
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: Shuffle);
  }
  // Fallback: scalar i128 rotate, (or (shl x, C), (srl x, 128-C)).
  SDValue ArgVal = DAG.getBitcast(VT: MVT::i128, V: N0);
  SDValue SHLOp = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ArgVal,
                              N2: DAG.getConstant(Val: SHLAmt, DL: dl, VT: MVT::i32));
  SDValue SRLOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: ArgVal,
                              N2: DAG.getConstant(Val: 128 - SHLAmt, DL: dl, VT: MVT::i32));
  SDValue OROp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i128, N1: SHLOp, N2: SRLOp);
  return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: OROp);
}
10303
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
///
/// The matchers are tried roughly from most specific / cheapest to most
/// general: load-and-splat, XXINSERTW, XXSPLTI32DX, VINSERTH/VINSERTB,
/// XXSLDWI, XXPERMDI, byte-reversal (XXBR*), splats/swaps, fixed-permutation
/// Altivec immediates, the perfect-shuffle table (BE only), and finally VPERM.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(i: 0);
  SDValue V2 = Op.getOperand(i: 1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);

  // Any nodes that were combined in the target-independent combiner prior
  // to vector legalization will not be sent to the target combine. Try to
  // combine it here.
  if (SDValue NewShuffle = combineVectorShuffle(SVN: SVOp, DAG)) {
    if (!isa<ShuffleVectorSDNode>(Val: NewShuffle))
      return NewShuffle;
    // The combine produced a (possibly different) shuffle; re-derive the
    // operands we work with below from the new node.
    Op = NewShuffle;
    SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
    V1 = Op.getOperand(i: 0);
    V2 = Op.getOperand(i: 1);
  }
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;

  // If this is a load-and-splat, we can do that with a single instruction
  // in some cases. However if the load has multiple uses, we don't want to
  // combine it because that will just produce multiple loads.
  bool IsPermutedLoad = false;
  const SDValue *InputLoad = getNormalLoadInput(Op: V1, IsPermuted&: IsPermutedLoad);
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
      (PPC::isSplatShuffleMask(N: SVOp, EltSize: 4) || PPC::isSplatShuffleMask(N: SVOp, EltSize: 8)) &&
      InputLoad->hasOneUse()) {
    bool IsFourByte = PPC::isSplatShuffleMask(N: SVOp, EltSize: 4);
    int SplatIdx =
      PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: IsFourByte ? 4 : 8, DAG);

    // The splat index for permuted loads will be in the left half of the vector
    // which is strictly wider than the loaded value by 8 bytes. So we need to
    // adjust the splat index to point to the correct address in memory.
    if (IsPermutedLoad) {
      assert((isLittleEndian || IsFourByte) &&
             "Unexpected size for permuted load on big endian target");
      SplatIdx += IsFourByte ? 2 : 1;
      assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
             "Splat of a value outside of the loaded memory");
    }

    LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
    // For 4-byte load-and-splat, we need Power9.
    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
      uint64_t Offset = 0;
      // Byte offset from the load address to the splatted element; element
      // order is reversed on little endian.
      if (IsFourByte)
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
      else
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

      // If the width of the load is the same as the width of the splat,
      // loading with an offset would load the wrong memory.
      if (LD->getValueType(ResNo: 0).getSizeInBits() == (IsFourByte ? 32 : 64))
        Offset = 0;

      SDValue BasePtr = LD->getBasePtr();
      if (Offset != 0)
        BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
                              N1: BasePtr, N2: DAG.getIntPtrConstant(Val: Offset, DL: dl));
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        BasePtr,           // BasePtr
        DAG.getValueType(Op.getValueType()) // VT
      };
      SDVTList VTL =
        DAG.getVTList(VT1: IsFourByte ? MVT::v4i32 : MVT::v2i64, VT2: MVT::Other);
      SDValue LdSplt =
        DAG.getMemIntrinsicNode(Opcode: PPCISD::LD_SPLAT, dl, VTList: VTL,
                                Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
      // Redirect users of the original load's chain to the new load-splat
      // node so the old load becomes dead.
      DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: 1), To: LdSplt.getValue(R: 1));
      if (LdSplt.getValueType() != SVOp->getValueType(ResNo: 0))
        LdSplt = DAG.getBitcast(VT: SVOp->getValueType(ResNo: 0), V: LdSplt);
      return LdSplt;
    }
  }

  // All v2i64 and v2f64 shuffles are legal
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return Op;

  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(N: SVOp, ShiftElts, InsertAtByte, Swap,
                           IsLE: isLittleEndian)) {
    if (V2.isUndef())
      V2 = V1;
    else if (Swap)
      std::swap(a&: V1, b&: V2);
    SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
    SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2);
    if (ShiftElts) {
      // Rotate the word to insert into the position XXINSERTW reads from.
      SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv2, N2: Conv2,
                                N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
      SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Shl,
                                N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
    }
    SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
                              N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
  }

  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
    SDValue SplatInsertNode;
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVN: SVOp, DAG)))
      return SplatInsertNode;
  }

  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(N: SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(N: SVOp, DAG)))
      return NewISDNode;
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(N: SVOp, ShiftElts, Swap, IsLE: isLittleEndian)) {
    if (Swap)
      std::swap(a&: V1, b&: V2);
    SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
    SDValue Conv2 =
        DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
                              N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Shl);
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(N: SVOp, DM&: ShiftElts, Swap, IsLE: isLittleEndian)) {
    if (Swap)
      std::swap(a&: V1, b&: V2);
    SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
    SDValue Conv2 =
        DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(Opcode: PPCISD::XXPERMDI, DL: dl, VT: MVT::v2i64, N1: Conv1, N2: Conv2,
                                 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: PermDI);
  }

  // Byte-reversal patterns map to BSWAP at the matching element width.
  if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(N: SVOp)) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
      SDValue ReveHWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v8i16, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(N: SVOp)) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
      SDValue ReveWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v4i32, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveWord);
    } else if (PPC::isXXBRDShuffleMask(N: SVOp)) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
      SDValue ReveDWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(N: SVOp)) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: V1);
      SDValue ReveQWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v1i128, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    if (V2.isUndef() && PPC::isSplatShuffleMask(N: SVOp, EltSize: 4)) {
      int SplatIdx = PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: 4, DAG);

      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
      SDValue Splat = DAG.getNode(Opcode: PPCISD::XXSPLT, DL: dl, VT: MVT::v4i32, N1: Conv,
                                  N2: DAG.getConstant(Val: SplatIdx, DL: dl, VT: MVT::i32));
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: V1);
      SDValue Swap = DAG.getNode(Opcode: PPCISD::SWAP_NO_CHAIN, DL: dl, VT: MVT::v2f64, Operand: Conv);
      return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Swap);
    }
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(N: SVOp, EltSize: 1) ||
        PPC::isSplatShuffleMask(N: SVOp, EltSize: 2) ||
        PPC::isSplatShuffleMask(N: SVOp, EltSize: 4) ||
        PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
        PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind: 1, DAG) ||
        PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind: 1, DAG) ||
        PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind: 1, DAG) ||
        PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind: 1, DAG) ||
        PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind: 1, DAG) ||
        PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind: 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
          PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
          PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind: 1, DAG) ||
          PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind: 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation. If any of these match, do not lower to
  // VPERM.
  // NOTE(review): ShuffleKind 1 above is the single-input variant; 2 vs 0 here
  // presumably selects the LE vs BE two-input mask checks — confirm against
  // the is*ShuffleMask definitions.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
        PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
        PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind, DAG) ||
        PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  ArrayRef<int> PermMask = SVOp->getMask();

  if (!DisablePerfectShuffle && !isLittleEndian) {
    unsigned PFIndexes[4];
    bool isFourElementShuffle = true;
    for (unsigned i = 0; i != 4 && isFourElementShuffle;
         ++i) {                           // Element number
      unsigned EltNo = 8;                 // Start out undef.
      for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
        if (PermMask[i * 4 + j] < 0)
          continue; // Undef, ignore it.

        unsigned ByteSource = PermMask[i * 4 + j];
        // All four bytes of a word element must be consecutive bytes of a
        // single source word, in order.
        if ((ByteSource & 3) != j) {
          isFourElementShuffle = false;
          break;
        }

        if (EltNo == 8) {
          EltNo = ByteSource / 4;
        } else if (EltNo != ByteSource / 4) {
          isFourElementShuffle = false;
          break;
        }
      }
      PFIndexes[i] = EltNo;
    }

    // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
    // perfect shuffle vector to determine if it is cost effective to do this as
    // discrete instructions, or whether we should use a vperm.
    if (isFourElementShuffle) {
      // Compute the index in the perfect shuffle table.
      unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
                              PFIndexes[2] * 9 + PFIndexes[3];

      unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
      unsigned Cost = (PFEntry >> 30);

      // Determining when to avoid vperm is tricky. Many things affect the cost
      // of vperm, particularly how many times the perm mask needs to be
      // computed. For example, if the perm mask can be hoisted out of a loop or
      // is already used (perhaps because there are multiple permutes with the
      // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
      // permute mask out of the loop requires an extra register.
      //
      // As a compromise, we only emit discrete instructions if the shuffle can
      // be generated in 3 or fewer operations. When we have loop information
      // available, if this block is within a loop, we should avoid using vperm
      // for 3-operation perms and use a constant pool load instead.
      if (Cost < 3)
        return GeneratePerfectShuffle(PFEntry, LHS: V1, RHS: V2, DAG, dl);
    }
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
}
10602
/// LowerVPERM - Fallback lowering of a VECTOR_SHUFFLE to a VPERM (or, when
/// profitable on P9+, XXPERM) with a constant permute-control vector built
/// from the shuffle mask.
///
/// \param Op       The original VECTOR_SHUFFLE node (used for its SDLoc and
///                 for debug printing).
/// \param PermMask The shuffle mask in input-element units; converted here to
///                 byte units for the permute control vector.
/// \param V1, V2   The two shuffle inputs (callers pass V1 for both when the
///                 second operand is undef).
SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
                                      ArrayRef<int> PermMask, EVT VT,
                                      SDValue V1, SDValue V2) const {
  unsigned Opcode = PPCISD::VPERM;
  EVT ValType = V1.getValueType();
  SDLoc dl(Op);
  bool NeedSwap = false;
  bool isLittleEndian = Subtarget.isLittleEndian();
  bool isPPC64 = Subtarget.isPPC64();

  if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
      (V1->hasOneUse() || V2->hasOneUse())) {
    LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
                         "XXPERM instead\n");
    Opcode = PPCISD::XXPERM;

    // The second input to XXPERM is also an output so if the second input has
    // multiple uses then copying is necessary, as a result we want the
    // single-use operand to be used as the second input to prevent copying.
    // (Which operand ends up second depends on endianness because of the
    // operand swap performed at the bottom of this function for LE.)
    if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
        (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
      std::swap(a&: V1, b&: V2);
      NeedSwap = !NeedSwap;
    }
  }

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes. Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31. This is
  // necessary to produce proper semantics with the big-endian-based vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits() / 8;

  // If an input is wrapped in an XXSWAPD, fold the swap into the permute
  // control vector instead of executing it (the swap's operand is unwrapped
  // further below).
  bool V1HasXXSWAPD = V1->getOperand(Num: 0)->getOpcode() == PPCISD::XXSWAPD;
  bool V2HasXXSWAPD = V2->getOperand(Num: 0)->getOpcode() == PPCISD::XXSWAPD;

  /*
  Vectors will be appended like so: [ V1 | v2 ]
  XXSWAPD on V1:
  [   A   |   B   |   C   |   D   ] -> [   C   |   D   |   A   |   B   ]
     0-3     4-7     8-11   12-15         0-3     4-7     8-11   12-15
  i.e.  index of A, B += 8, and index of C, D -= 8.
  XXSWAPD on V2:
  [   E   |   F   |   G   |   H   ] -> [   G   |   H   |   E   |   F   ]
    16-19   20-23   24-27   28-31        16-19   20-23   24-27   28-31
  i.e.  index of E, F += 8, index of G, H -= 8
  Swap V1 and V2:
  [   V1   |   V2   ] -> [   V2   |   V1   ]
     0-15     16-31        0-15     16-31
  i.e.  index of V1 += 16, index of V2 -= 16
  */

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    // Undef mask entries may select any byte; use 0.
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    if (V1HasXXSWAPD) {
      if (SrcElt < 8)
        SrcElt += 8;
      else if (SrcElt < 16)
        SrcElt -= 8;
    }
    if (V2HasXXSWAPD) {
      if (SrcElt > 23)
        SrcElt -= 8;
      else if (SrcElt > 15)
        SrcElt += 8;
    }
    if (NeedSwap) {
      if (SrcElt < 16)
        SrcElt += 16;
      else
        SrcElt -= 16;
    }
    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(
            Elt: DAG.getConstant(Val: 31 - (SrcElt * BytesPerElement + j), DL: dl, VT: MVT::i32));
      else
        ResultMask.push_back(
            Elt: DAG.getConstant(Val: SrcElt * BytesPerElement + j, DL: dl, VT: MVT::i32));
  }

  // Now that the swaps are accounted for in the mask, replace each swapped
  // input with the XXSWAPD's own operand.
  if (V1HasXXSWAPD) {
    dl = SDLoc(V1->getOperand(Num: 0));
    V1 = V1->getOperand(Num: 0)->getOperand(Num: 1);
  }
  if (V2HasXXSWAPD) {
    dl = SDLoc(V2->getOperand(Num: 0));
    V2 = V2->getOperand(Num: 0)->getOperand(Num: 1);
  }

  if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
    if (ValType != MVT::v2f64)
      V1 = DAG.getBitcast(VT: MVT::v2f64, V: V1);
    if (V2.getValueType() != MVT::v2f64)
      V2 = DAG.getBitcast(VT: MVT::v2f64, V: V2);
  }

  ShufflesHandledWithVPERM++;
  SDValue VPermMask = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops: ResultMask);
  LLVM_DEBUG({
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
    if (Opcode == PPCISD::XXPERM) {
      dbgs() << "Emitting a XXPERM for the following shuffle:\n";
    } else {
      dbgs() << "Emitting a VPERM for the following shuffle:\n";
    }
    SVOp->dump();
    dbgs() << "With the following permute control vector:\n";
    VPermMask.dump();
  });

  if (Opcode == PPCISD::XXPERM)
    VPermMask = DAG.getBitcast(VT: MVT::v4i32, V: VPermMask);

  // Only need to place items backwards in LE,
  // the mask was properly calculated.
  if (isLittleEndian)
    std::swap(a&: V1, b&: V2);

  SDValue VPERMNode =
      DAG.getNode(Opcode, DL: dl, VT: V1.getValueType(), N1: V1, N2: V2, N3: VPermMask);

  VPERMNode = DAG.getBitcast(VT: ValType, V: VPERMNode);
  return VPERMNode;
}
10733
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
///
/// \param Intrin the INTRINSIC_WO_CHAIN node; its operand 0 is the
///        intrinsic ID.
/// \param CompareOpc [out] numeric compare opcode; the caller attaches it
///        as a constant operand to the PPCISD::VCMP / PPCISD::VCMP_rec node.
/// \param isDot [out] true for the predicate ("_p", record) forms, which
///        are lowered to the CR-setting VCMP_rec node.
/// \param Subtarget used to reject opcodes the current ISA level lacks.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID = Intrin.getConstantOperandVal(i: 0);
  // Assume this is not a comparison until a case below proves otherwise.
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword-element compares require POWER8 Altivec (or VSX).
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // "not equal" / "not equal or zero" predicates are ISA 3.0 (POWER9).
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // Quadword (128-bit element) compares require ISA 3.1 (POWER10).
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq:
      CompareOpc = 647;
      break;
    }
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      // All six IDs listed in the outer case labels are handled below, so a
      // default case is not needed.
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  // NOTE(review): unlike the "_p" form above, this gate omits hasVSX();
  // presumably intentional, but worth confirming.
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  // Predicate forms of the ISA 3.1 quadword compares.
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      CompareOpc = 647;
      break;
    }
    isDot = true;
    break;
  }
  return true;
}
11019
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
///
/// Operand 0 of \p Op is the intrinsic ID; the intrinsic's actual arguments
/// follow as operands 1..N. Intrinsics not matched here fall through to the
/// generic vector-compare handling at the bottom (via getVectorCompareInfo).
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID = Op.getConstantOperandVal(i: 0);

  SDLoc dl(Op);
  // Note: BCD instructions expect the immediate operand in vector form (v4i32),
  // but the builtin provides it as a scalar. To satisfy the instruction
  // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
  auto MapNodeWithSplatVector =
      [&](unsigned Opcode,
          std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
    SDValue SplatVal =
        DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: MVT::v4i32, Operand: Op.getOperand(i: 2));

    // Node operands are {splatted operand 2, operand 1, ExtraOps...}.
    SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(i: 1)};
    Ops.append(in_start: ExtraOps.begin(), in_end: ExtraOps.end());
    return DAG.getNode(Opcode, DL: dl, VT: MVT::v16i8, Ops);
  };

  switch (IntrinsicID) {
  case Intrinsic::thread_pointer:
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    if (Subtarget.isPPC64())
      return DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
    return DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);

  // rldimi: rotate-left doubleword immediate then mask insert.
  case Intrinsic::ppc_rldimi: {
    assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
    SDValue Src = Op.getOperand(i: 1);
    APInt Mask = Op.getConstantOperandAPInt(i: 4);
    // All-zero mask: the insert is a no-op, result is the insert target.
    if (Mask.isZero())
      return Op.getOperand(i: 2);
    // All-ones mask: the whole rotated source survives, i.e. a plain rotate.
    if (Mask.isAllOnes())
      return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src, N2: Op.getOperand(i: 3));
    uint64_t SH = Op.getConstantOperandVal(i: 3);
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes64(Val: Mask.getZExtValue(), MB, ME))
      report_fatal_error(reason: "invalid rldimi mask!");
    // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
    if (ME < 63 - SH) {
      Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
                        N2: DAG.getConstant(Val: ME + SH + 1, DL: dl, VT: MVT::i32));
    } else if (ME > 63 - SH) {
      Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
                        N2: DAG.getConstant(Val: ME + SH - 63, DL: dl, VT: MVT::i32));
    }
    return SDValue(
        DAG.getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64,
                           Ops: {Op.getOperand(i: 2), Src,
                            DAG.getTargetConstant(Val: 63 - ME, DL: dl, VT: MVT::i32),
                            DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32)}),
        0);
  }

  // rlwimi: rotate-left word immediate then mask insert.
  case Intrinsic::ppc_rlwimi: {
    APInt Mask = Op.getConstantOperandAPInt(i: 4);
    // Degenerate masks reduce to the insert target or a plain rotate.
    if (Mask.isZero())
      return Op.getOperand(i: 2);
    if (Mask.isAllOnes())
      return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i32, N1: Op.getOperand(i: 1),
                         N2: Op.getOperand(i: 3));
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes(Val: Mask.getZExtValue(), MB, ME))
      report_fatal_error(reason: "invalid rlwimi mask!");
    return SDValue(DAG.getMachineNode(
                       Opcode: PPC::RLWIMI, dl, VT: MVT::i32,
                       Ops: {Op.getOperand(i: 2), Op.getOperand(i: 1), Op.getOperand(i: 3),
                        DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
                        DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
                   0);
  }

  // BCD (binary-coded decimal) operations; see MapNodeWithSplatVector above.
  case Intrinsic::ppc_bcdshift:
    return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(i: 3)});
  case Intrinsic::ppc_bcdshiftround:
    return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(i: 3)});
  case Intrinsic::ppc_bcdtruncate:
    return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(i: 3)});
  case Intrinsic::ppc_bcdunsignedtruncate:
    return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
  case Intrinsic::ppc_bcdunsignedshift:
    return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);

  // rlwnm: rotate-left word then AND with mask.
  case Intrinsic::ppc_rlwnm: {
    // A zero mask makes the whole operation fold to zero.
    if (Op.getConstantOperandVal(i: 3) == 0)
      return DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32);
    unsigned MB = 0, ME = 0;
    if (!isRunOfOnes(Val: Op.getConstantOperandVal(i: 3), MB, ME))
      report_fatal_error(reason: "invalid rlwnm mask!");
    return SDValue(
        DAG.getMachineNode(Opcode: PPC::RLWNM, dl, VT: MVT::i32,
                           Ops: {Op.getOperand(i: 1), Op.getOperand(i: 2),
                            DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
                            DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
        0);
  }

  case Intrinsic::ppc_mma_disassemble_acc: {
    // On ISA-Future subtargets, split the 512-bit accumulator into two
    // 256-bit halves with DMXXEXTFDMR512, then peel off the four 128-bit
    // VSX registers in endian-dependent order.
    if (Subtarget.isISAFuture()) {
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      SDValue WideVec =
          SDValue(DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes,
                                     Ops: Op.getOperand(i: 1)),
                  0);
      SmallVector<SDValue, 4> RetOps;
      SDValue Value = SDValue(WideVec.getNode(), 0);
      SDValue Value2 = SDValue(WideVec.getNode(), 1);

      SDValue Extract;
      Extract = DAG.getNode(
          Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
          N1: Subtarget.isLittleEndian() ? Value2 : Value,
          N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 1 : 0,
                          DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
      RetOps.push_back(Elt: Extract);
      Extract = DAG.getNode(
          Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
          N1: Subtarget.isLittleEndian() ? Value2 : Value,
          N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 0 : 1,
                          DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
      RetOps.push_back(Elt: Extract);
      Extract = DAG.getNode(
          Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
          N1: Subtarget.isLittleEndian() ? Value : Value2,
          N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 1 : 0,
                          DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
      RetOps.push_back(Elt: Extract);
      Extract = DAG.getNode(
          Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
          N1: Subtarget.isLittleEndian() ? Value : Value2,
          N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 0 : 1,
                          DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
      RetOps.push_back(Elt: Extract);
      return DAG.getMergeValues(Ops: RetOps, dl);
    }
    // Pre-Future subtargets share the generic extraction loop below.
    [[fallthrough]];
  }
  case Intrinsic::ppc_vsx_disassemble_pair: {
    // Extract NumVecs v16i8 registers from a register pair (or, when falling
    // through from disassemble_acc, from an accumulator via XXMFACC).
    int NumVecs = 2;
    SDValue WideVec = Op.getOperand(i: 1);
    if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
      NumVecs = 4;
      WideVec = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: WideVec);
    }
    SmallVector<SDValue, 4> RetOps;
    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
      // Little-endian targets extract the registers in reverse order.
      SDValue Extract = DAG.getNode(
          Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: WideVec,
          N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
                                                      : VecNo,
                          DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
      RetOps.push_back(Elt: Extract);
    }
    return DAG.getMergeValues(Ops: RetOps, dl);
  }

  case Intrinsic::ppc_build_dmr: {
    // Assemble a 1024-bit DMR from eight vector operands (operands 1..8),
    // pairing them two at a time and collecting load chains so ordering
    // against the loads is preserved via a TokenFactor.
    SmallVector<SDValue, 8> Pairs;
    SmallVector<SDValue, 8> Chains;
    for (int i = 1; i < 9; i += 2) {
      SDValue Hi = Op.getOperand(i);
      SDValue Lo = Op.getOperand(i: i + 1);
      if (Hi->getOpcode() == ISD::LOAD)
        Chains.push_back(Elt: Hi.getValue(R: 1));
      if (Lo->getOpcode() == ISD::LOAD)
        Chains.push_back(Elt: Lo.getValue(R: 1));
      Pairs.push_back(
          Elt: DAG.getNode(Opcode: PPCISD::PAIR_BUILD, DL: dl, VT: MVT::v256i1, Ops: {Hi, Lo}));
    }
    // NOTE(review): if no operand is a load, Chains is empty here — confirm
    // an operand-less TokenFactor is intended in that case.
    SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Chains);
    SDValue Value = DMFInsert1024(Pairs, dl: SDLoc(Op), DAG);
    return DAG.getMergeValues(Ops: {Value, TF}, dl);
  }

  case Intrinsic::ppc_mma_dmxxextfdmr512: {
    // Extract the lower (P=0) or upper (P=1) 512 bits of a 1024-bit DMR as a
    // pair of 256-bit values, going through the matching wacc subregister.
    assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Specify P of 0 or 1 for lower or upper 512 bytes");
    unsigned HiLo = Idx->getSExtValue();
    unsigned Opcode;
    unsigned Subx;
    if (HiLo == 0) {
      Opcode = PPC::DMXXEXTFDMR512;
      Subx = PPC::sub_wacc_lo;
    } else {
      Opcode = PPC::DMXXEXTFDMR512_HI;
      Subx = PPC::sub_wacc_hi;
    }
    SDValue Subreg(
        DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
                           Op1: Op.getOperand(i: 1),
                           Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
        0);
    EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
    return SDValue(DAG.getMachineNode(Opcode, dl, ResultTys: ReturnTypes, Ops: Subreg), 0);
  }

  case Intrinsic::ppc_mma_dmxxextfdmr256: {
    // Extract one of the four 256-bit row pairs of a DMR.
    assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
    // NOTE(review): the "||" makes the range part of this assert vacuously
    // true; "&&" was presumably intended.
    assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
           "Specify a dmr row pair 0-3");
    unsigned IdxVal = Idx->getSExtValue();
    unsigned Subx;
    switch (IdxVal) {
    case 0:
      Subx = PPC::sub_dmrrowp0;
      break;
    case 1:
      Subx = PPC::sub_dmrrowp1;
      break;
    case 2:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
      break;
    case 3:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
      break;
    }
    SDValue Subreg(
        DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v256i1,
                           Op1: Op.getOperand(i: 1),
                           Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
        0);
    SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
    return SDValue(
        DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR256, dl, VT: MVT::v256i1, Ops: {Subreg, P}),
        0);
  }

  case Intrinsic::ppc_mma_dmxxinstdmr512: {
    // Insert a pair of 256-bit values into the lower (P=0) or upper (P=1)
    // 512 bits of a 1024-bit DMR.
    assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 4));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Specify P of 0 or 1 for lower or upper 512 bytes");
    unsigned HiLo = Idx->getSExtValue();
    unsigned Opcode;
    unsigned Subx;
    if (HiLo == 0) {
      Opcode = PPCISD::INST512;
      Subx = PPC::sub_wacc_lo;
    } else {
      Opcode = PPCISD::INST512HI;
      Subx = PPC::sub_wacc_hi;
    }
    SDValue Wacc = DAG.getNode(Opcode, DL: dl, VT: MVT::v512i1, N1: Op.getOperand(i: 2),
                               N2: Op.getOperand(i: 3));
    SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
    return SDValue(DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
                                      Op1: Op.getOperand(i: 1), Op2: Wacc, Op3: SubReg),
                   0);
  }

  case Intrinsic::ppc_mma_dmxxinstdmr256: {
    // Insert a 256-bit value into one of the four row pairs of a DMR.
    assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
    auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 3));
    // NOTE(review): same vacuous "||" as in dmxxextfdmr256 above; likely "&&".
    assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
           "Specify a dmr row pair 0-3");
    unsigned IdxVal = Idx->getSExtValue();
    unsigned Subx;
    switch (IdxVal) {
    case 0:
      Subx = PPC::sub_dmrrowp0;
      break;
    case 1:
      Subx = PPC::sub_dmrrowp1;
      break;
    case 2:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
      break;
    case 3:
      Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
      break;
    }
    SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
    SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
    SDValue DMRRowp =
        DAG.getNode(Opcode: PPCISD::INST256, DL: dl, VT: MVT::v256i1, N1: Op.getOperand(i: 2), N2: P);
    return SDValue(DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
                                      Op1: Op.getOperand(i: 1), Op2: DMRRowp, Op3: SubReg),
                   0);
  }

  case Intrinsic::ppc_mma_xxmfacc:
  case Intrinsic::ppc_mma_xxmtacc: {
    // Allow pre-isa-future subtargets to lower as normal.
    if (!Subtarget.isISAFuture())
      return SDValue();
    // The intrinsics for xxmtacc and xxmfacc take one argument of
    // type v512i1, for future cpu the corresponding wacc instruction
    // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
    // the need to produce the xxm[t|f]acc.
    SDValue WideVec = Op.getOperand(i: 1);
    DAG.ReplaceAllUsesWith(From: Op, To: WideVec);
    return SDValue();
  }

  case Intrinsic::ppc_unpack_longdouble: {
    // Extract one f64 half of a ppc_fp128 (doubledouble) value.
    auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Argument of long double unpack must be 0 or 1!");
    return DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: dl, VT: MVT::f64, N1: Op.getOperand(i: 1),
                       N2: DAG.getConstant(Val: !!(Idx->getSExtValue()), DL: dl,
                                       VT: Idx->getValueType(ResNo: 0)));
  }

  case Intrinsic::ppc_compare_exp_lt:
  case Intrinsic::ppc_compare_exp_gt:
  case Intrinsic::ppc_compare_exp_eq:
  case Intrinsic::ppc_compare_exp_uo: {
    // Compare the exponents with XSCMPEXPDP, then select 1/0 off the
    // requested CR predicate. All four IDs are covered below, so Pred is
    // always initialized on paths that reach the return.
    unsigned Pred;
    switch (IntrinsicID) {
    case Intrinsic::ppc_compare_exp_lt:
      Pred = PPC::PRED_LT;
      break;
    case Intrinsic::ppc_compare_exp_gt:
      Pred = PPC::PRED_GT;
      break;
    case Intrinsic::ppc_compare_exp_eq:
      Pred = PPC::PRED_EQ;
      break;
    case Intrinsic::ppc_compare_exp_uo:
      Pred = PPC::PRED_UN;
      break;
    }
    return SDValue(
        DAG.getMachineNode(
            Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
            Ops: {SDValue(DAG.getMachineNode(Opcode: PPC::XSCMPEXPDP, dl, VT: MVT::i32,
                                      Op1: Op.getOperand(i: 1), Op2: Op.getOperand(i: 2)),
                     0),
             DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32), DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
             DAG.getTargetConstant(Val: Pred, DL: dl, VT: MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_test_data_class: {
    EVT OpVT = Op.getOperand(i: 1).getValueType();
    unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
                                         : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
                                                             : PPC::XSTSTDCSP);
    // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
    // The XSTSTDC* instructions test if a floating-point value matches any of
    // the data classes specified in the mask, setting CR field bits
    // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
    // convert it to an integer result (1 if match, 0 if no match).
    //
    // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
    // intrinsic provides (value, mask) as Op.getOperand(1) and
    // Op.getOperand(2).
    SDValue TestDataClass =
        SDValue(DAG.getMachineNode(Opcode: CmprOpc, dl, VT: MVT::i32,
                                   Ops: {Op.getOperand(i: 2), Op.getOperand(i: 1)}),
                0);
    if (Subtarget.isISA3_1()) {
      // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
      // This is more efficient than the SELECT_CC approach used in earlier
      // ISAs.
      SDValue SubRegIdx = DAG.getTargetConstant(Val: PPC::sub_eq, DL: dl, VT: MVT::i32);
      SDValue CRBit =
          SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
                                     Op1: TestDataClass, Op2: SubRegIdx),
                  0);

      return DAG.getNode(Opcode: PPCISD::SETBC, DL: dl, VT: MVT::i32, Operand: CRBit);
    }

    // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
    return SDValue(
        DAG.getMachineNode(Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
                           Ops: {TestDataClass, DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32),
                            DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
                            DAG.getTargetConstant(Val: PPC::PRED_EQ, DL: dl, VT: MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_fnmsub: {
    EVT VT = Op.getOperand(i: 1).getValueType();
    // Without VSX (or without f128 support for f128 operands) there is no
    // FNMSUB instruction; expand as -(a*b + (-c)) using FNEG+FMA.
    if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
      return DAG.getNode(
          Opcode: ISD::FNEG, DL: dl, VT,
          Operand: DAG.getNode(Opcode: ISD::FMA, DL: dl, VT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
                      N3: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op.getOperand(i: 3))));
    return DAG.getNode(Opcode: PPCISD::FNMSUB, DL: dl, VT, N1: Op.getOperand(i: 1),
                       N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
  }
  case Intrinsic::ppc_convert_f128_to_ppcf128:
  case Intrinsic::ppc_convert_ppcf128_to_f128: {
    // These conversions are delegated to compiler-rt libcalls.
    RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
                            ? RTLIB::CONVERT_PPCF128_F128
                            : RTLIB::CONVERT_F128_PPCF128;
    MakeLibCallOptions CallOptions;
    std::pair<SDValue, SDValue> Result =
        makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op.getOperand(i: 1), CallOptions,
                    dl, Chain: SDValue());
    return Result.first;
  }
  case Intrinsic::ppc_maxfe:
  case Intrinsic::ppc_maxfl:
  case Intrinsic::ppc_maxfs:
  case Intrinsic::ppc_minfe:
  case Intrinsic::ppc_minfl:
  case Intrinsic::ppc_minfs: {
    EVT VT = Op.getValueType();
    assert(
        all_of(Op->ops().drop_front(4),
               [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
        "ppc_[max|min]f[e|l|s] must have uniform type arguments");
    (void)VT;
    ISD::CondCode CC = ISD::SETGT;
    if (IntrinsicID == Intrinsic::ppc_minfe ||
        IntrinsicID == Intrinsic::ppc_minfl ||
        IntrinsicID == Intrinsic::ppc_minfs)
      CC = ISD::SETLT;
    // Fold a chain of select_cc nodes over the value operands. The index
    // starts at the second-to-last operand and wraps to the last one while
    // counting down, so every operand except the intrinsic ID (operand 0)
    // is visited exactly once.
    unsigned I = Op.getNumOperands() - 2, Cnt = I;
    SDValue Res = Op.getOperand(i: I);
    for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
      Res =
          DAG.getSelectCC(DL: dl, LHS: Res, RHS: Op.getOperand(i: I), True: Res, False: Op.getOperand(i: I), Cond: CC);
    }
    return Res;
  }
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Intrin: Op, CompareOpc, isDot, Subtarget))
    return SDValue(); // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(Opcode: PPCISD::VCMP, DL: dl, VT: Op.getOperand(i: 2).getValueType(),
                              N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
                              N3: DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(i: 2), // LHS
    Op.getOperand(i: 3), // RHS
    DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(i: 2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);

  // Unpack the result based on how the target uses it.
  unsigned BitNo; // Bit # of CR6.
  bool InvertBit; // Invert result?
  unsigned Bitx;
  unsigned SetOp;
  switch (Op.getConstantOperandVal(i: 1)) {
  default: // Can't happen, don't crash on invalid number though.
  case 0: // Return the value of the EQ bit of CR6.
    BitNo = 0;
    InvertBit = false;
    Bitx = PPC::sub_eq;
    SetOp = PPCISD::SETBC;
    break;
  case 1: // Return the inverted value of the EQ bit of CR6.
    BitNo = 0;
    InvertBit = true;
    Bitx = PPC::sub_eq;
    SetOp = PPCISD::SETBCR;
    break;
  case 2: // Return the value of the LT bit of CR6.
    BitNo = 2;
    InvertBit = false;
    Bitx = PPC::sub_lt;
    SetOp = PPCISD::SETBC;
    break;
  case 3: // Return the inverted value of the LT bit of CR6.
    BitNo = 2;
    InvertBit = true;
    Bitx = PPC::sub_lt;
    SetOp = PPCISD::SETBCR;
    break;
  }

  SDValue GlueOp = CompNode.getValue(R: 1);
  if (Subtarget.isISA3_1()) {
    // ISA 3.1: extract the CR6 bit directly and materialize it with
    // SETBC/SETBCR (the latter handles the inverted cases).
    SDValue SubRegIdx = DAG.getTargetConstant(Val: Bitx, DL: dl, VT: MVT::i32);
    SDValue CR6Reg = DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32);
    SDValue CRBit =
        SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
                                   Op1: CR6Reg, Op2: SubRegIdx, Op3: GlueOp),
                0);
    return DAG.getNode(Opcode: SetOp, DL: dl, VT: MVT::i32, Operand: CRBit);
  }

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(Opcode: PPCISD::MFOCRF, DL: dl, VT: MVT::i32,
                              N1: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32), N2: GlueOp);

  // Shift the bit into the low position.
  Flags = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32, N1: Flags,
                      N2: DAG.getConstant(Val: 8 - (3 - BitNo), DL: dl, VT: MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: Flags,
                      N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: Flags,
                        N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
  return Flags;
}
11530
11531SDValue PPCTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11532 SelectionDAG &DAG) const {
11533 unsigned IntrinsicID = Op.getConstantOperandVal(i: 1);
11534 SDLoc dl(Op);
11535 switch (IntrinsicID) {
11536 case Intrinsic::ppc_amo_lwat_csne:
11537 case Intrinsic::ppc_amo_ldat_csne:
11538 SDValue Chain = Op.getOperand(i: 0);
11539 SDValue Ptr = Op.getOperand(i: 2);
11540 SDValue CmpVal = Op.getOperand(i: 3);
11541 SDValue NewVal = Op.getOperand(i: 4);
11542
11543 EVT VT = IntrinsicID == Intrinsic::ppc_amo_ldat_csne ? MVT::i64 : MVT::i32;
11544 Type *Ty = VT.getTypeForEVT(Context&: *DAG.getContext());
11545 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(C&: *DAG.getContext());
11546
11547 TargetLowering::ArgListTy Args;
11548 Args.emplace_back(args: DAG.getUNDEF(VT: MVT::i64),
11549 args: Type::getInt64Ty(C&: *DAG.getContext()));
11550 Args.emplace_back(args&: CmpVal, args&: Ty);
11551 Args.emplace_back(args&: NewVal, args&: Ty);
11552 Args.emplace_back(args&: Ptr, args&: IntPtrTy);
11553
11554 // Lower to dummy call to use ABI for consecutive register allocation.
11555 // Places return value, compare value, and new value in X3/X4/X5 as required
11556 // by lwat/ldat FC=16, avoiding a new register class for 3 adjacent
11557 // registers.
11558 const char *SymName = IntrinsicID == Intrinsic::ppc_amo_ldat_csne
11559 ? "__ldat_csne_pseudo"
11560 : "__lwat_csne_pseudo";
11561 SDValue Callee =
11562 DAG.getExternalSymbol(Sym: SymName, VT: getPointerTy(DL: DAG.getDataLayout()));
11563
11564 TargetLowering::CallLoweringInfo CLI(DAG);
11565 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(CC: CallingConv::C, ResultType: Ty, Target: Callee,
11566 ArgsList: std::move(Args));
11567
11568 auto Result = LowerCallTo(CLI);
11569 return DAG.getMergeValues(Ops: {Result.first, Result.second}, dl);
11570 }
11571 return SDValue();
11572}
11573
// Custom lowering for ISD::INTRINSIC_VOID nodes. Handles the PPC intrinsics
// that need DAG-level expansion (cfence, disassemble_dmr, stwat/stdat);
// anything unhandled falls through and returns SDValue() so generic code can
// take over.
SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
  // the beginning of the argument list.
  int ArgStart = isa<ConstantSDNode>(Val: Op.getOperand(i: 0)) ? 0 : 1;
  SDLoc DL(Op);
  switch (Op.getConstantOperandVal(i: ArgStart)) {
  case Intrinsic::ppc_cfence: {
    // Emit the CFENCE/CFENCE8 pseudo, feeding it the fenced value extended to
    // the subtarget's native scalar width.
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    SDValue Val = Op.getOperand(i: ArgStart + 1);
    EVT Ty = Val.getValueType();
    if (Ty == MVT::i128) {
      // FIXME: Testing one of two paired registers is sufficient to guarantee
      // ordering?
      Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i64, Operand: Val);
    }
    unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
    return SDValue(
        DAG.getMachineNode(
            Opcode, dl: DL, VT: MVT::Other,
            Op1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getScalarIntVT(), Operand: Val),
            Op2: Op.getOperand(i: 0)),
        0);
  }
  case Intrinsic::ppc_disassemble_dmr: {
    // Lower to a plain store of the dmr value to the given pointer.
    assert(ArgStart == 1 &&
           "llvm.ppc.disassemble.dmr must carry a chain argument.");
    return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: Op.getOperand(i: ArgStart + 2),
                        Ptr: Op.getOperand(i: ArgStart + 1), PtrInfo: MachinePointerInfo());
  }
  case Intrinsic::ppc_amo_stwat:
  case Intrinsic::ppc_amo_stdat: {
    // Atomic store-with-FC: forward pointer, value, and function code to the
    // PPCISD::STAT node (note the Val/Ptr operand order on the node).
    SDLoc dl(Op);
    SDValue Chain = Op.getOperand(i: 0);
    SDValue Ptr = Op.getOperand(i: ArgStart + 1);
    SDValue Val = Op.getOperand(i: ArgStart + 2);
    SDValue FC = Op.getOperand(i: ArgStart + 3);

    return DAG.getNode(Opcode: PPCISD::STAT, DL: dl, VT: MVT::Other, N1: Chain, N2: Val, N3: Ptr, N4: FC);
  }
  default:
    break;
  }
  return SDValue();
}
11619
11620// Lower scalar BSWAP64 to xxbrd.
11621SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11622 SDLoc dl(Op);
11623 if (!Subtarget.isPPC64())
11624 return Op;
11625 // MTVSRDD
11626 Op = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, VT: MVT::v2i64, N1: Op.getOperand(i: 0),
11627 N2: Op.getOperand(i: 0));
11628 // XXBRD
11629 Op = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Op);
11630 // MFVSRD
11631 int VectorIndex = 0;
11632 if (Subtarget.isLittleEndian())
11633 VectorIndex = 1;
11634 Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Op,
11635 N2: DAG.getTargetConstant(Val: VectorIndex, DL: dl, VT: MVT::i32));
11636 return Op;
11637}
11638
// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
// compared to a value that is atomically loaded (atomic loads zero-extend).
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
         "Expecting an atomic compare-and-swap here.");
  SDLoc dl(Op);
  auto *AtomicNode = cast<AtomicSDNode>(Val: Op.getNode());
  EVT MemVT = AtomicNode->getMemoryVT();
  // i32/i64 cmpxchg compares the full register; no masking needed.
  if (MemVT.getSizeInBits() >= 32)
    return Op;

  // Operand 2 is the compare value (0 = chain, 1 = pointer).
  SDValue CmpOp = Op.getOperand(i: 2);
  // If this is already correctly zero-extended, leave it alone.
  auto HighBits = APInt::getHighBitsSet(numBits: 32, hiBitsSet: 32 - MemVT.getSizeInBits());
  if (DAG.MaskedValueIsZero(Op: CmpOp, Mask: HighBits))
    return Op;

  // Clear the high bits of the compare operand.
  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
  SDValue NewCmpOp =
    DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: CmpOp,
                N2: DAG.getConstant(Val: MaskVal, DL: dl, VT: MVT::i32));

  // Replace the existing compare operand with the properly zero-extended one.
  SmallVector<SDValue, 4> Ops;
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
    Ops.push_back(Elt: AtomicNode->getOperand(Num: i));
  Ops[2] = NewCmpOp;
  // Rebuild as the width-specific PPC cmpxchg node, reusing the original
  // memory operand so alias info is preserved.
  MachineMemOperand *MMO = AtomicNode->getMemOperand();
  SDVTList Tys = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other);
  auto NodeTy =
    (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
  return DAG.getMemIntrinsicNode(Opcode: NodeTy, dl, VTList: Tys, Ops, MemVT, MMO);
}
11674
// Custom lowering for quadword (i128) atomic load/store: split the i128 into
// two i64 halves and route them through the ppc_atomic_{load,store}_i128
// intrinsics, which the instruction selector pattern-matches.
SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  AtomicSDNode *N = cast<AtomicSDNode>(Val: Op.getNode());
  EVT MemVT = N->getMemoryVT();
  assert(MemVT.getSimpleVT() == MVT::i128 &&
         "Expect quadword atomic operations");
  SDLoc dl(N);
  unsigned Opc = N->getOpcode();
  switch (Opc) {
  case ISD::ATOMIC_LOAD: {
    // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
    // lowered to ppc instructions by pattern matching instruction selector.
    // The intrinsic yields two i64 results (lo, hi) plus a chain.
    SDVTList Tys = DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::Other);
    SmallVector<SDValue, 4> Ops{
        N->getOperand(Num: 0),
        DAG.getConstant(Val: Intrinsic::ppc_atomic_load_i128, DL: dl, VT: MVT::i32)};
    for (int I = 1, E = N->getNumOperands(); I < E; ++I)
      Ops.push_back(Elt: N->getOperand(Num: I));
    SDValue LoadedVal = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl, VTList: Tys,
                                                Ops, MemVT, MMO: N->getMemOperand());
    // Reassemble i128 as (hi << 64) | lo.
    SDValue ValLo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal);
    SDValue ValHi =
        DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal.getValue(R: 1));
    ValHi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ValHi,
                        N2: DAG.getConstant(Val: 64, DL: dl, VT: MVT::i32));
    SDValue Val =
        DAG.getNode(Opcode: ISD::OR, DL: dl, ResultTys: {MVT::i128, MVT::Other}, Ops: {ValLo, ValHi});
    return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, ResultTys: {MVT::i128, MVT::Other},
                       Ops: {Val, LoadedVal.getValue(R: 2)});
  }
  case ISD::ATOMIC_STORE: {
    // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
    // lowered to ppc instructions by pattern matching instruction selector.
    SDVTList Tys = DAG.getVTList(VT: MVT::Other);
    SmallVector<SDValue, 4> Ops{
        N->getOperand(Num: 0),
        DAG.getConstant(Val: Intrinsic::ppc_atomic_store_i128, DL: dl, VT: MVT::i32)};
    // Split the stored i128 value into lo/hi i64 halves.
    SDValue Val = N->getOperand(Num: 1);
    SDValue ValLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: Val);
    SDValue ValHi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: Val,
                                N2: DAG.getConstant(Val: 64, DL: dl, VT: MVT::i32));
    ValHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: ValHi);
    Ops.push_back(Elt: ValLo);
    Ops.push_back(Elt: ValHi);
    Ops.push_back(Elt: N->getOperand(Num: 2)); // Pointer operand.
    return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops, MemVT,
                                   MMO: N->getMemOperand());
  }
  default:
    llvm_unreachable("Unexpected atomic opcode");
  }
}
11727
// Emit a test of \p Op against the fp-class set \p Mask using the Power9
// xststdc[sdq]p test-data-class instructions, returning an i1 result.
// Classes the instruction cannot test directly (normal, and qNaN vs sNaN) are
// synthesized by recursing on the complementary/remaining mask and combining
// with AND/OR/NOT.
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
                                SelectionDAG &DAG,
                                const PPCSubtarget &Subtarget) {
  assert(Mask <= fcAllFlags && "Invalid fp_class flags!");

  // Bit layout of the DCMX immediate field of xststdc*.
  enum DataClassMask {
    DC_NAN = 1 << 6,
    DC_NEG_INF = 1 << 4,
    DC_POS_INF = 1 << 5,
    DC_NEG_ZERO = 1 << 2,
    DC_POS_ZERO = 1 << 3,
    DC_NEG_SUBNORM = 1,
    DC_POS_SUBNORM = 1 << 1,
  };

  EVT VT = Op.getValueType();

  // Pick the test instruction matching the operand width.
  unsigned TestOp = VT == MVT::f128  ? PPC::XSTSTDCQP
                    : VT == MVT::f64 ? PPC::XSTSTDCDP
                                     : PPC::XSTSTDCSP;

  // Trivial masks: everything / nothing.
  if (Mask == fcAllFlags)
    return DAG.getBoolConstant(V: true, DL: Dl, VT: MVT::i1, OpVT: VT);
  if (Mask == 0)
    return DAG.getBoolConstant(V: false, DL: Dl, VT: MVT::i1, OpVT: VT);

  // When it's cheaper or necessary to test reverse flags.
  if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
    SDValue Rev = getDataClassTest(Op, Mask: ~Mask, Dl, DAG, Subtarget);
    return DAG.getNOT(DL: Dl, Val: Rev, VT: MVT::i1);
  }

  // Power doesn't support testing whether a value is 'normal'. Test the rest
  // first, and test if it's 'not not-normal' with expected sign.
  if (Mask & fcNormal) {
    // Test against every class except normal; a clear result bit means the
    // value is normal.
    SDValue Rev(DAG.getMachineNode(
                    Opcode: TestOp, dl: Dl, VT: MVT::i32,
                    Op1: DAG.getTargetConstant(Val: DC_NAN | DC_NEG_INF | DC_POS_INF |
                                          DC_NEG_ZERO | DC_POS_ZERO |
                                          DC_NEG_SUBNORM | DC_POS_SUBNORM,
                                      DL: Dl, VT: MVT::i32),
                    Op2: Op),
                0);
    // The sign is stored in CR bit 0 (lt); the test result in CR bit 2 (eq).
    SDValue Sign(
        DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
                           Op2: DAG.getTargetConstant(Val: PPC::sub_lt, DL: Dl, VT: MVT::i32)),
        0);
    SDValue Normal(DAG.getNOT(
        DL: Dl,
        Val: SDValue(DAG.getMachineNode(
                    Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
                    Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
                0),
        VT: MVT::i1));
    // For positive-normal the sign bit must be clear, so invert it.
    if (Mask & fcPosNormal)
      Sign = DAG.getNOT(DL: Dl, Val: Sign, VT: MVT::i1);
    SDValue Result = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: Sign, N2: Normal);
    if (Mask == fcPosNormal || Mask == fcNegNormal)
      return Result;

    // OR in the remaining (non-normal) classes, handled recursively.
    return DAG.getNode(
        Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
        N1: getDataClassTest(Op, Mask: Mask & ~fcNormal, Dl, DAG, Subtarget), N2: Result);
  }

  // The instruction doesn't differentiate between signaling or quiet NaN. Test
  // the rest first, and test if it 'is NaN and is signaling/quiet'.
  if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
    bool IsQuiet = Mask & fcQNan;
    SDValue NanCheck = getDataClassTest(Op, Mask: fcNan, Dl, DAG, Subtarget);

    // Quietness is determined by the first bit in fraction field.
    uint64_t QuietMask = 0;
    SDValue HighWord;
    if (VT == MVT::f128) {
      // Extract the most-significant word of the f128 bit pattern.
      HighWord = DAG.getNode(
          Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: DAG.getBitcast(VT: MVT::v4i32, V: Op),
          N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? 3 : 0, DL: Dl));
      QuietMask = 0x8000;
    } else if (VT == MVT::f64) {
      if (Subtarget.isPPC64()) {
        // On 64-bit targets the high word is simply element 1 of the i64.
        HighWord = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::i32,
                               N1: DAG.getBitcast(VT: MVT::i64, V: Op),
                               N2: DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32));
      } else {
        // On 32-bit targets go through a vector to pick the high word.
        SDValue Vec = DAG.getBitcast(
            VT: MVT::v4i32, V: DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: Dl, VT: MVT::v2f64, Operand: Op));
        HighWord = DAG.getNode(
            Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: Vec,
            N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? 1 : 0, DL: Dl));
      }
      QuietMask = 0x80000;
    } else if (VT == MVT::f32) {
      HighWord = DAG.getBitcast(VT: MVT::i32, V: Op);
      QuietMask = 0x400000;
    }
    // quiet: the quiet bit is set; signaling: the quiet bit is clear.
    SDValue NanRes = DAG.getSetCC(
        DL: Dl, VT: MVT::i1,
        LHS: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: HighWord,
                    N2: DAG.getConstant(Val: QuietMask, DL: Dl, VT: MVT::i32)),
        RHS: DAG.getConstant(Val: 0, DL: Dl, VT: MVT::i32), Cond: IsQuiet ? ISD::SETNE : ISD::SETEQ);
    NanRes = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: NanCheck, N2: NanRes);
    if (Mask == fcQNan || Mask == fcSNan)
      return NanRes;

    // OR in the remaining (non-NaN) classes, handled recursively.
    return DAG.getNode(Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
                       N1: getDataClassTest(Op, Mask: Mask & ~fcNan, Dl, DAG, Subtarget),
                       N2: NanRes);
  }

  // Remaining masks map directly onto the DCMX bits of the instruction.
  unsigned NativeMask = 0;
  if ((Mask & fcNan) == fcNan)
    NativeMask |= DC_NAN;
  if (Mask & fcNegInf)
    NativeMask |= DC_NEG_INF;
  if (Mask & fcPosInf)
    NativeMask |= DC_POS_INF;
  if (Mask & fcNegZero)
    NativeMask |= DC_NEG_ZERO;
  if (Mask & fcPosZero)
    NativeMask |= DC_POS_ZERO;
  if (Mask & fcNegSubnormal)
    NativeMask |= DC_NEG_SUBNORM;
  if (Mask & fcPosSubnormal)
    NativeMask |= DC_POS_SUBNORM;
  // Run the test and read the match result from the eq bit of the CR field.
  return SDValue(
      DAG.getMachineNode(
          Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1,
          Op1: SDValue(DAG.getMachineNode(
                      Opcode: TestOp, dl: Dl, VT: MVT::i32,
                      Op1: DAG.getTargetConstant(Val: NativeMask, DL: Dl, VT: MVT::i32), Op2: Op),
                  0),
          Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
      0);
}
11864
11865SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11866 SelectionDAG &DAG) const {
11867 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11868 SDValue LHS = Op.getOperand(i: 0);
11869 uint64_t RHSC = Op.getConstantOperandVal(i: 1);
11870 SDLoc Dl(Op);
11871 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11872 if (LHS.getValueType() == MVT::ppcf128) {
11873 // The higher part determines the value class.
11874 LHS = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::f64, N1: LHS,
11875 N2: DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32));
11876 }
11877
11878 return getDataClassTest(Op: LHS, Mask: Category, Dl, DAG, Subtarget);
11879}
11880
11881// Adjust the length value for a load/store with length to account for the
11882// instructions requiring a left justified length, and for non-byte element
11883// types requiring scaling by element size.
11884static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11885 SelectionDAG &DAG) {
11886 SDLoc dl(Val);
11887 EVT VT = Val->getValueType(ResNo: 0);
11888 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11889 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Val: Bits / 8);
11890 SDValue SHLAmt = DAG.getConstant(Val: LeftAdj + TypeAdj, DL: dl, VT);
11891 return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Val, N2: SHLAmt);
11892}
11893
// Lower a VP (vector-predicated) load to the lxvl/lxvrl load-with-length
// intrinsic. Only the all-ones-mask form is supported; the EVL operand
// supplies the element count, which AdjustLength scales/justifies as the
// chosen instruction requires.
SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
  auto VPLD = cast<VPLoadSDNode>(Val&: Op);
  bool Future = Subtarget.isISAFuture();
  SDLoc dl(Op);
  assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
         "Mask predication not supported");
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  // Operand 4 is the explicit vector length (EVL); widen it to pointer width.
  SDValue Len = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: PtrVT, Operand: VPLD->getOperand(Num: 4));
  unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
  unsigned EltBits = Op->getValueType(ResNo: 0).getScalarType().getSizeInBits();
  // lxvl (pre-Future) wants a left-justified length; lxvrl does not.
  Len = AdjustLength(Val: Len, Bits: EltBits, Left: !Future, DAG);
  SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32),
                   VPLD->getOperand(Num: 1), Len};
  SDVTList Tys = DAG.getVTList(VT1: Op->getValueType(ResNo: 0), VT2: MVT::Other);
  // Preserve the original memory operand so aliasing info carries over.
  SDValue VPL =
      DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl, VTList: Tys, Ops,
                              MemVT: VPLD->getMemoryVT(), MMO: VPLD->getMemOperand());
  return VPL;
}
11913
// Lower a VP (vector-predicated) store to the stxvl/stxvrl store-with-length
// intrinsic. Mirrors LowerVP_LOAD: all-ones mask only, EVL scaled/justified
// by AdjustLength, and the value bitcast to v4i32 for the intrinsic.
SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
  auto VPST = cast<VPStoreSDNode>(Val&: Op);
  assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
         "Mask predication not supported");
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  SDLoc dl(Op);
  // Operand 5 is the explicit vector length (EVL); widen it to pointer width.
  SDValue Len = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: PtrVT, Operand: VPST->getOperand(Num: 5));
  unsigned EltBits =
      Op->getOperand(Num: 1).getValueType().getScalarType().getSizeInBits();
  bool Future = Subtarget.isISAFuture();
  unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
  // stxvl (pre-Future) wants a left-justified length; stxvrl does not.
  Len = AdjustLength(Val: Len, Bits: EltBits, Left: !Future, DAG);
  SDValue Ops[] = {
      VPST->getChain(), DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32),
      DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: VPST->getOperand(Num: 1)),
      VPST->getOperand(Num: 2), Len};
  SDVTList Tys = DAG.getVTList(VT: MVT::Other);
  // Preserve the original memory operand so aliasing info carries over.
  SDValue VPS =
      DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops,
                              MemVT: VPST->getMemoryVT(), MMO: VPST->getMemOperand());
  return VPS;
}
11936
// Custom lowering for ISD::SCALAR_TO_VECTOR. Tries, in order: a canonical
// constant splat for small immediates, a direct load-and-splat when the
// scalar comes from a reusable load, and finally a store/reload through a
// 16-byte-aligned stack slot.
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);

  MachineFunction &MF = DAG.getMachineFunction();
  SDValue Op0 = Op.getOperand(i: 0);
  EVT ValVT = Op0.getValueType();
  unsigned EltSize = Op.getValueType().getScalarSizeInBits();
  // Small constants in the VSPLTI immediate range become a splat directly.
  if (isa<ConstantSDNode>(Val: Op0) && EltSize <= 32) {
    int64_t IntVal = Op.getConstantOperandVal(i: 0);
    if (IntVal >= -16 && IntVal <= 15)
      return getCanonicalConstSplat(Val: IntVal, SplatSize: EltSize / 8, VT: Op.getValueType(), DAG,
                                    dl);
  }

  // If the scalar is itself a single-use i32 load whose address we can reuse,
  // emit a LD_SPLAT memory intrinsic instead of loading then splatting.
  ReuseLoadInfo RLI;
  if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
      Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
      Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
      canReuseLoadAddress(Op: Op0, MemVT: MVT::i32, RLI, DAG, ET: ISD::NON_EXTLOAD)) {

    MachineMemOperand *MMO =
        MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
                                BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
    SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
    SDValue Bits = DAG.getMemIntrinsicNode(
        Opcode: PPCISD::LD_SPLAT, dl, VTList: DAG.getVTList(VT1: MVT::v4i32, VT2: MVT::Other), Ops,
        MemVT: MVT::i32, MMO);
    // Keep the original load's ordering constraints intact.
    if (RLI.ResChain)
      DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
    return Bits.getValue(R: 0);
  }

  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(Size: 16, Alignment: Align(16), isSpillSlot: false);
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);

  SDValue Val = Op0;
  // P10 hardware store forwarding requires that a single store contains all
  // the data for the load. P10 is able to merge a pair of adjacent stores. Try
  // to avoid load hit store on P10 when running binaries compiled for older
  // processors by generating two mergeable scalar stores to forward with the
  // vector load.
  if (!DisableP10StoreForward && Subtarget.isPPC64() &&
      !Subtarget.isLittleEndian() && ValVT.isInteger() &&
      ValVT.getSizeInBits() <= 64) {
    // Left-justify the scalar in an i64 so both 8-byte halves of the slot
    // receive the same mergeable store.
    Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: Val);
    EVT ShiftAmountTy = getShiftAmountTy(LHSTy: MVT::i64, DL: DAG.getDataLayout());
    SDValue ShiftBy = DAG.getConstant(
        Val: 64 - Op.getValueType().getScalarSizeInBits(), DL: dl, VT: ShiftAmountTy);
    Val = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: Val, N2: ShiftBy);
    SDValue Plus8 =
        DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FIdx, N2: DAG.getConstant(Val: 8, DL: dl, VT: PtrVT));
    SDValue Store2 =
        DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: Plus8, PtrInfo: MachinePointerInfo());
    SDValue Store = DAG.getStore(Chain: Store2, dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo());
    return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx,
                       PtrInfo: MachinePointerInfo());
  }

  // Store the input value into Value#0 of the stack slot.
  SDValue Store =
      DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx, PtrInfo: MachinePointerInfo());
}
12005
// Custom lowering for ISD::INSERT_VECTOR_ELT. Decides, per subtarget, whether
// the node is already legal (returned unchanged), needs expansion (SDValue()),
// or can be mapped to MTVSRZ + VECINSERT for small integer element types.
SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  // Non-null only when the insert index is a compile-time constant.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(i: 0); // Vector being inserted into.
  SDValue V2 = Op.getOperand(i: 1); // Scalar element to insert.

  // v2f64 with a constant index is already legal.
  if (VT == MVT::v2f64 && C)
    return Op;

  if (Subtarget.hasP9Vector()) {
    // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
    // because on P10, it allows this specific insert_vector_elt load pattern to
    // utilize the refactored load and store infrastructure in order to exploit
    // prefixed loads.
    // On targets with inexpensive direct moves (Power9 and up), a
    // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
    // load since a single precision load will involve conversion to double
    // precision on the load followed by another conversion to single precision.
    if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
        (isa<LoadSDNode>(Val: V2))) {
      // Do the insert in the integer domain and bitcast back to float.
      SDValue BitcastVector = DAG.getBitcast(VT: MVT::v4i32, V: V1);
      SDValue BitcastLoad = DAG.getBitcast(VT: MVT::i32, V: V2);
      SDValue InsVecElt =
          DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl, VT: MVT::v4i32, N1: BitcastVector,
                      N2: BitcastLoad, N3: Op.getOperand(i: 2));
      return DAG.getBitcast(VT: MVT::v4f32, V: InsVecElt);
    }
  }

  if (Subtarget.isISA3_1()) {
    // 64-bit element inserts need 64-bit GPRs; expand on 32-bit targets.
    if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
      return SDValue();
    // On P10, we have legal lowering for constant and variable indices for
    // all vectors.
    if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
        VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
      return Op;
  }

  // Before P10, we have legal lowering for constant indices but not for
  // variable ones.
  if (!C)
    return SDValue();

  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue Mtvsrz = DAG.getNode(Opcode: PPCISD::MTVSRZ, DL: dl, VT, Operand: V2);
    unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
    unsigned InsertAtElement = C->getZExtValue();
    unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
    // VECINSERT takes a big-endian byte offset; flip it for little-endian.
    if (Subtarget.isLittleEndian()) {
      InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
    }
    return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT, N1: V1, N2: Mtvsrz,
                       N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
  }
  return Op;
}
12070
// Lower a v1024i1 (dmr) or v2048i1 (dmr pair) load by splitting it into a
// series of 32-byte lxvp loads and reassembling the pieces with
// DMFInsert1024 (and REG_SEQUENCE for the pair case).
SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();
  EVT VT = Op.getValueType();
  bool IsV1024i1 = VT == MVT::v1024i1;
  bool IsV2048i1 = VT == MVT::v2048i1;

  // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
  // Dense Math dmr pair registers, respectively.
  assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
  (void)IsV2048i1;
  assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
         "Dense Math support required.");
  assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");

  SmallVector<SDValue, 8> Loads;      // One v256i1 per 32-byte chunk.
  SmallVector<SDValue, 8> LoadChains; // Their output chains.

  SDValue IntrinID = DAG.getConstant(Val: Intrinsic::ppc_vsx_lxvp, DL: dl, VT: MVT::i32);
  SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
  MachineMemOperand *MMO = LN->getMemOperand();
  // 4 chunks for v1024i1, 8 for v2048i1.
  unsigned NumVecs = VT.getSizeInBits() / 256;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Each chunk gets its own MMO at the right offset within the original.
    MachineMemOperand *NewMMO =
        DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * 32, Size: 32);
    if (Idx > 0) {
      BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
                            N2: DAG.getConstant(Val: 32, DL: dl, VT: BasePtr.getValueType()));
      LoadOps[2] = BasePtr;
    }
    SDValue Ld = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
                                         VTList: DAG.getVTList(VT1: MVT::v256i1, VT2: MVT::Other),
                                         Ops: LoadOps, MemVT: MVT::v256i1, MMO: NewMMO);
    LoadChains.push_back(Elt: Ld.getValue(R: 1));
    Loads.push_back(Elt: Ld);
  }

  // In-memory order is low-to-high; reverse for LE register numbering.
  if (Subtarget.isLittleEndian()) {
    std::reverse(first: Loads.begin(), last: Loads.end());
    std::reverse(first: LoadChains.begin(), last: LoadChains.end());
  }

  SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
  // Combine the first four pairs into one v1024i1 dmr value.
  SDValue Value = DMFInsert1024(Pairs: Loads, dl, DAG);

  if (IsV1024i1) {
    return DAG.getMergeValues(Ops: {Value, TF}, dl);
  }

  // Handle Loads for V2048i1 which represents a dmr pair.
  SmallVector<SDValue, 4> MoreLoads{Loads[4], Loads[5], Loads[6], Loads[7]};
  SDValue Dmr1Value = DMFInsert1024(Pairs: MoreLoads, dl, DAG);

  SDValue Dmr0Sub = DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32);
  SDValue Dmr1Sub = DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32);

  // Tie the two dmr values into a dmr-pair register with REG_SEQUENCE.
  SDValue DmrPRC = DAG.getTargetConstant(Val: PPC::DMRpRCRegClassID, DL: dl, VT: MVT::i32);
  const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};

  SDValue DmrPValue = SDValue(
      DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v2048i1, Ops: DmrPOps), 0);

  return DAG.getMergeValues(Ops: {DmrPValue, TF}, dl);
}
12138
12139SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12140 const SDLoc &dl,
12141 SelectionDAG &DAG) const {
12142 SDValue Lo =
12143 DAG.getNode(Opcode: PPCISD::INST512, DL: dl, VT: MVT::v512i1, N1: Pairs[0], N2: Pairs[1]);
12144 SDValue LoSub = DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32);
12145 SDValue Hi =
12146 DAG.getNode(Opcode: PPCISD::INST512HI, DL: dl, VT: MVT::v512i1, N1: Pairs[2], N2: Pairs[3]);
12147 SDValue HiSub = DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32);
12148 SDValue RC = DAG.getTargetConstant(Val: PPC::DMRRCRegClassID, DL: dl, VT: MVT::i32);
12149
12150 return SDValue(DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v1024i1,
12151 Ops: {RC, Lo, LoSub, Hi, HiSub}),
12152 0);
12153}
12154
// Custom lowering for loads of the MMA-related wide vector types. Dispatches
// dmr types to LowerDMFVectorLoad; splits v256i1/v512i1 into 16-byte v16i8
// loads combined with PAIR_BUILD/ACC_BUILD; leaves everything else alone.
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();
  EVT VT = Op.getValueType();

  if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
    return LowerDMFVectorLoad(Op, DAG);

  if (VT != MVT::v256i1 && VT != MVT::v512i1)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  // For v256i1 on ISA Future, let the load go through to instruction selection
  // where it will be matched to lxvp/plxvp by the instruction patterns.
  if (VT == MVT::v256i1 && Subtarget.isISAFuture())
    return Op;

  // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
  // value in 2 or 4 vsx registers.
  Align Alignment = LN->getAlign();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> LoadChains;
  unsigned NumVecs = VT.getSizeInBits() / 128;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Each piece carries pointer info/alignment offset by 16 bytes per step.
    SDValue Load =
        DAG.getLoad(VT: MVT::v16i8, dl, Chain: LoadChain, Ptr: BasePtr,
                    PtrInfo: LN->getPointerInfo().getWithOffset(O: Idx * 16),
                    Alignment: commonAlignment(A: Alignment, Offset: Idx * 16),
                    MMOFlags: LN->getMemOperand()->getFlags(), AAInfo: LN->getAAInfo());
    BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
                          N2: DAG.getConstant(Val: 16, DL: dl, VT: BasePtr.getValueType()));
    Loads.push_back(Elt: Load);
    LoadChains.push_back(Elt: Load.getValue(R: 1));
  }
  // In-memory order is low-to-high; reverse for LE register numbering.
  if (Subtarget.isLittleEndian()) {
    std::reverse(first: Loads.begin(), last: Loads.end());
    std::reverse(first: LoadChains.begin(), last: LoadChains.end());
  }
  SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
  SDValue Value =
      DAG.getNode(Opcode: VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
                  DL: dl, VT, Ops: Loads);
  SDValue RetOps[] = {Value, TF};
  return DAG.getMergeValues(Ops: RetOps, dl);
}
12208
// Lower a v1024i1 (dmr) or v2048i1 (dmr pair) store: pull the wacc halves out
// of the dmr value(s) with EXTRACT_SUBREG, disassemble each half into two
// v256i1 pairs via DMXXEXTFDMR512[_HI], then emit one 32-byte stxvp per pair.
SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
                                               SelectionDAG &DAG) const {

  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SmallVector<SDValue, 8> Values; // v256i1 pieces, in memory order.
  SmallVector<SDValue, 8> Stores; // One stxvp chain per piece.
  EVT VT = SN->getValue().getValueType();
  bool IsV1024i1 = VT == MVT::v1024i1;
  bool IsV2048i1 = VT == MVT::v2048i1;

  // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
  // Dense Math dmr pair registers, respectively.
  assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
  (void)IsV2048i1;
  assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
         "Dense Math support required.");
  assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");

  // Each DMXXEXTFDMR512[_HI] produces two v256i1 results.
  EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
  if (IsV1024i1) {
    // Single dmr: split into its low/high wacc halves.
    SDValue Lo(DAG.getMachineNode(
                   Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
                   Op1: Op.getOperand(i: 1),
                   Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
               0);
    SDValue Hi(DAG.getMachineNode(
                   Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
                   Op1: Op.getOperand(i: 1),
                   Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
               0);
    MachineSDNode *ExtNode =
        DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Lo);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
    ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Hi);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
  } else {
    // This corresponds to v2048i1 which represents a dmr pair.
    // First peel the pair into its two dmr components...
    SDValue Dmr0(
        DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
                           Op1: Op.getOperand(i: 1),
                           Op2: DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32)),
        0);

    SDValue Dmr1(
        DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
                           Op1: Op.getOperand(i: 1),
                           Op2: DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32)),
        0);

    // ...then each dmr into its low/high wacc halves.
    SDValue Dmr0Lo(DAG.getMachineNode(
                       Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
                       Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
                   0);

    SDValue Dmr0Hi(DAG.getMachineNode(
                       Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
                       Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
                   0);

    SDValue Dmr1Lo(DAG.getMachineNode(
                       Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
                       Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
                   0);

    SDValue Dmr1Hi(DAG.getMachineNode(
                       Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
                       Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
                   0);

    // Disassemble all four halves into eight v256i1 pieces.
    MachineSDNode *ExtNode =
        DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr0Lo);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
    ExtNode =
        DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr0Hi);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
    ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr1Lo);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
    ExtNode =
        DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr1Hi);
    Values.push_back(Elt: SDValue(ExtNode, 0));
    Values.push_back(Elt: SDValue(ExtNode, 1));
  }

  // Register order is high-to-low on LE; reverse to get memory order.
  if (Subtarget.isLittleEndian())
    std::reverse(first: Values.begin(), last: Values.end());

  SDVTList Tys = DAG.getVTList(VT: MVT::Other);
  SmallVector<SDValue, 4> Ops{
      StoreChain, DAG.getConstant(Val: Intrinsic::ppc_vsx_stxvp, DL: dl, VT: MVT::i32),
      Values[0], BasePtr};
  MachineMemOperand *MMO = SN->getMemOperand();
  unsigned NumVecs = VT.getSizeInBits() / 256;
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Each stxvp gets its own MMO at the right offset within the original.
    MachineMemOperand *NewMMO =
        DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * 32, Size: 32);
    if (Idx > 0) {
      BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
                            N2: DAG.getConstant(Val: 32, DL: dl, VT: BasePtr.getValueType()));
      Ops[3] = BasePtr;
    }
    Ops[2] = Values[Idx];
    SDValue St = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops,
                                         MemVT: MVT::v256i1, MMO: NewMMO);
    Stores.push_back(Elt: St);
  }

  SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
  return TF;
}
12326
// Lower a store of v256i1 (a VSX register pair) or v512i1 (an MMA
// accumulator) by decomposing the value into individual v16i8 stores of the
// underlying registers. v1024i1/v2048i1 values are forwarded to
// LowerDMFVectorStore; all other store types are returned unchanged.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();
  // Second half of an accumulator when it is split via DMXXEXTFDMR512 on
  // ISA Future; initialized to the same value so other paths can ignore it.
  SDValue Value2 = SN->getValue();
  EVT StoreVT = Value.getValueType();

  if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
    return LowerDMFVectorStore(Op, DAG);

  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  // For v256i1 on ISA Future, let the store go through to instruction selection
  // where it will be matched to stxvp/pstxvp by the instruction patterns.
  if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
      !DisableAutoPairedVecSt)
    return Op;

  // For other cases, create 2 or 4 v16i8 stores to store the pair or
  // accumulator underlying registers individually.
  Align Alignment = SN->getAlign();
  SmallVector<SDValue, 4> Stores;
  unsigned NumVecs = 2;
  if (StoreVT == MVT::v512i1) {
    if (Subtarget.isISAFuture()) {
      // On ISA Future, split the accumulator into two v256i1 halves
      // (DMXXEXTFDMR512 yields two results) before extracting registers.
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      MachineSDNode *ExtNode = DAG.getMachineNode(
          Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Op.getOperand(i: 1));

      Value = SDValue(ExtNode, 0);
      Value2 = SDValue(ExtNode, 1);
    } else
      // Otherwise lower through XXMFACC so the individual VSX registers can
      // be extracted below.
      Value = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: Value);
    NumVecs = 4;
  }
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    // Registers are emitted in reverse order on little-endian targets.
    unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
    SDValue Elt;
    if (Subtarget.isISAFuture()) {
      // Each extracted half is itself a pair: index within the pair and
      // select Value2 for the second half (Idx > 1).
      VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
      Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
                        N1: Idx > 1 ? Value2 : Value,
                        N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
    } else
      Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: Value,
                        N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));

    SDValue Store =
        DAG.getStore(Chain: StoreChain, dl, Val: Elt, Ptr: BasePtr,
                     PtrInfo: SN->getPointerInfo().getWithOffset(O: Idx * 16),
                     Alignment: commonAlignment(A: Alignment, Offset: Idx * 16),
                     MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());
    // Advance the base pointer by one 16-byte register for the next store.
    BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
                          N2: DAG.getConstant(Val: 16, DL: dl, VT: BasePtr.getValueType()));
    Stores.push_back(Elt: Store);
  }
  // Tie the component stores back into a single chain result.
  SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
  return TF;
}
12396
// Custom-lower vector multiply, for which Altivec has no single full-width
// instruction: v4i32 products are assembled from 16-bit partial products,
// and v16i8 products from even/odd 8-bit multiplies merged with a shuffle.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);

    SDValue Zero = getCanonicalConstSplat(Val: 0, SplatSize: 1, VT: MVT::v4i32, DAG, dl);
    // +16 as shift amt.
    SDValue Neg16 = getCanonicalConstSplat(Val: -16, SplatSize: 4, VT: MVT::v4i32, DAG, dl);
    SDValue RHSSwap = // = vrlw RHS, 16
        BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vrlw, LHS: RHS, RHS: Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: LHS);
    RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHS);
    RHSSwap = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, DestVT: MVT::v4i32);

    // Multiply-sum against the rotated RHS to form the remaining partial
    // products in one instruction (accumulating into Zero).
    SDValue HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmsumuhm,
                                      Op0: LHS, Op1: RHSSwap, Op2: Zero, DAG, dl, DestVT: MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: HiProd,
                              RHS: Neg16, DAG, dl);
    return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::v4i32, N1: LoProd, N2: HiProd);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
    EvenParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
    OddParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OddParts);

    // Merge the results together. Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OddParts, N2: EvenParts, Mask: Ops);
    else
      return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: EvenParts, N2: OddParts, Mask: Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
12460
12461SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12462 bool IsStrict = Op->isStrictFPOpcode();
12463 if (Op.getOperand(i: IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12464 !Subtarget.hasP9Vector())
12465 return SDValue();
12466
12467 return Op;
12468}
12469
12470// Custom lowering for fpext vf32 to v2f64
12471SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12472
12473 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12474 "Should only be called for ISD::FP_EXTEND");
12475
12476 // FIXME: handle extends from half precision float vectors on P9.
12477 // We only want to custom lower an extend from v2f32 to v2f64.
12478 if (Op.getValueType() != MVT::v2f64 ||
12479 Op.getOperand(i: 0).getValueType() != MVT::v2f32)
12480 return SDValue();
12481
12482 SDLoc dl(Op);
12483 SDValue Op0 = Op.getOperand(i: 0);
12484
12485 switch (Op0.getOpcode()) {
12486 default:
12487 return SDValue();
12488 case ISD::EXTRACT_SUBVECTOR: {
12489 assert(Op0.getNumOperands() == 2 &&
12490 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12491 "Node should have 2 operands with second one being a constant!");
12492
12493 if (Op0.getOperand(i: 0).getValueType() != MVT::v4f32)
12494 return SDValue();
12495
12496 // Custom lower is only done for high or low doubleword.
12497 int Idx = Op0.getConstantOperandVal(i: 1);
12498 if (Idx % 2 != 0)
12499 return SDValue();
12500
12501 // Since input is v4f32, at this point Idx is either 0 or 2.
12502 // Shift to get the doubleword position we want.
12503 int DWord = Idx >> 1;
12504
12505 // High and low word positions are different on little endian.
12506 if (Subtarget.isLittleEndian())
12507 DWord ^= 0x1;
12508
12509 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64,
12510 N1: Op0.getOperand(i: 0), N2: DAG.getConstant(Val: DWord, DL: dl, VT: MVT::i32));
12511 }
12512 case ISD::FADD:
12513 case ISD::FMUL:
12514 case ISD::FSUB: {
12515 SDValue NewLoad[2];
12516 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12517 // Ensure both input are loads.
12518 SDValue LdOp = Op0.getOperand(i);
12519 if (LdOp.getOpcode() != ISD::LOAD)
12520 return SDValue();
12521 // Generate new load node.
12522 LoadSDNode *LD = cast<LoadSDNode>(Val&: LdOp);
12523 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12524 NewLoad[i] = DAG.getMemIntrinsicNode(
12525 Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12526 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12527 }
12528 SDValue NewOp =
12529 DAG.getNode(Opcode: Op0.getOpcode(), DL: SDLoc(Op0), VT: MVT::v4f32, N1: NewLoad[0],
12530 N2: NewLoad[1], Flags: Op0.getNode()->getFlags());
12531 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewOp,
12532 N2: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32));
12533 }
12534 case ISD::LOAD: {
12535 LoadSDNode *LD = cast<LoadSDNode>(Val&: Op0);
12536 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12537 SDValue NewLd = DAG.getMemIntrinsicNode(
12538 Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12539 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12540 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewLd,
12541 N2: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32));
12542 }
12543 }
12544 llvm_unreachable("ERROR:Should return for all cases within swtich.");
12545}
12546
12547static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12548 SelectionDAG &DAG,
12549 const PPCSubtarget &STI) {
12550 SDLoc DL(Value);
12551 if (STI.useCRBits())
12552 Value = DAG.getNode(Opcode: ISD::SELECT, DL, VT: SumType, N1: Value,
12553 N2: DAG.getConstant(Val: 1, DL, VT: SumType),
12554 N3: DAG.getConstant(Val: 0, DL, VT: SumType));
12555 else
12556 Value = DAG.getZExtOrTrunc(Op: Value, DL, VT: SumType);
12557 SDValue Sum = DAG.getNode(Opcode: PPCISD::ADDC, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32),
12558 N1: Value, N2: DAG.getAllOnesConstant(DL, VT: SumType));
12559 return Sum.getValue(R: 1);
12560}
12561
12562static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12563 EVT CarryType, SelectionDAG &DAG,
12564 const PPCSubtarget &STI) {
12565 SDLoc DL(Flag);
12566 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SumType);
12567 SDValue Carry = DAG.getNode(
12568 Opcode: PPCISD::ADDE, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32), N1: Zero, N2: Zero, N3: Flag);
12569 if (STI.useCRBits())
12570 return DAG.getSetCC(DL, VT: CarryType, LHS: Carry, RHS: Zero, Cond: ISD::SETNE);
12571 return DAG.getZExtOrTrunc(Op: Carry, DL, VT: CarryType);
12572}
12573
12574SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12575
12576 SDLoc DL(Op);
12577 SDNode *N = Op.getNode();
12578 EVT VT = N->getValueType(ResNo: 0);
12579 EVT CarryType = N->getValueType(ResNo: 1);
12580 unsigned Opc = N->getOpcode();
12581 bool IsAdd = Opc == ISD::UADDO;
12582 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12583 SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
12584 N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 1));
12585 SDValue Carry = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: 1), CarryType,
12586 DAG, STI: Subtarget);
12587 if (!IsAdd)
12588 Carry = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Carry,
12589 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryType));
12590 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: Carry);
12591}
12592
12593SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12594 SelectionDAG &DAG) const {
12595 SDLoc DL(Op);
12596 SDNode *N = Op.getNode();
12597 unsigned Opc = N->getOpcode();
12598 EVT VT = N->getValueType(ResNo: 0);
12599 EVT CarryType = N->getValueType(ResNo: 1);
12600 SDValue CarryOp = N->getOperand(Num: 2);
12601 bool IsAdd = Opc == ISD::UADDO_CARRY;
12602 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12603 if (!IsAdd)
12604 CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
12605 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryOp.getValueType()));
12606 CarryOp = ConvertCarryValueToCarryFlag(SumType: VT, Value: CarryOp, DAG, STI: Subtarget);
12607 SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
12608 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1), N3: CarryOp);
12609 CarryOp = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: 1), CarryType, DAG,
12610 STI: Subtarget);
12611 if (!IsAdd)
12612 CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
12613 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryOp.getValueType()));
12614 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: CarryOp);
12615}
12616
12617SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12618
12619 SDLoc dl(Op);
12620 SDValue LHS = Op.getOperand(i: 0);
12621 SDValue RHS = Op.getOperand(i: 1);
12622 EVT VT = Op.getNode()->getValueType(ResNo: 0);
12623
12624 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
12625
12626 SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: RHS, N2: LHS);
12627 SDValue Xor2 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Sub, N2: LHS);
12628
12629 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Xor1, N2: Xor2);
12630
12631 SDValue Overflow =
12632 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: And,
12633 N2: DAG.getConstant(Val: VT.getSizeInBits() - 1, DL: dl, VT: MVT::i32));
12634
12635 SDValue OverflowTrunc =
12636 DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: Op.getNode()->getValueType(ResNo: 1), Operand: Overflow);
12637
12638 return DAG.getMergeValues(Ops: {Sub, OverflowTrunc}, dl);
12639}
12640
12641/// Implements signed add with overflow detection using the rule:
12642/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12643SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12644
12645 SDLoc dl(Op);
12646 SDValue LHS = Op.getOperand(i: 0);
12647 SDValue RHS = Op.getOperand(i: 1);
12648 EVT VT = Op.getNode()->getValueType(ResNo: 0);
12649
12650 SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
12651
12652 // Compute ~(x xor y)
12653 SDValue XorXY = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
12654 SDValue EqvXY = DAG.getNOT(DL: dl, Val: XorXY, VT);
12655 // Compute (s xor x)
12656 SDValue SumXorX = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Sum, N2: LHS);
12657
12658 // overflow = (x eqv y) & (s xor x)
12659 SDValue OverflowInSign = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: EqvXY, N2: SumXorX);
12660
12661 // Shift sign bit down to LSB
12662 SDValue Overflow =
12663 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: OverflowInSign,
12664 N2: DAG.getConstant(Val: VT.getSizeInBits() - 1, DL: dl, VT: MVT::i32));
12665 // Truncate to the overflow type (i1)
12666 SDValue OverflowTrunc =
12667 DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: Op.getNode()->getValueType(ResNo: 1), Operand: Overflow);
12668
12669 return DAG.getMergeValues(Ops: {Sum, OverflowTrunc}, dl);
12670}
12671
12672// Lower unsigned 3-way compare producing -1/0/1.
12673SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12674 SDLoc DL(Op);
12675 SDValue A = DAG.getFreeze(V: Op.getOperand(i: 0));
12676 SDValue B = DAG.getFreeze(V: Op.getOperand(i: 1));
12677 EVT OpVT = A.getValueType();
12678 EVT ResVT = Op.getValueType();
12679
12680 // On PPC64, i32 carries are affected by the upper 32 bits of the registers.
12681 // We must zero-extend to i64 to ensure the carry reflects the 32-bit unsigned
12682 // comparison.
12683 if (Subtarget.isPPC64() && OpVT == MVT::i32) {
12684 A = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: A);
12685 B = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: B);
12686 OpVT = MVT::i64;
12687 }
12688
12689 // First compute diff = A - B.
12690 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: OpVT, N1: A, N2: B);
12691
12692 // Generate B - A using SUBC to capture carry.
12693 SDVTList VTs = DAG.getVTList(VT1: OpVT, VT2: MVT::i32);
12694 SDValue SubC = DAG.getNode(Opcode: PPCISD::SUBC, DL, VTList: VTs, N1: B, N2: A);
12695 SDValue CA0 = SubC.getValue(R: 1);
12696
12697 // t2 = A - B + CA0 using SUBE.
12698 SDValue SubE1 = DAG.getNode(Opcode: PPCISD::SUBE, DL, VTList: VTs, N1: A, N2: B, N3: CA0);
12699 SDValue CA1 = SubE1.getValue(R: 1);
12700
12701 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12702 SDValue ResPair = DAG.getNode(Opcode: PPCISD::SUBE, DL, VTList: VTs, N1: Diff, N2: SubE1, N3: CA1);
12703
12704 // Extract the first result and truncate to result type if needed.
12705 return DAG.getSExtOrTrunc(Op: ResPair.getValue(R: 0), DL, VT: ResVT);
12706}
12707
/// LowerOperation - Provide custom lowering hooks for some operations.
///
/// Dispatches on the opcode of \p Op to the matching Lower* helper. By
/// SelectionDAG convention, returning SDValue() declines custom lowering
/// (the generic legalizer takes over), while returning \p Op unchanged
/// marks the operation legal as-is.
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::FPOW: return lowerPow(Op, DAG);
  case ISD::FSIN: return lowerSin(Op, DAG);
  case ISD::FCOS: return lowerCos(Op, DAG);
  case ISD::FLOG: return lowerLog(Op, DAG);
  case ISD::FLOG10: return lowerLog10(Op, DAG);
  case ISD::FEXP: return lowerExp(Op, DAG);
  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS:
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::SSUBO:
    return LowerSSUBO(Op, DAG);
  case ISD::SADDO:
    return LowerSADDO(Op, DAG);

  case ISD::INLINEASM:
  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
  // Variable argument lowering.
  case ISD::VASTART: return LowerVASTART(Op, DAG);
  case ISD::VAARG: return LowerVAARG(Op, DAG);
  case ISD::VACOPY: return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, dl: SDLoc(Op));
  case ISD::STRICT_UINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
  case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return LowerSET_ROUNDING(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);

  case ISD::FSHL: return LowerFunnelShift(Op, DAG);
  case ISD::FSHR: return LowerFunnelShift(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL: return LowerMUL(Op, DAG);
  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
  case ISD::STRICT_FP_ROUND:
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);

  case ISD::BITCAST: return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::ATOMIC_STORE:
    return LowerATOMIC_LOAD_STORE(Op, DAG);
  case ISD::IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::UADDO:
  case ISD::USUBO:
    return LowerADDSUBO(Op, DAG);
  case ISD::UADDO_CARRY:
  case ISD::USUBO_CARRY:
    return LowerADDSUBO_CARRY(Op, DAG);
  case ISD::UCMP:
    return LowerUCMP(Op, DAG);
  case ISD::STRICT_LRINT:
  case ISD::STRICT_LLRINT:
  case ISD::STRICT_LROUND:
  case ISD::STRICT_LLROUND:
  case ISD::STRICT_FNEARBYINT:
    // Keep these as-is only when FP exceptions may be ignored; otherwise
    // decline so the generic legalizer handles them.
    if (Op->getFlags().hasNoFPExcept())
      return Op;
    return SDValue();
  case ISD::VP_LOAD:
    return LowerVP_LOAD(Op, DAG);
  case ISD::VP_STORE:
    return LowerVP_STORE(Op, DAG);
  }
}
12831
// Custom type-legalize the illegal-typed results of \p N, pushing the
// replacement values onto \p Results. By convention, cases that return
// without pushing anything defer to the default legalization.
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::ATOMIC_LOAD: {
    SDValue Res = LowerATOMIC_LOAD_STORE(Op: SDValue(N, 0), DAG);
    Results.push_back(Elt: Res);
    Results.push_back(Elt: Res.getValue(R: 1));
    break;
  }
  case ISD::READCYCLECOUNTER: {
    // Read the time base as two i32 halves and pair them into the i64 result.
    SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other);
    SDValue RTB = DAG.getNode(Opcode: PPCISD::READ_TIME_BASE, DL: dl, VTList: VTs, N: N->getOperand(Num: 0));

    Results.push_back(
        Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: RTB, N2: RTB.getValue(R: 1)));
    Results.push_back(Elt: RTB.getValue(R: 2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Only the CTR loop-decrement intrinsic is custom-legalized here.
    if (N->getConstantOperandVal(Num: 1) != Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(),
                                 VT: N->getValueType(ResNo: 0));
    SDVTList VTs = DAG.getVTList(VT1: SVT, VT2: MVT::Other);
    SDValue NewInt = DAG.getNode(Opcode: N->getOpcode(), DL: dl, VTList: VTs, N1: N->getOperand(Num: 0),
                                 N2: N->getOperand(Num: 1));

    Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewInt));
    Results.push_back(Elt: NewInt.getValue(R: 1));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (N->getConstantOperandVal(Num: 0)) {
    case Intrinsic::ppc_pack_longdouble:
      // Note the operand order swap: operand 2 supplies the first half.
      Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::ppcf128,
                                     N1: N->getOperand(Num: 2), N2: N->getOperand(Num: 1)));
      break;
    case Intrinsic::ppc_maxfe:
    case Intrinsic::ppc_minfe:
    case Intrinsic::ppc_fnmsub:
    case Intrinsic::ppc_convert_f128_to_ppcf128:
      Results.push_back(Elt: LowerINTRINSIC_WO_CHAIN(Op: SDValue(N, 0), DAG));
      break;
    }
    break;
  }
  case ISD::VAARG: {
    // Custom VAARG legalization applies only to 32-bit SVR4.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(ResNo: 0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(Op: SDValue(N, 1), DAG);

      Results.push_back(Elt: NewNode);
      Results.push_back(Elt: NewNode.getValue(R: 1));
    }
    return;
  }
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(Num: N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128)
      return;
    SDValue LoweredValue = LowerFP_TO_INT(Op: SDValue(N, 0), DAG, dl);
    Results.push_back(Elt: LoweredValue);
    if (N->isStrictFPOpcode())
      Results.push_back(Elt: LoweredValue.getValue(R: 1));
    return;
  }
  case ISD::TRUNCATE: {
    if (!N->getValueType(ResNo: 0).isVector())
      return;
    SDValue Lowered = LowerTRUNCATEVector(Op: SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Elt: Lowered);
    return;
  }
  case ISD::SCALAR_TO_VECTOR: {
    SDValue Lowered = LowerSCALAR_TO_VECTOR(Op: SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Elt: Lowered);
    return;
  }
  case ISD::FSHL:
  case ISD::FSHR:
    // Don't handle funnel shifts here.
    return;
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    SDValue Lowered = LowerFP_EXTEND(Op: SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Elt: Lowered);
    return;
  }
}
12941
12942//===----------------------------------------------------------------------===//
12943// Other Lowering Code
12944//===----------------------------------------------------------------------===//
12945
// Emit a call to a no-argument PPC intrinsic (e.g. sync/lwsync) at the
// builder's current insertion point and return the resulting instruction.
static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
  return Builder.CreateIntrinsic(ID: Id, Args: {});
}
12949
12950Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12951 Value *Addr,
12952 AtomicOrdering Ord) const {
12953 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12954
12955 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12956 "Only 8/16/32/64-bit atomic loads supported");
12957 Intrinsic::ID IntID;
12958 switch (SZ) {
12959 default:
12960 llvm_unreachable("Unexpected PrimitiveSize");
12961 case 8:
12962 IntID = Intrinsic::ppc_lbarx;
12963 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12964 break;
12965 case 16:
12966 IntID = Intrinsic::ppc_lharx;
12967 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12968 break;
12969 case 32:
12970 IntID = Intrinsic::ppc_lwarx;
12971 break;
12972 case 64:
12973 IntID = Intrinsic::ppc_ldarx;
12974 break;
12975 }
12976 Value *Call =
12977 Builder.CreateIntrinsic(ID: IntID, Args: Addr, /*FMFSource=*/nullptr, Name: "larx");
12978
12979 return Builder.CreateTruncOrBitCast(V: Call, DestTy: ValueTy);
12980}
12981
12982// Perform a store-conditional operation to Addr. Return the status of the
12983// store. This should be 0 if the store succeeded, non-zero otherwise.
12984Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12985 Value *Val, Value *Addr,
12986 AtomicOrdering Ord) const {
12987 Type *Ty = Val->getType();
12988 unsigned SZ = Ty->getPrimitiveSizeInBits();
12989
12990 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12991 "Only 8/16/32/64-bit atomic loads supported");
12992 Intrinsic::ID IntID;
12993 switch (SZ) {
12994 default:
12995 llvm_unreachable("Unexpected PrimitiveSize");
12996 case 8:
12997 IntID = Intrinsic::ppc_stbcx;
12998 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12999 break;
13000 case 16:
13001 IntID = Intrinsic::ppc_sthcx;
13002 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13003 break;
13004 case 32:
13005 IntID = Intrinsic::ppc_stwcx;
13006 break;
13007 case 64:
13008 IntID = Intrinsic::ppc_stdcx;
13009 break;
13010 }
13011
13012 if (SZ == 8 || SZ == 16)
13013 Val = Builder.CreateZExt(V: Val, DestTy: Builder.getInt32Ty());
13014
13015 Value *Call = Builder.CreateIntrinsic(ID: IntID, Args: {Addr, Val},
13016 /*FMFSource=*/nullptr, Name: "stcx");
13017 return Builder.CreateXor(LHS: Call, RHS: Builder.getInt32(C: 1));
13018}
13019
13020// The mappings for emitLeading/TrailingFence is taken from
13021// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
13022Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
13023 Instruction *Inst,
13024 AtomicOrdering Ord) const {
13025 if (Ord == AtomicOrdering::SequentiallyConsistent)
13026 return callIntrinsic(Builder, Id: Intrinsic::ppc_sync);
13027 if (isReleaseOrStronger(AO: Ord))
13028 return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
13029 return nullptr;
13030}
13031
13032Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
13033 Instruction *Inst,
13034 AtomicOrdering Ord) const {
13035 if (Inst->hasAtomicLoad() && isAcquireOrStronger(AO: Ord)) {
13036 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
13037 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
13038 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
13039 if (isa<LoadInst>(Val: Inst))
13040 return Builder.CreateIntrinsic(ID: Intrinsic::ppc_cfence, Types: {Inst->getType()},
13041 Args: {Inst});
13042 // FIXME: Can use isync for rmw operation.
13043 return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
13044 }
13045 return nullptr;
13046}
13047
// Expand an atomic read-modify-write pseudo of AtomicSize bytes into an
// explicit load-reserve / store-conditional retry loop.
//   - BinOpcode == 0 means ATOMIC_SWAP (the incoming value is stored as-is).
//   - CmpOpcode != 0 selects the min/max form: the loaded value is compared
//     against the operand and the store is skipped (branch to exitMBB) when
//     the current value should be kept.
// Returns the block that control falls through to after the loop (exitMBB).
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // Pick the reservation load / conditional store matching the access width.
  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    // NOTE(review): the assert message reads inverted — this path handles
    // sizes < 4 and guards on partword-atomic support.
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  // Pseudo operands: (dest, ptrA, ptrB, incr).
  Register dest = MI.getOperand(i: 0).getReg();
  Register ptrA = MI.getOperand(i: 1).getReg();
  Register ptrB = MI.getOperand(i: 2).getReg();
  Register incr = MI.getOperand(i: 3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // Create the loop block(s) and the exit block; loop2MBB only exists for
  // the compare (min/max) form.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  F->insert(MBBI: It, MBB: loopMBB);
  if (CmpOpcode)
    F->insert(MBBI: It, MBB: loop2MBB);
  F->insert(MBBI: It, MBB: exitMBB);
  // Everything after MI moves to exitMBB, which inherits BB's successors.
  exitMBB->splice(Where: exitMBB->begin(), Other: BB,
                  From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // For swap there is no arithmetic: store the incoming value directly.
  Register TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( RegClass: AtomicSize == 8 ? &PPC::G8RCRegClass
                                           : &PPC::GPRCRegClass);

  // thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(Succ: loopMBB);

  // loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // For max/min...
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] dest, incr
  //   bgt exitMBB
  //  loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest)
    .addReg(RegNo: ptrA).addReg(RegNo: ptrB);
  if (BinOpcode)
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg).addReg(RegNo: incr).addReg(RegNo: dest);
  if (CmpOpcode) {
    Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      Register ExtReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
      BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              DestReg: ExtReg).addReg(RegNo: dest);
      BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ExtReg).addReg(RegNo: incr);
    } else
      BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: dest).addReg(RegNo: incr);

    // Skip the store when the loaded value already satisfies CmpPred.
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
        .addImm(Val: CmpPred)
        .addReg(RegNo: CrReg)
        .addMBB(MBB: exitMBB);
    BB->addSuccessor(Succ: loop2MBB);
    BB->addSuccessor(Succ: exitMBB);
    BB = loop2MBB;
  }
  // Attempt the conditional store; retry the whole loop on reservation loss
  // (CR0.ne set by st[wd]cx.).
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
    .addReg(RegNo: TmpReg).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
    .addImm(Val: PPC::PRED_NE_MINUS)
    .addReg(RegNo: PPC::CR0)
    .addMBB(MBB: loopMBB);
  BB->addSuccessor(Succ: loopMBB);
  BB->addSuccessor(Succ: exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
13169
13170static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13171 switch(MI.getOpcode()) {
13172 default:
13173 return false;
13174 case PPC::COPY:
13175 return TII->isSignExtended(Reg: MI.getOperand(i: 1).getReg(),
13176 MRI: &MI.getMF()->getRegInfo());
13177 case PPC::LHA:
13178 case PPC::LHA8:
13179 case PPC::LHAU:
13180 case PPC::LHAU8:
13181 case PPC::LHAUX:
13182 case PPC::LHAUX8:
13183 case PPC::LHAX:
13184 case PPC::LHAX8:
13185 case PPC::LWA:
13186 case PPC::LWAUX:
13187 case PPC::LWAX:
13188 case PPC::LWAX_32:
13189 case PPC::LWA_32:
13190 case PPC::PLHA:
13191 case PPC::PLHA8:
13192 case PPC::PLHA8pc:
13193 case PPC::PLHApc:
13194 case PPC::PLWA:
13195 case PPC::PLWA8:
13196 case PPC::PLWA8pc:
13197 case PPC::PLWApc:
13198 case PPC::EXTSB:
13199 case PPC::EXTSB8:
13200 case PPC::EXTSB8_32_64:
13201 case PPC::EXTSB8_rec:
13202 case PPC::EXTSB_rec:
13203 case PPC::EXTSH:
13204 case PPC::EXTSH8:
13205 case PPC::EXTSH8_32_64:
13206 case PPC::EXTSH8_rec:
13207 case PPC::EXTSH_rec:
13208 case PPC::EXTSW:
13209 case PPC::EXTSWSLI:
13210 case PPC::EXTSWSLI_32_64:
13211 case PPC::EXTSWSLI_32_64_rec:
13212 case PPC::EXTSWSLI_rec:
13213 case PPC::EXTSW_32:
13214 case PPC::EXTSW_32_64:
13215 case PPC::EXTSW_32_64_rec:
13216 case PPC::EXTSW_rec:
13217 case PPC::SRAW:
13218 case PPC::SRAWI:
13219 case PPC::SRAWI_rec:
13220 case PPC::SRAW_rec:
13221 return true;
13222 }
13223 return false;
13224}
13225
// Expand a partword (8- or 16-bit) atomic read-modify-write. If the target
// has native partword lbarx/lharx, sign-extend the operand when required and
// defer to EmitAtomicBinary; otherwise emulate the operation with an aligned
// 32-bit lwarx/stwcx. loop that shifts and masks the byte/halfword inside
// its containing word. Returns the block control falls through to afterward.
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const PPCInstrInfo *TII = Subtarget.getInstrInfo();

  // If this is a signed comparison and the value being compared is not known
  // to be sign extended, sign extend it here.
  DebugLoc dl = MI.getDebugLoc();
  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &RegInfo = F->getRegInfo();
  Register incr = MI.getOperand(i: 3).getReg();
  bool IsSignExtended =
      incr.isVirtual() && isSignExtended(MI&: *RegInfo.getVRegDef(Reg: incr), TII);

  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
    Register ValueReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
    BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueReg)
        .addReg(RegNo: MI.getOperand(i: 3).getReg());
    // Rewrite the pseudo's operand so downstream expansion sees the
    // sign-extended value.
    MI.getOperand(i: 3).setReg(ValueReg);
    incr = ValueReg;
  }
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, AtomicSize: is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(i: 0).getReg();
  Register ptrA = MI.getOperand(i: 1).getReg();
  Register ptrB = MI.getOperand(i: 2).getReg();

  // loop2MBB only exists for the compare (min/max) form.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  F->insert(MBBI: It, MBB: loopMBB);
  if (CmpOpcode)
    F->insert(MBBI: It, MBB: loop2MBB);
  F->insert(MBBI: It, MBB: exitMBB);
  // Everything after MI moves to exitMBB, which inherits BB's successors.
  exitMBB->splice(Where: exitMBB->begin(), Other: BB,
                  From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);

  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  // On little-endian the in-word shift is the raw byte offset * 8; on
  // big-endian it must additionally be XORed (see below), so a second
  // register is needed.
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register SrwDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
  Register Ptr1Reg;
  // For swap there is no arithmetic: the shifted incoming value is stored.
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RegClass: GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(Succ: loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw SrwDest, tmpDest, shift
  //   rlwinm SrwDest, SrwDest, 0, 24 [16], 31
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
        .addReg(RegNo: ptrA)
        .addReg(RegNo: ptrB);
  } else {
    Ptr1Reg = ptrB;
  }

  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode.
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
      .addReg(RegNo: Ptr1Reg, Flags: {}, SubReg: is64bit ? PPC::sub_32 : 0)
      .addImm(Val: 3)
      .addImm(Val: 27)
      .addImm(Val: is8bit ? 28 : 27);
  // Big-endian: flip the shift so the partword lands in the right lane.
  if (!isLittleEndian)
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
        .addReg(RegNo: Shift1Reg)
        .addImm(Val: is8bit ? 24 : 16);
  // Align the pointer down to the containing word.
  if (is64bit)
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
        .addReg(RegNo: Ptr1Reg)
        .addImm(Val: 0)
        .addImm(Val: 61);
  else
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
        .addReg(RegNo: Ptr1Reg)
        .addImm(Val: 0)
        .addImm(Val: 0)
        .addImm(Val: 29);
  // Move operand and mask into the partword's lane within the word.
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: Incr2Reg).addReg(RegNo: incr).addReg(RegNo: ShiftReg);
  if (is8bit)
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: 255);
  else {
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: 0);
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
        .addReg(RegNo: Mask3Reg)
        .addImm(Val: 65535);
  }
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
      .addReg(RegNo: Mask2Reg)
      .addReg(RegNo: ShiftReg);

  BB = loopMBB;
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
      .addReg(RegNo: ZeroReg)
      .addReg(RegNo: PtrReg);
  if (BinOpcode)
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg)
        .addReg(RegNo: Incr2Reg)
        .addReg(RegNo: TmpDestReg);
  // Merge the updated partword back into the untouched bytes of the word.
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
      .addReg(RegNo: TmpDestReg)
      .addReg(RegNo: MaskReg);
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: Tmp3Reg).addReg(RegNo: TmpReg).addReg(RegNo: MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register SReg = RegInfo.createVirtualRegister(RegClass: GPRC);
    Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: SReg)
        .addReg(RegNo: TmpDestReg)
        .addReg(RegNo: MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      ValueReg = RegInfo.createVirtualRegister(RegClass: GPRC);
      BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: ValueReg)
          .addReg(RegNo: SReg)
          .addReg(RegNo: ShiftReg);
      Register ValueSReg = RegInfo.createVirtualRegister(RegClass: GPRC);
      BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueSReg)
          .addReg(RegNo: ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    // Skip the store when the loaded value already satisfies CmpPred.
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ValueReg).addReg(RegNo: CmpReg);
    BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
        .addImm(Val: CmpPred)
        .addReg(RegNo: CrReg)
        .addMBB(MBB: exitMBB);
    BB->addSuccessor(Succ: loop2MBB);
    BB->addSuccessor(Succ: exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg).addReg(RegNo: Tmp3Reg).addReg(RegNo: Tmp2Reg);
  // Attempt the conditional store; retry on reservation loss (CR0.ne).
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
      .addReg(RegNo: Tmp4Reg)
      .addReg(RegNo: ZeroReg)
      .addReg(RegNo: PtrReg);
  BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
      .addImm(Val: PPC::PRED_NE_MINUS)
      .addReg(RegNo: PPC::CR0)
      .addMBB(MBB: loopMBB);
  BB->addSuccessor(Succ: loopMBB);
  BB->addSuccessor(Succ: exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // Since the shift amount is not a constant, we need to clear
  // the upper bits with a separate RLWINM.
  // Note: each BuildMI below inserts at BB->begin(), so the SRW (emitted
  // second) ends up executing before the RLWINM (emitted first).
  BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: dest)
      .addReg(RegNo: SrwDestReg)
      .addImm(Val: 0)
      .addImm(Val: is8bit ? 24 : 16)
      .addImm(Val: 31);
  BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: SrwDestReg)
      .addReg(RegNo: TmpDestReg)
      .addReg(RegNo: ShiftReg);
  return BB;
}
13439
// Expand the EH_SjLj_SETJMP pseudo into the thisMBB/mainMBB/sinkMBB diamond
// sketched in the block comment below: the normal path through mainMBB
// stores the resume address into the buffer and yields 0; a longjmp re-entry
// resumes in thisMBB after the setup and yields 1. Returns sinkMBB, where
// the original fall-through code now lives.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  Register DstReg = MI.getOperand(i: 0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  Register mainDstReg = MRI.createVirtualRegister(RegClass: RC);
  Register restoreDstReg = MRI.createVirtualRegister(RegClass: RC);

  MVT PVT = getPointerTy(DL: MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(MBBI: I, MBB: mainMBB);
  MF->insert(MBBI: I, MBB: sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(Where: sinkMBB->begin(), Other: MBB,
                  From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  // Buffer slot offsets, in pointer-sized units (see layout comment above).
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(VT: PVT);
  Register LabelReg = MRI.createVirtualRegister(RegClass: PtrRC);
  Register BufReg = MI.getOperand(i: 1).getReg();

  // 64-bit ELF: save the TOC pointer (X2) so cross-library jumps can
  // restore it.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
              .addReg(RegNo: PPC::X2)
              .addImm(Val: TOCOffset)
              .addReg(RegNo: BufReg)
              .cloneMemRefs(OtherMI: MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Kind: Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL,
                MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(RegNo: BaseReg)
            .addImm(Val: BPOffset)
            .addReg(RegNo: BufReg)
            .cloneMemRefs(OtherMI: MI);

  // Setup
  // BCLalways branches to mainMBB while setting LR; the regmask declares
  // everything clobbered across a longjmp re-entry.
  MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::BCLalways)).addMBB(MBB: mainMBB);
  MIB.addRegMask(Mask: TRI->getNoPreservedMask());

  // Longjmp re-entry path: result is 1.
  BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: restoreDstReg).addImm(Val: 1);

  MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::EH_SjLj_Setup))
            .addMBB(MBB: mainMBB);
  MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: sinkMBB);

  thisMBB->addSuccessor(Succ: mainMBB, Prob: BranchProbability::getZero());
  thisMBB->addSuccessor(Succ: sinkMBB, Prob: BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  // Capture the resume address (set into LR by BCLalways above).
  MIB =
      BuildMI(BB: mainMBB, MIMD: DL,
              MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), DestReg: LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
              .addReg(RegNo: LabelReg)
              .addImm(Val: LabelOffset)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STW))
              .addReg(RegNo: LabelReg)
              .addImm(Val: LabelOffset)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // Normal path: result is 0.
  BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: mainDstReg).addImm(Val: 0);
  mainMBB->addSuccessor(Succ: sinkMBB);

  // sinkMBB:
  // Merge the two result values.
  BuildMI(BB&: *sinkMBB, I: sinkMBB->begin(), MIMD: DL,
          MCID: TII->get(Opcode: PPC::PHI), DestReg: DstReg)
      .addReg(RegNo: mainDstReg).addMBB(MBB: mainMBB)
      .addReg(RegNo: restoreDstReg).addMBB(MBB: thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
13581
// Expand the EH_SjLj_LONGJMP pseudo: reload the frame pointer, saved IP,
// stack pointer, base pointer, and (on 64-bit SVR4) the TOC pointer from the
// setjmp buffer whose address is in operand 0, then jump to the saved IP via
// the count register. Slot offsets mirror those written by emitEHSjLjSetJmp.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(DL: MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
      (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  Register Tmp = MRI.createVirtualRegister(RegClass: RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
                                                              : PPC::R30);

  MachineInstrBuilder MIB;

  // Buffer slot offsets, matching the layout written by emitEHSjLjSetJmp.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  Register BufReg = MI.getOperand(i: 0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: FP)
              .addImm(Val: 0)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: FP)
              .addImm(Val: 0)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: Tmp)
              .addImm(Val: LabelOffset)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: Tmp)
              .addImm(Val: LabelOffset)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: SP)
              .addImm(Val: SPOffset)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: SP)
              .addImm(Val: SPOffset)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: BP)
              .addImm(Val: BPOffset)
              .addReg(RegNo: BufReg);
  } else {
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: BP)
              .addImm(Val: BPOffset)
              .addReg(RegNo: BufReg);
  }
  MIB.cloneMemRefs(OtherMI: MI);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: PPC::X2)
              .addImm(Val: TOCOffset)
              .addReg(RegNo: BufReg)
              .cloneMemRefs(OtherMI: MI);
  }

  // Jump
  // Move the saved IP into CTR and branch through it.
  BuildMI(BB&: *MBB, I&: MI, MIMD: DL,
          MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(RegNo: Tmp);
  BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}
13683
13684bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13685 // If the function specifically requests inline stack probes, emit them.
13686 if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
13687 return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
13688 "inline-asm";
13689 return false;
13690}
13691
13692unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13693 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13694 unsigned StackAlign = TFI->getStackAlignment();
13695 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13696 "Unexpected stack alignment");
13697 // The default stack probe size is 4096 if the function has no
13698 // stack-probe-size attribute.
13699 const Function &Fn = MF.getFunction();
13700 unsigned StackProbeSize =
13701 Fn.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: 4096);
13702 // Round down to the stack alignment.
13703 StackProbeSize &= ~(StackAlign - 1);
13704 return StackProbeSize ? StackProbeSize : StackAlign;
13705}
13706
// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
// into three phases. In the first phase, it uses the pseudo instruction
// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
// and FinalStackPtr. In the second phase, it generates a loop for probing
// blocks. In the last phase, it uses the pseudo instruction DYNAREAOFFSET to
// get the future result of MaxCallFrameSize so that it can calculate the
// correct data area pointer.
13713MachineBasicBlock *
13714PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13715 MachineBasicBlock *MBB) const {
13716 const bool isPPC64 = Subtarget.isPPC64();
13717 MachineFunction *MF = MBB->getParent();
13718 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13719 DebugLoc DL = MI.getDebugLoc();
13720 const unsigned ProbeSize = getStackProbeSize(MF: *MF);
13721 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13722 MachineRegisterInfo &MRI = MF->getRegInfo();
13723 // The CFG of probing stack looks as
13724 // +-----+
13725 // | MBB |
13726 // +--+--+
13727 // |
13728 // +----v----+
13729 // +--->+ TestMBB +---+
13730 // | +----+----+ |
13731 // | | |
13732 // | +-----v----+ |
13733 // +---+ BlockMBB | |
13734 // +----------+ |
13735 // |
13736 // +---------+ |
13737 // | TailMBB +<--+
13738 // +---------+
13739 // In MBB, calculate previous frame pointer and final stack pointer.
13740 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13741 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13742 // TailMBB is spliced via \p MI.
13743 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13744 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13745 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13746
13747 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13748 MF->insert(MBBI: MBBIter, MBB: TestMBB);
13749 MF->insert(MBBI: MBBIter, MBB: BlockMBB);
13750 MF->insert(MBBI: MBBIter, MBB: TailMBB);
13751
13752 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13753 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13754
13755 Register DstReg = MI.getOperand(i: 0).getReg();
13756 Register NegSizeReg = MI.getOperand(i: 1).getReg();
13757 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13758 Register FinalStackPtr = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13759 Register FramePointer = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13760 Register ActualNegSizeReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13761
13762 // Since value of NegSizeReg might be realigned in prologepilog, insert a
13763 // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
13764 // NegSize.
13765 unsigned ProbeOpc;
13766 if (!MRI.hasOneNonDBGUse(RegNo: NegSizeReg))
13767 ProbeOpc =
13768 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13769 else
13770 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
13771 // and NegSizeReg will be allocated in the same phyreg to avoid
13772 // redundant copy when NegSizeReg has only one use which is current MI and
13773 // will be replaced by PREPARE_PROBED_ALLOCA then.
13774 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13775 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13776 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: ProbeOpc), DestReg: FramePointer)
13777 .addDef(RegNo: ActualNegSizeReg)
13778 .addReg(RegNo: NegSizeReg)
13779 .add(MO: MI.getOperand(i: 2))
13780 .add(MO: MI.getOperand(i: 3));
13781
13782 // Calculate final stack pointer, which equals to SP + ActualNegSize.
13783 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4),
13784 DestReg: FinalStackPtr)
13785 .addReg(RegNo: SPReg)
13786 .addReg(RegNo: ActualNegSizeReg);
13787
13788 // Materialize a scratch register for update.
13789 int64_t NegProbeSize = -(int64_t)ProbeSize;
13790 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13791 Register ScratchReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13792 if (!isInt<16>(x: NegProbeSize)) {
13793 Register TempReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13794 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LIS8 : PPC::LIS), DestReg: TempReg)
13795 .addImm(Val: NegProbeSize >> 16);
13796 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ORI8 : PPC::ORI),
13797 DestReg: ScratchReg)
13798 .addReg(RegNo: TempReg)
13799 .addImm(Val: NegProbeSize & 0xFFFF);
13800 } else
13801 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LI8 : PPC::LI), DestReg: ScratchReg)
13802 .addImm(Val: NegProbeSize);
13803
13804 {
13805 // Probing leading residual part.
13806 Register Div = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13807 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::DIVD : PPC::DIVW), DestReg: Div)
13808 .addReg(RegNo: ActualNegSizeReg)
13809 .addReg(RegNo: ScratchReg);
13810 Register Mul = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13811 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::MULLD : PPC::MULLW), DestReg: Mul)
13812 .addReg(RegNo: Div)
13813 .addReg(RegNo: ScratchReg);
13814 Register NegMod = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13815 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::SUBF8 : PPC::SUBF), DestReg: NegMod)
13816 .addReg(RegNo: Mul)
13817 .addReg(RegNo: ActualNegSizeReg);
13818 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13819 .addReg(RegNo: FramePointer)
13820 .addReg(RegNo: SPReg)
13821 .addReg(RegNo: NegMod);
13822 }
13823
13824 {
13825 // Remaining part should be multiple of ProbeSize.
13826 Register CmpResult = MRI.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13827 BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::CMPD : PPC::CMPW), DestReg: CmpResult)
13828 .addReg(RegNo: SPReg)
13829 .addReg(RegNo: FinalStackPtr);
13830 BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC))
13831 .addImm(Val: PPC::PRED_EQ)
13832 .addReg(RegNo: CmpResult)
13833 .addMBB(MBB: TailMBB);
13834 TestMBB->addSuccessor(Succ: BlockMBB);
13835 TestMBB->addSuccessor(Succ: TailMBB);
13836 }
13837
13838 {
13839 // Touch the block.
13840 // |P...|P...|P...
13841 BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13842 .addReg(RegNo: FramePointer)
13843 .addReg(RegNo: SPReg)
13844 .addReg(RegNo: ScratchReg);
13845 BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: TestMBB);
13846 BlockMBB->addSuccessor(Succ: TestMBB);
13847 }
13848
13849 // Calculation of MaxCallFrameSize is deferred to prologepilog, use
13850 // DYNAREAOFFSET pseudo instruction to get the future result.
13851 Register MaxCallFrameSizeReg =
13852 MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13853 BuildMI(BB: TailMBB, MIMD: DL,
13854 MCID: TII->get(Opcode: isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13855 DestReg: MaxCallFrameSizeReg)
13856 .add(MO: MI.getOperand(i: 2))
13857 .add(MO: MI.getOperand(i: 3));
13858 BuildMI(BB: TailMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4), DestReg: DstReg)
13859 .addReg(RegNo: SPReg)
13860 .addReg(RegNo: MaxCallFrameSizeReg);
13861
13862 // Splice instructions after MI to TailMBB.
13863 TailMBB->splice(Where: TailMBB->end(), Other: MBB,
13864 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
13865 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
13866 MBB->addSuccessor(Succ: TestMBB);
13867
13868 // Delete the pseudo instruction.
13869 MI.eraseFromParent();
13870
13871 ++NumDynamicAllocaProbed;
13872 return TailMBB;
13873}
13874
13875static bool IsSelectCC(MachineInstr &MI) {
13876 switch (MI.getOpcode()) {
13877 case PPC::SELECT_CC_I4:
13878 case PPC::SELECT_CC_I8:
13879 case PPC::SELECT_CC_F4:
13880 case PPC::SELECT_CC_F8:
13881 case PPC::SELECT_CC_F16:
13882 case PPC::SELECT_CC_VRRC:
13883 case PPC::SELECT_CC_VSFRC:
13884 case PPC::SELECT_CC_VSSRC:
13885 case PPC::SELECT_CC_VSRC:
13886 case PPC::SELECT_CC_SPE4:
13887 case PPC::SELECT_CC_SPE:
13888 return true;
13889 default:
13890 return false;
13891 }
13892}
13893
13894static bool IsSelect(MachineInstr &MI) {
13895 switch (MI.getOpcode()) {
13896 case PPC::SELECT_I4:
13897 case PPC::SELECT_I8:
13898 case PPC::SELECT_F4:
13899 case PPC::SELECT_F8:
13900 case PPC::SELECT_F16:
13901 case PPC::SELECT_SPE:
13902 case PPC::SELECT_SPE4:
13903 case PPC::SELECT_VRRC:
13904 case PPC::SELECT_VSFRC:
13905 case PPC::SELECT_VSSRC:
13906 case PPC::SELECT_VSRC:
13907 return true;
13908 default:
13909 return false;
13910 }
13911}
13912
13913MachineBasicBlock *
13914PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13915 MachineBasicBlock *BB) const {
13916 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13917 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13918 if (Subtarget.is64BitELFABI() &&
13919 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13920 !Subtarget.isUsingPCRelativeCalls()) {
13921 // Call lowering should have added an r2 operand to indicate a dependence
13922 // on the TOC base pointer value. It can't however, because there is no
13923 // way to mark the dependence as implicit there, and so the stackmap code
13924 // will confuse it with a regular operand. Instead, add the dependence
13925 // here.
13926 MI.addOperand(Op: MachineOperand::CreateReg(Reg: PPC::X2, isDef: false, isImp: true));
13927 }
13928
13929 return emitPatchPoint(MI, MBB: BB);
13930 }
13931
13932 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13933 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13934 return emitEHSjLjSetJmp(MI, MBB: BB);
13935 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13936 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13937 return emitEHSjLjLongJmp(MI, MBB: BB);
13938 }
13939
13940 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13941
13942 // To "insert" these instructions we actually have to insert their
13943 // control-flow patterns.
13944 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13945 MachineFunction::iterator It = ++BB->getIterator();
13946
13947 MachineFunction *F = BB->getParent();
13948 MachineRegisterInfo &MRI = F->getRegInfo();
13949
13950 if (Subtarget.hasISEL() &&
13951 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13952 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13953 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13954 SmallVector<MachineOperand, 2> Cond;
13955 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13956 MI.getOpcode() == PPC::SELECT_CC_I8)
13957 Cond.push_back(Elt: MI.getOperand(i: 4));
13958 else
13959 Cond.push_back(Elt: MachineOperand::CreateImm(Val: PPC::PRED_BIT_SET));
13960 Cond.push_back(Elt: MI.getOperand(i: 1));
13961
13962 DebugLoc dl = MI.getDebugLoc();
13963 TII->insertSelect(MBB&: *BB, I: MI, DL: dl, DstReg: MI.getOperand(i: 0).getReg(), Cond,
13964 TrueReg: MI.getOperand(i: 2).getReg(), FalseReg: MI.getOperand(i: 3).getReg());
13965 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13966 // The incoming instruction knows the destination vreg to set, the
13967 // condition code register to branch on, the true/false values to
13968 // select between, and a branch opcode to use.
13969
13970 // thisMBB:
13971 // ...
13972 // TrueVal = ...
13973 // cmpTY ccX, r1, r2
13974 // bCC sinkMBB
13975 // fallthrough --> copy0MBB
13976 MachineBasicBlock *thisMBB = BB;
13977 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13978 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13979 DebugLoc dl = MI.getDebugLoc();
13980 F->insert(MBBI: It, MBB: copy0MBB);
13981 F->insert(MBBI: It, MBB: sinkMBB);
13982
13983 if (isPhysRegUsedAfter(Reg: PPC::CARRY, MBI: MI.getIterator())) {
13984 copy0MBB->addLiveIn(PhysReg: PPC::CARRY);
13985 sinkMBB->addLiveIn(PhysReg: PPC::CARRY);
13986 }
13987
13988 // Set the call frame size on entry to the new basic blocks.
13989 // See https://reviews.llvm.org/D156113.
13990 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13991 copy0MBB->setCallFrameSize(CallFrameSize);
13992 sinkMBB->setCallFrameSize(CallFrameSize);
13993
13994 // Transfer the remainder of BB and its successor edges to sinkMBB.
13995 sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
13996 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13997 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13998
13999 // Next, add the true and fallthrough blocks as its successors.
14000 BB->addSuccessor(Succ: copy0MBB);
14001 BB->addSuccessor(Succ: sinkMBB);
14002
14003 if (IsSelect(MI)) {
14004 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BC))
14005 .addReg(RegNo: MI.getOperand(i: 1).getReg())
14006 .addMBB(MBB: sinkMBB);
14007 } else {
14008 unsigned SelectPred = MI.getOperand(i: 4).getImm();
14009 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14010 .addImm(Val: SelectPred)
14011 .addReg(RegNo: MI.getOperand(i: 1).getReg())
14012 .addMBB(MBB: sinkMBB);
14013 }
14014
14015 // copy0MBB:
14016 // %FalseValue = ...
14017 // # fallthrough to sinkMBB
14018 BB = copy0MBB;
14019
14020 // Update machine-CFG edges
14021 BB->addSuccessor(Succ: sinkMBB);
14022
14023 // sinkMBB:
14024 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
14025 // ...
14026 BB = sinkMBB;
14027 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::PHI), DestReg: MI.getOperand(i: 0).getReg())
14028 .addReg(RegNo: MI.getOperand(i: 3).getReg())
14029 .addMBB(MBB: copy0MBB)
14030 .addReg(RegNo: MI.getOperand(i: 2).getReg())
14031 .addMBB(MBB: thisMBB);
14032 } else if (MI.getOpcode() == PPC::ReadTB) {
14033 // To read the 64-bit time-base register on a 32-bit target, we read the
14034 // two halves. Should the counter have wrapped while it was being read, we
14035 // need to try again.
14036 // ...
14037 // readLoop:
14038 // mfspr Rx,TBU # load from TBU
14039 // mfspr Ry,TB # load from TB
14040 // mfspr Rz,TBU # load from TBU
14041 // cmpw crX,Rx,Rz # check if 'old'='new'
14042 // bne readLoop # branch if they're not equal
14043 // ...
14044
14045 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14046 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14047 DebugLoc dl = MI.getDebugLoc();
14048 F->insert(MBBI: It, MBB: readMBB);
14049 F->insert(MBBI: It, MBB: sinkMBB);
14050
14051 // Transfer the remainder of BB and its successor edges to sinkMBB.
14052 sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
14053 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
14054 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14055
14056 BB->addSuccessor(Succ: readMBB);
14057 BB = readMBB;
14058
14059 MachineRegisterInfo &RegInfo = F->getRegInfo();
14060 Register ReadAgainReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
14061 Register LoReg = MI.getOperand(i: 0).getReg();
14062 Register HiReg = MI.getOperand(i: 1).getReg();
14063
14064 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: HiReg).addImm(Val: 269);
14065 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: LoReg).addImm(Val: 268);
14066 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: ReadAgainReg).addImm(Val: 269);
14067
14068 Register CmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14069
14070 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CmpReg)
14071 .addReg(RegNo: HiReg)
14072 .addReg(RegNo: ReadAgainReg);
14073 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14074 .addImm(Val: PPC::PRED_NE)
14075 .addReg(RegNo: CmpReg)
14076 .addMBB(MBB: readMBB);
14077
14078 BB->addSuccessor(Succ: readMBB);
14079 BB->addSuccessor(Succ: sinkMBB);
14080 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
14081 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::ADD4);
14082 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
14083 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::ADD4);
14084 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
14085 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::ADD4);
14086 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
14087 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::ADD8);
14088
14089 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
14090 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::AND);
14091 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
14092 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::AND);
14093 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
14094 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::AND);
14095 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
14096 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::AND8);
14097
14098 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
14099 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::OR);
14100 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
14101 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::OR);
14102 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14103 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::OR);
14104 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14105 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::OR8);
14106
14107 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14108 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::XOR);
14109 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14110 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::XOR);
14111 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14112 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::XOR);
14113 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14114 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::XOR8);
14115
14116 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14117 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::NAND);
14118 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14119 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::NAND);
14120 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14121 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::NAND);
14122 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14123 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::NAND8);
14124
14125 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14126 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::SUBF);
14127 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14128 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::SUBF);
14129 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14130 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::SUBF);
14131 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14132 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::SUBF8);
14133
14134 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14135 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
14136 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14137 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
14138 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14139 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
14140 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14141 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_LT);
14142
14143 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14144 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
14145 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14146 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
14147 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14148 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
14149 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14150 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_GT);
14151
14152 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14153 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
14154 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14155 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
14156 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14157 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
14158 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14159 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_LT);
14160
14161 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14162 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
14163 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14164 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
14165 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14166 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
14167 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14168 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_GT);
14169
14170 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14171 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0);
14172 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14173 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0);
14174 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14175 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0);
14176 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14177 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0);
14178 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14179 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14180 (Subtarget.hasPartwordAtomics() &&
14181 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14182 (Subtarget.hasPartwordAtomics() &&
14183 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14184 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14185
14186 auto LoadMnemonic = PPC::LDARX;
14187 auto StoreMnemonic = PPC::STDCX;
14188 switch (MI.getOpcode()) {
14189 default:
14190 llvm_unreachable("Compare and swap of unknown size");
14191 case PPC::ATOMIC_CMP_SWAP_I8:
14192 LoadMnemonic = PPC::LBARX;
14193 StoreMnemonic = PPC::STBCX;
14194 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14195 break;
14196 case PPC::ATOMIC_CMP_SWAP_I16:
14197 LoadMnemonic = PPC::LHARX;
14198 StoreMnemonic = PPC::STHCX;
14199 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14200 break;
14201 case PPC::ATOMIC_CMP_SWAP_I32:
14202 LoadMnemonic = PPC::LWARX;
14203 StoreMnemonic = PPC::STWCX;
14204 break;
14205 case PPC::ATOMIC_CMP_SWAP_I64:
14206 LoadMnemonic = PPC::LDARX;
14207 StoreMnemonic = PPC::STDCX;
14208 break;
14209 }
14210 MachineRegisterInfo &RegInfo = F->getRegInfo();
14211 Register dest = MI.getOperand(i: 0).getReg();
14212 Register ptrA = MI.getOperand(i: 1).getReg();
14213 Register ptrB = MI.getOperand(i: 2).getReg();
14214 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14215 Register oldval = MI.getOperand(i: 3).getReg();
14216 Register newval = MI.getOperand(i: 4).getReg();
14217 DebugLoc dl = MI.getDebugLoc();
14218
14219 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14220 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14221 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14222 F->insert(MBBI: It, MBB: loop1MBB);
14223 F->insert(MBBI: It, MBB: loop2MBB);
14224 F->insert(MBBI: It, MBB: exitMBB);
14225 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
14226 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
14227 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14228
14229 // thisMBB:
14230 // ...
14231 // fallthrough --> loopMBB
14232 BB->addSuccessor(Succ: loop1MBB);
14233
14234 // loop1MBB:
14235 // l[bhwd]arx dest, ptr
14236 // cmp[wd] dest, oldval
14237 // bne- exitBB
14238 // loop2MBB:
14239 // st[bhwd]cx. newval, ptr
14240 // bne- loopMBB
14241 // b exitBB
14242 // exitBB:
14243 BB = loop1MBB;
14244 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
14245 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::CMPD : PPC::CMPW), DestReg: CrReg)
14246 .addReg(RegNo: dest)
14247 .addReg(RegNo: oldval);
14248 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14249 .addImm(Val: PPC::PRED_NE_MINUS)
14250 .addReg(RegNo: CrReg)
14251 .addMBB(MBB: exitMBB);
14252 BB->addSuccessor(Succ: loop2MBB);
14253 BB->addSuccessor(Succ: exitMBB);
14254
14255 BB = loop2MBB;
14256 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
14257 .addReg(RegNo: newval)
14258 .addReg(RegNo: ptrA)
14259 .addReg(RegNo: ptrB);
14260 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14261 .addImm(Val: PPC::PRED_NE_MINUS)
14262 .addReg(RegNo: PPC::CR0)
14263 .addMBB(MBB: loop1MBB);
14264 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14265 BB->addSuccessor(Succ: loop1MBB);
14266 BB->addSuccessor(Succ: exitMBB);
14267
14268 // exitMBB:
14269 // ...
14270 BB = exitMBB;
14271 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14272 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14273 // We must use 64-bit registers for addresses when targeting 64-bit,
14274 // since we're actually doing arithmetic on them. Other registers
14275 // can be 32-bit.
14276 bool is64bit = Subtarget.isPPC64();
14277 bool isLittleEndian = Subtarget.isLittleEndian();
14278 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14279
14280 Register dest = MI.getOperand(i: 0).getReg();
14281 Register ptrA = MI.getOperand(i: 1).getReg();
14282 Register ptrB = MI.getOperand(i: 2).getReg();
14283 Register oldval = MI.getOperand(i: 3).getReg();
14284 Register newval = MI.getOperand(i: 4).getReg();
14285 DebugLoc dl = MI.getDebugLoc();
14286
14287 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14288 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14289 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14290 F->insert(MBBI: It, MBB: loop1MBB);
14291 F->insert(MBBI: It, MBB: loop2MBB);
14292 F->insert(MBBI: It, MBB: exitMBB);
14293 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
14294 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
14295 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14296
14297 MachineRegisterInfo &RegInfo = F->getRegInfo();
14298 const TargetRegisterClass *RC =
14299 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14300 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14301
14302 Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
14303 Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14304 Register ShiftReg =
14305 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
14306 Register NewVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14307 Register NewVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14308 Register OldVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14309 Register OldVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14310 Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14311 Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14312 Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14313 Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14314 Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14315 Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14316 Register Ptr1Reg;
14317 Register TmpReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14318 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14319 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14320 // thisMBB:
14321 // ...
14322 // fallthrough --> loopMBB
14323 BB->addSuccessor(Succ: loop1MBB);
14324
14325 // The 4-byte load must be aligned, while a char or short may be
14326 // anywhere in the word. Hence all this nasty bookkeeping code.
14327 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14328 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14329 // xori shift, shift1, 24 [16]
14330 // rlwinm ptr, ptr1, 0, 0, 29
14331 // slw newval2, newval, shift
14332 // slw oldval2, oldval,shift
14333 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14334 // slw mask, mask2, shift
14335 // and newval3, newval2, mask
14336 // and oldval3, oldval2, mask
14337 // loop1MBB:
14338 // lwarx tmpDest, ptr
14339 // and tmp, tmpDest, mask
14340 // cmpw tmp, oldval3
14341 // bne- exitBB
14342 // loop2MBB:
14343 // andc tmp2, tmpDest, mask
14344 // or tmp4, tmp2, newval3
14345 // stwcx. tmp4, ptr
14346 // bne- loop1MBB
14347 // b exitBB
14348 // exitBB:
14349 // srw dest, tmpDest, shift
14350 if (ptrA != ZeroReg) {
14351 Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
14352 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
14353 .addReg(RegNo: ptrA)
14354 .addReg(RegNo: ptrB);
14355 } else {
14356 Ptr1Reg = ptrB;
14357 }
14358
14359 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
14360 // mode.
14361 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
14362 .addReg(RegNo: Ptr1Reg, Flags: {}, SubReg: is64bit ? PPC::sub_32 : 0)
14363 .addImm(Val: 3)
14364 .addImm(Val: 27)
14365 .addImm(Val: is8bit ? 28 : 27);
14366 if (!isLittleEndian)
14367 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
14368 .addReg(RegNo: Shift1Reg)
14369 .addImm(Val: is8bit ? 24 : 16);
14370 if (is64bit)
14371 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
14372 .addReg(RegNo: Ptr1Reg)
14373 .addImm(Val: 0)
14374 .addImm(Val: 61);
14375 else
14376 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
14377 .addReg(RegNo: Ptr1Reg)
14378 .addImm(Val: 0)
14379 .addImm(Val: 0)
14380 .addImm(Val: 29);
14381 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: NewVal2Reg)
14382 .addReg(RegNo: newval)
14383 .addReg(RegNo: ShiftReg);
14384 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: OldVal2Reg)
14385 .addReg(RegNo: oldval)
14386 .addReg(RegNo: ShiftReg);
14387 if (is8bit)
14388 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: 255);
14389 else {
14390 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: 0);
14391 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
14392 .addReg(RegNo: Mask3Reg)
14393 .addImm(Val: 65535);
14394 }
14395 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
14396 .addReg(RegNo: Mask2Reg)
14397 .addReg(RegNo: ShiftReg);
14398 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: NewVal3Reg)
14399 .addReg(RegNo: NewVal2Reg)
14400 .addReg(RegNo: MaskReg);
14401 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: OldVal3Reg)
14402 .addReg(RegNo: OldVal2Reg)
14403 .addReg(RegNo: MaskReg);
14404
14405 BB = loop1MBB;
14406 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
14407 .addReg(RegNo: ZeroReg)
14408 .addReg(RegNo: PtrReg);
14409 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: TmpReg)
14410 .addReg(RegNo: TmpDestReg)
14411 .addReg(RegNo: MaskReg);
14412 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CrReg)
14413 .addReg(RegNo: TmpReg)
14414 .addReg(RegNo: OldVal3Reg);
14415 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14416 .addImm(Val: PPC::PRED_NE)
14417 .addReg(RegNo: CrReg)
14418 .addMBB(MBB: exitMBB);
14419 BB->addSuccessor(Succ: loop2MBB);
14420 BB->addSuccessor(Succ: exitMBB);
14421
14422 BB = loop2MBB;
14423 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
14424 .addReg(RegNo: TmpDestReg)
14425 .addReg(RegNo: MaskReg);
14426 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg)
14427 .addReg(RegNo: Tmp2Reg)
14428 .addReg(RegNo: NewVal3Reg);
14429 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
14430 .addReg(RegNo: Tmp4Reg)
14431 .addReg(RegNo: ZeroReg)
14432 .addReg(RegNo: PtrReg);
14433 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14434 .addImm(Val: PPC::PRED_NE)
14435 .addReg(RegNo: PPC::CR0)
14436 .addMBB(MBB: loop1MBB);
14437 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14438 BB->addSuccessor(Succ: loop1MBB);
14439 BB->addSuccessor(Succ: exitMBB);
14440
14441 // exitMBB:
14442 // ...
14443 BB = exitMBB;
14444 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: dest)
14445 .addReg(RegNo: TmpReg)
14446 .addReg(RegNo: ShiftReg);
14447 } else if (MI.getOpcode() == PPC::FADDrtz) {
14448 // This pseudo performs an FADD with rounding mode temporarily forced
14449 // to round-to-zero. We emit this via custom inserter since the FPSCR
14450 // is not modeled at the SelectionDAG level.
14451 Register Dest = MI.getOperand(i: 0).getReg();
14452 Register Src1 = MI.getOperand(i: 1).getReg();
14453 Register Src2 = MI.getOperand(i: 2).getReg();
14454 DebugLoc dl = MI.getDebugLoc();
14455
14456 MachineRegisterInfo &RegInfo = F->getRegInfo();
14457 Register MFFSReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14458
14459 // Save FPSCR value.
14460 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: MFFSReg);
14461
14462 // Set rounding mode to round-to-zero.
14463 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB1))
14464 .addImm(Val: 31)
14465 .addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14466
14467 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB0))
14468 .addImm(Val: 30)
14469 .addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14470
14471 // Perform addition.
14472 auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::FADD), DestReg: Dest)
14473 .addReg(RegNo: Src1)
14474 .addReg(RegNo: Src2);
14475 if (MI.getFlag(Flag: MachineInstr::NoFPExcept))
14476 MIB.setMIFlag(MachineInstr::NoFPExcept);
14477
14478 // Restore FPSCR value.
14479 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSFb)).addImm(Val: 1).addReg(RegNo: MFFSReg);
14480 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14481 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14482 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14483 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14484 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14485 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14486 ? PPC::ANDI8_rec
14487 : PPC::ANDI_rec;
14488 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14489 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14490
14491 MachineRegisterInfo &RegInfo = F->getRegInfo();
14492 Register Dest = RegInfo.createVirtualRegister(
14493 RegClass: Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14494
14495 DebugLoc Dl = MI.getDebugLoc();
14496 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode), DestReg: Dest)
14497 .addReg(RegNo: MI.getOperand(i: 1).getReg())
14498 .addImm(Val: 1);
14499 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14500 DestReg: MI.getOperand(i: 0).getReg())
14501 .addReg(RegNo: IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14502 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14503 DebugLoc Dl = MI.getDebugLoc();
14504 MachineRegisterInfo &RegInfo = F->getRegInfo();
14505 Register CRReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14506 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TCHECK), DestReg: CRReg);
14507 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14508 DestReg: MI.getOperand(i: 0).getReg())
14509 .addReg(RegNo: CRReg);
14510 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14511 DebugLoc Dl = MI.getDebugLoc();
14512 unsigned Imm = MI.getOperand(i: 1).getImm();
14513 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TBEGIN)).addImm(Val: Imm);
14514 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14515 DestReg: MI.getOperand(i: 0).getReg())
14516 .addReg(RegNo: PPC::CR0EQ);
14517 } else if (MI.getOpcode() == PPC::SETRNDi) {
14518 DebugLoc dl = MI.getDebugLoc();
14519 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14520
14521 // Save FPSCR value.
14522 if (MRI.use_empty(RegNo: OldFPSCRReg))
14523 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14524 else
14525 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14526
14527 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
14528 // the following settings:
14529 // 00 Round to nearest
14530 // 01 Round to 0
14531 // 10 Round to +inf
14532 // 11 Round to -inf
14533
14534 // When the operand is immediate, using the two least significant bits of
14535 // the immediate to set the bits 62:63 of FPSCR.
14536 unsigned Mode = MI.getOperand(i: 1).getImm();
14537 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14538 .addImm(Val: 31)
14539 .addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14540
14541 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14542 .addImm(Val: 30)
14543 .addReg(RegNo: PPC::RM, Flags: RegState::ImplicitDefine);
14544 } else if (MI.getOpcode() == PPC::SETRND) {
14545 DebugLoc dl = MI.getDebugLoc();
14546
14547 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14548 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14549 // If the target doesn't have DirectMove, we should use stack to do the
14550 // conversion, because the target doesn't have the instructions like mtvsrd
14551 // or mfvsrd to do this conversion directly.
14552 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14553 if (Subtarget.hasDirectMove()) {
14554 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg)
14555 .addReg(RegNo: SrcReg);
14556 } else {
14557 // Use stack to do the register copy.
14558 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14559 MachineRegisterInfo &RegInfo = F->getRegInfo();
14560 const TargetRegisterClass *RC = RegInfo.getRegClass(Reg: SrcReg);
14561 if (RC == &PPC::F8RCRegClass) {
14562 // Copy register from F8RCRegClass to G8RCRegclass.
14563 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14564 "Unsupported RegClass.");
14565
14566 StoreOp = PPC::STFD;
14567 LoadOp = PPC::LD;
14568 } else {
14569 // Copy register from G8RCRegClass to F8RCRegclass.
14570 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14571 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14572 "Unsupported RegClass.");
14573 }
14574
14575 MachineFrameInfo &MFI = F->getFrameInfo();
14576 int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
14577
14578 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14579 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: 0),
14580 F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14581 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14582
14583 // Store the SrcReg into the stack.
14584 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: StoreOp))
14585 .addReg(RegNo: SrcReg)
14586 .addImm(Val: 0)
14587 .addFrameIndex(Idx: FrameIdx)
14588 .addMemOperand(MMO: MMOStore);
14589
14590 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14591 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: 0),
14592 F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14593 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14594
14595 // Load from the stack where SrcReg is stored, and save to DestReg,
14596 // so we have done the RegClass conversion from RegClass::SrcReg to
14597 // RegClass::DestReg.
14598 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: LoadOp), DestReg)
14599 .addImm(Val: 0)
14600 .addFrameIndex(Idx: FrameIdx)
14601 .addMemOperand(MMO: MMOLoad);
14602 }
14603 };
14604
14605 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14606
14607 // Save FPSCR value.
14608 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14609
14610 // When the operand is gprc register, use two least significant bits of the
14611 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
14612 //
14613 // copy OldFPSCRTmpReg, OldFPSCRReg
14614 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14615 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14616 // copy NewFPSCRReg, NewFPSCRTmpReg
14617 // mtfsf 255, NewFPSCRReg
14618 MachineOperand SrcOp = MI.getOperand(i: 1);
14619 MachineRegisterInfo &RegInfo = F->getRegInfo();
14620 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14621
14622 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14623
14624 Register ImDefReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14625 Register ExtSrcReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14626
14627 // The first operand of INSERT_SUBREG should be a register which has
14628 // subregisters, we only care about its RegClass, so we should use an
14629 // IMPLICIT_DEF register.
14630 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: ImDefReg);
14631 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::INSERT_SUBREG), DestReg: ExtSrcReg)
14632 .addReg(RegNo: ImDefReg)
14633 .add(MO: SrcOp)
14634 .addImm(Val: 1);
14635
14636 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14637 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDIMI), DestReg: NewFPSCRTmpReg)
14638 .addReg(RegNo: OldFPSCRTmpReg)
14639 .addReg(RegNo: ExtSrcReg)
14640 .addImm(Val: 0)
14641 .addImm(Val: 62);
14642
14643 Register NewFPSCRReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14644 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14645
14646 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
14647 // bits of FPSCR.
14648 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSF))
14649 .addImm(Val: 255)
14650 .addReg(RegNo: NewFPSCRReg)
14651 .addImm(Val: 0)
14652 .addImm(Val: 0);
14653 } else if (MI.getOpcode() == PPC::SETFLM) {
14654 DebugLoc Dl = MI.getDebugLoc();
14655
14656 // Result of setflm is previous FPSCR content, so we need to save it first.
14657 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14658 if (MRI.use_empty(RegNo: OldFPSCRReg))
14659 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14660 else
14661 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14662
14663 // Put bits in 32:63 to FPSCR.
14664 Register NewFPSCRReg = MI.getOperand(i: 1).getReg();
14665 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MTFSF))
14666 .addImm(Val: 255)
14667 .addReg(RegNo: NewFPSCRReg)
14668 .addImm(Val: 0)
14669 .addImm(Val: 0);
14670 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14671 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14672 return emitProbedAlloca(MI, MBB: BB);
14673 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14674 DebugLoc DL = MI.getDebugLoc();
14675 Register Src = MI.getOperand(i: 2).getReg();
14676 Register Lo = MI.getOperand(i: 0).getReg();
14677 Register Hi = MI.getOperand(i: 1).getReg();
14678 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14679 .addDef(RegNo: Lo)
14680 .addUse(RegNo: Src, Flags: {}, SubReg: PPC::sub_gp8_x1);
14681 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14682 .addDef(RegNo: Hi)
14683 .addUse(RegNo: Src, Flags: {}, SubReg: PPC::sub_gp8_x0);
14684 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14685 MI.getOpcode() == PPC::STQX_PSEUDO) {
14686 DebugLoc DL = MI.getDebugLoc();
14687 // Ptr is used as the ptr_rc_no_r0 part
14688 // of LQ/STQ's memory operand and adding result of RA and RB,
14689 // so it has to be g8rc_and_g8rc_nox0.
14690 Register Ptr =
14691 F->getRegInfo().createVirtualRegister(RegClass: &PPC::G8RC_and_G8RC_NOX0RegClass);
14692 Register Val = MI.getOperand(i: 0).getReg();
14693 Register RA = MI.getOperand(i: 1).getReg();
14694 Register RB = MI.getOperand(i: 2).getReg();
14695 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::ADD8), DestReg: Ptr).addReg(RegNo: RA).addReg(RegNo: RB);
14696 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
14697 MCID: MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(Opcode: PPC::LQ)
14698 : TII->get(Opcode: PPC::STQ))
14699 .addReg(RegNo: Val, Flags: getDefRegState(B: MI.getOpcode() == PPC::LQX_PSEUDO))
14700 .addImm(Val: 0)
14701 .addReg(RegNo: Ptr);
14702 } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
14703 MI.getOpcode() == PPC::LDAT_PSEUDO) {
14704 DebugLoc DL = MI.getDebugLoc();
14705 Register DstReg = MI.getOperand(i: 0).getReg();
14706 Register PtrReg = MI.getOperand(i: 1).getReg();
14707 Register ValReg = MI.getOperand(i: 2).getReg();
14708 unsigned FC = MI.getOperand(i: 3).getImm();
14709 bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14710 Register Val64 = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14711 if (IsLwat)
14712 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::SUBREG_TO_REG), DestReg: Val64)
14713 .addReg(RegNo: ValReg)
14714 .addImm(Val: PPC::sub_32);
14715 else
14716 Val64 = ValReg;
14717
14718 Register G8rPair = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14719 Register UndefG8r = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14720 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: UndefG8r);
14721 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::REG_SEQUENCE), DestReg: G8rPair)
14722 .addReg(RegNo: UndefG8r)
14723 .addImm(Val: PPC::sub_gp8_x0)
14724 .addReg(RegNo: Val64)
14725 .addImm(Val: PPC::sub_gp8_x1);
14726
14727 Register PairResult = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14728 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: IsLwat ? PPC::LWAT : PPC::LDAT), DestReg: PairResult)
14729 .addReg(RegNo: G8rPair)
14730 .addReg(RegNo: PtrReg)
14731 .addImm(Val: FC);
14732 Register Result64 = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14733 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: Result64)
14734 .addReg(RegNo: PairResult, Flags: {}, SubReg: PPC::sub_gp8_x0);
14735 if (IsLwat)
14736 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14737 .addReg(RegNo: Result64, Flags: {}, SubReg: PPC::sub_32);
14738 else
14739 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14740 .addReg(RegNo: Result64);
14741 } else if (MI.getOpcode() == PPC::LWAT_COND_PSEUDO ||
14742 MI.getOpcode() == PPC::LDAT_COND_PSEUDO) {
14743 DebugLoc DL = MI.getDebugLoc();
14744 Register DstReg = MI.getOperand(i: 0).getReg();
14745 Register PtrReg = MI.getOperand(i: 1).getReg();
14746 unsigned FC = MI.getOperand(i: 2).getImm();
14747 bool IsLwat_Cond = MI.getOpcode() == PPC::LWAT_COND_PSEUDO;
14748
14749 Register Pair = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14750 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: Pair);
14751
14752 Register PairResult = MRI.createVirtualRegister(RegClass: &PPC::G8pRCRegClass);
14753 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: IsLwat_Cond ? PPC::LWAT : PPC::LDAT),
14754 DestReg: PairResult)
14755 .addReg(RegNo: Pair)
14756 .addReg(RegNo: PtrReg)
14757 .addImm(Val: FC);
14758 Register Result64 = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14759 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: Result64)
14760 .addReg(RegNo: PairResult, Flags: {}, SubReg: PPC::sub_gp8_x0);
14761 if (IsLwat_Cond)
14762 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14763 .addReg(RegNo: Result64, Flags: {}, SubReg: PPC::sub_32);
14764 else
14765 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
14766 .addReg(RegNo: Result64);
14767 } else {
14768 llvm_unreachable("Unexpected instr type to insert");
14769 }
14770
14771 MI.eraseFromParent(); // The pseudo instruction is gone now.
14772 return BB;
14773}
14774
14775//===----------------------------------------------------------------------===//
14776// Target Optimization Hooks
14777//===----------------------------------------------------------------------===//
14778
14779static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14780 // For the estimates, convergence is quadratic, so we essentially double the
14781 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14782 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14783 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14784 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14785 if (VT.getScalarType() == MVT::f64)
14786 RefinementSteps++;
14787 return RefinementSteps;
14788}
14789
/// Build the input test guarding the software square-root estimate sequence.
/// When FTSQRT is usable for \p Op's type, returns an i1 that is true exactly
/// when the operand is *not* eligible for the estimate iteration (per the EQ
/// bit semantics documented below); otherwise defers to the generic
/// TargetLowering implementation.
SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
                                            const DenormalMode &Mode,
                                            SDNodeFlags Flags) const {
  // We only have VSX Vector Test for software Square Root.
  EVT VT = Op.getValueType();
  if (!isTypeLegal(VT: MVT::i1) ||
      (VT != MVT::f64 &&
       ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
    return TargetLowering::getSqrtInputTest(Operand: Op, DAG, Mode, Flags);

  SDLoc DL(Op);
  // The output register of FTSQRT is CR field.
  SDValue FTSQRT = DAG.getNode(Opcode: PPCISD::FTSQRT, DL, VT: MVT::i32, Operand: Op, Flags);
  // ftsqrt BF,FRB
  // Let e_b be the unbiased exponent of the double-precision
  // floating-point operand in register FRB.
  // fe_flag is set to 1 if either of the following conditions occurs.
  //   - The double-precision floating-point operand in register FRB is a zero,
  //     a NaN, or an infinity, or a negative value.
  //   - e_b is less than or equal to -970.
  // Otherwise fe_flag is set to 0.
  // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
  // not eligible for iteration. (zero/negative/infinity/nan or unbiased
  // exponent is less than -970)
  // Extract the EQ bit of the produced CR field as the i1 result.
  SDValue SRIdxVal = DAG.getTargetConstant(Val: PPC::sub_eq, DL, VT: MVT::i32);
  return SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i1,
                                    Op1: FTSQRT, Op2: SRIdxVal),
                 0);
}
14819
14820SDValue
14821PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14822 SelectionDAG &DAG) const {
14823 // We only have VSX Vector Square Root.
14824 EVT VT = Op.getValueType();
14825 if (VT != MVT::f64 &&
14826 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14827 return TargetLowering::getSqrtResultForDenormInput(Operand: Op, DAG);
14828
14829 return DAG.getNode(Opcode: PPCISD::FSQRT, DL: SDLoc(Op), VT, Operand: Op);
14830}
14831
14832SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14833 int Enabled, int &RefinementSteps,
14834 bool &UseOneConstNR,
14835 bool Reciprocal) const {
14836 EVT VT = Operand.getValueType();
14837 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14838 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14839 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14840 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14841 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14842 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14843
14844 // The Newton-Raphson computation with a single constant does not provide
14845 // enough accuracy on some CPUs.
14846 UseOneConstNR = !Subtarget.needsTwoConstNR();
14847 return DAG.getNode(Opcode: PPCISD::FRSQRTE, DL: SDLoc(Operand), VT, Operand);
14848 }
14849 return SDValue();
14850}
14851
14852SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14853 int Enabled,
14854 int &RefinementSteps) const {
14855 EVT VT = Operand.getValueType();
14856 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14857 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14858 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14859 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14860 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14861 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14862 return DAG.getNode(Opcode: PPCISD::FRE, DL: SDLoc(Operand), VT, Operand);
14863 }
14864 return SDValue();
14865}
14866
14867unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14868 // Note: This functionality is used only when arcp is enabled, and
14869 // on cores with reciprocal estimates (which are used when arcp is
14870 // enabled for division), this functionality is redundant with the default
14871 // combiner logic (once the division -> reciprocal/multiply transformation
14872 // has taken place). As a result, this matters more for older cores than for
14873 // newer ones.
14874
14875 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14876 // reciprocal if there are two or more FDIVs (for embedded cores with only
14877 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
14878 switch (Subtarget.getCPUDirective()) {
14879 default:
14880 return 3;
14881 case PPC::DIR_440:
14882 case PPC::DIR_A2:
14883 case PPC::DIR_E500:
14884 case PPC::DIR_E500mc:
14885 case PPC::DIR_E5500:
14886 return 2;
14887 }
14888}
14889
14890// isConsecutiveLSLoc needs to work even if all adds have not yet been
14891// collapsed, and so we need to look through chains of them.
14892static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14893 int64_t& Offset, SelectionDAG &DAG) {
14894 if (DAG.isBaseWithConstantOffset(Op: Loc)) {
14895 Base = Loc.getOperand(i: 0);
14896 Offset += cast<ConstantSDNode>(Val: Loc.getOperand(i: 1))->getSExtValue();
14897
14898 // The base might itself be a base plus an offset, and if so, accumulate
14899 // that as well.
14900 getBaseWithConstantOffset(Loc: Loc.getOperand(i: 0), Base, Offset, DAG);
14901 }
14902}
14903
/// Return true if the address \p Loc accesses memory exactly \p Dist elements
/// of \p Bytes bytes after the access made by \p Base. Handles three address
/// forms: frame indices, base+constant-offset chains, and global+offset.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  // The access width must match the element size being checked.
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    // Frame-index addresses: both must be frame indices with the expected
    // object size, and their static frame offsets must differ by Dist*Bytes.
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Val&: Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(Val&: BaseLoc)->getIndex();
    int FS  = MFI.getObjectSize(ObjectIdx: FI);
    int BFS = MFI.getObjectSize(ObjectIdx: BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(ObjectIdx: FI) == (MFI.getObjectOffset(ObjectIdx: BFI) + Dist*Bytes);
  }

  // General case: peel constant-offset adds off both addresses and compare
  // the underlying bases plus accumulated offsets.
  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base&: Base1, Offset&: Offset1, DAG);
  getBaseWithConstantOffset(Loc: BaseLoc, Base&: Base2, Offset&: Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  // Last resort: both addresses may be global-value-plus-offset.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(N: Loc.getNode(), GA&: GV1, Offset&: Offset1);
  bool isGA2 = TLI.isGAPlusOffset(N: BaseLoc.getNode(), GA&: GV2, Offset&: Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}
14941
14942// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14943// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  // Ordinary loads/stores: compare base pointers directly.
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Val: N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  // Altivec/VSX load intrinsics: derive the memory VT from the intrinsic ID.
  // For INTRINSIC_W_CHAIN the address operand is at index 2.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (N->getConstantOperandVal(Num: 1)) {
    default: return false;
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(Loc: N->getOperand(Num: 2), VT, Base, Bytes, Dist, DAG);
  }

  // Altivec/VSX store intrinsics: same idea, but for INTRINSIC_VOID the
  // address operand is at index 3.
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (N->getConstantOperandVal(Num: 1)) {
    default: return false;
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(Loc: N->getOperand(Num: 3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}
15015
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  // LoadRoots collects the nodes just above all top-level loads and token
  // factors found in the upward walk; they seed the downward walk below.
  SmallPtrSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallPtrSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(Ptr: ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: ChainNext)) {
      if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: 1, DAG))
        return true;

      if (!Visited.count(Ptr: ChainLD->getChain().getNode()))
        Queue.push_back(Elt: ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(Ptr: O.getNode()))
          Queue.push_back(Elt: O.getNode());
    } else
      LoadRoots.insert(Ptr: ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
  // all loads (just the chain uses) and token factors to find a consecutive
  // load.
  Visited.clear();
  Queue.clear();

  for (SDNode *I : LoadRoots) {
    Queue.push_back(Elt: I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(Ptr: LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: LoadRoot))
        if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: 1, DAG))
          return true;

      // Only descend through users that are chained memory nodes (their chain
      // operand must be this node) or token factors.
      for (SDNode *U : LoadRoot->users())
        if (((isa<MemSDNode>(Val: U) &&
              cast<MemSDNode>(Val: U)->getChain().getNode() == LoadRoot) ||
             U->getOpcode() == ISD::TokenFactor) &&
            !Visited.count(Ptr: U))
          Queue.push_back(Elt: U);
    }
  }

  return false;
}
15082
15083/// This function is called when we have proved that a SETCC node can be replaced
15084/// by subtraction (and other supporting instructions) so that the result of
15085/// comparison is kept in a GPR instead of CR. This function is purely for
15086/// codegen purposes and has some flags to guide the codegen process.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  // Zero extend the operands to the largest legal integer. Originally, they
  // must be of a strictly smaller size.
  // NOTE(review): ISD::ZERO_EXTEND normally takes a single value operand; the
  // extra Size constant passed here looks extraneous — confirm it is intended.
  auto Op0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: 0),
                         N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));
  auto Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: 1),
                         N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));

  // Swap if needed. Depends on the condition code.
  if (Swap)
    std::swap(a&: Op0, b&: Op1);

  // Subtract extended integers.
  auto SubNode = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: Op0, N2: Op1);

  // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the result of original comparison.
  auto Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SubNode,
                             N2: DAG.getConstant(Val: Size - 1, DL, VT: MVT::i32));
  auto Final = Shifted;

  // Complement the result if needed. Based on the condition code.
  if (Complement)
    Final = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, N1: Shifted,
                        N2: DAG.getConstant(Val: 1, DL, VT: MVT::i64));

  // Truncate back to the i1 the SETCC users expect.
  return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: Final);
}
15118
15119SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15120 DAGCombinerInfo &DCI) const {
15121 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15122
15123 SelectionDAG &DAG = DCI.DAG;
15124 SDLoc DL(N);
15125
15126 // Size of integers being compared has a critical role in the following
15127 // analysis, so we prefer to do this when all types are legal.
15128 if (!DCI.isAfterLegalizeDAG())
15129 return SDValue();
15130
15131 // If all users of SETCC extend its value to a legal integer type
15132 // then we replace SETCC with a subtraction
15133 for (const SDNode *U : N->users())
15134 if (U->getOpcode() != ISD::ZERO_EXTEND)
15135 return SDValue();
15136
15137 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15138 auto OpSize = N->getOperand(Num: 0).getValueSizeInBits();
15139
15140 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
15141
15142 if (OpSize < Size) {
15143 switch (CC) {
15144 default: break;
15145 case ISD::SETULT:
15146 return generateEquivalentSub(N, Size, Complement: false, Swap: false, DL, DAG);
15147 case ISD::SETULE:
15148 return generateEquivalentSub(N, Size, Complement: true, Swap: true, DL, DAG);
15149 case ISD::SETUGT:
15150 return generateEquivalentSub(N, Size, Complement: false, Swap: true, DL, DAG);
15151 case ISD::SETUGE:
15152 return generateEquivalentSub(N, Size, Complement: true, Swap: false, DL, DAG);
15153 }
15154 }
15155
15156 return SDValue();
15157}
15158
15159SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15160 DAGCombinerInfo &DCI) const {
15161 SelectionDAG &DAG = DCI.DAG;
15162 SDLoc dl(N);
15163
15164 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15165 // If we're tracking CR bits, we need to be careful that we don't have:
15166 // trunc(binary-ops(zext(x), zext(y)))
15167 // or
15168 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15169 // such that we're unnecessarily moving things into GPRs when it would be
15170 // better to keep them in CR bits.
15171
15172 // Note that trunc here can be an actual i1 trunc, or can be the effective
15173 // truncation that comes from a setcc or select_cc.
15174 if (N->getOpcode() == ISD::TRUNCATE &&
15175 N->getValueType(ResNo: 0) != MVT::i1)
15176 return SDValue();
15177
15178 if (N->getOperand(Num: 0).getValueType() != MVT::i32 &&
15179 N->getOperand(Num: 0).getValueType() != MVT::i64)
15180 return SDValue();
15181
15182 if (N->getOpcode() == ISD::SETCC ||
15183 N->getOpcode() == ISD::SELECT_CC) {
15184 // If we're looking at a comparison, then we need to make sure that the
15185 // high bits (all except for the first) don't matter the result.
15186 ISD::CondCode CC =
15187 cast<CondCodeSDNode>(Val: N->getOperand(
15188 Num: N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15189 unsigned OpBits = N->getOperand(Num: 0).getValueSizeInBits();
15190
15191 if (ISD::isSignedIntSetCC(Code: CC)) {
15192 if (DAG.ComputeNumSignBits(Op: N->getOperand(Num: 0)) != OpBits ||
15193 DAG.ComputeNumSignBits(Op: N->getOperand(Num: 1)) != OpBits)
15194 return SDValue();
15195 } else if (ISD::isUnsignedIntSetCC(Code: CC)) {
15196 if (!DAG.MaskedValueIsZero(Op: N->getOperand(Num: 0),
15197 Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-1)) ||
15198 !DAG.MaskedValueIsZero(Op: N->getOperand(Num: 1),
15199 Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-1)))
15200 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15201 : SDValue());
15202 } else {
15203 // This is neither a signed nor an unsigned comparison, just make sure
15204 // that the high bits are equal.
15205 KnownBits Op1Known = DAG.computeKnownBits(Op: N->getOperand(Num: 0));
15206 KnownBits Op2Known = DAG.computeKnownBits(Op: N->getOperand(Num: 1));
15207
15208 // We don't really care about what is known about the first bit (if
15209 // anything), so pretend that it is known zero for both to ensure they can
15210 // be compared as constants.
15211 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(BitPosition: 0);
15212 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(BitPosition: 0);
15213
15214 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15215 Op1Known.getConstant() != Op2Known.getConstant())
15216 return SDValue();
15217 }
15218 }
15219
15220 // We now know that the higher-order bits are irrelevant, we just need to
15221 // make sure that all of the intermediate operations are bit operations, and
15222 // all inputs are extensions.
15223 if (N->getOperand(Num: 0).getOpcode() != ISD::AND &&
15224 N->getOperand(Num: 0).getOpcode() != ISD::OR &&
15225 N->getOperand(Num: 0).getOpcode() != ISD::XOR &&
15226 N->getOperand(Num: 0).getOpcode() != ISD::SELECT &&
15227 N->getOperand(Num: 0).getOpcode() != ISD::SELECT_CC &&
15228 N->getOperand(Num: 0).getOpcode() != ISD::TRUNCATE &&
15229 N->getOperand(Num: 0).getOpcode() != ISD::SIGN_EXTEND &&
15230 N->getOperand(Num: 0).getOpcode() != ISD::ZERO_EXTEND &&
15231 N->getOperand(Num: 0).getOpcode() != ISD::ANY_EXTEND)
15232 return SDValue();
15233
15234 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15235 N->getOperand(Num: 1).getOpcode() != ISD::AND &&
15236 N->getOperand(Num: 1).getOpcode() != ISD::OR &&
15237 N->getOperand(Num: 1).getOpcode() != ISD::XOR &&
15238 N->getOperand(Num: 1).getOpcode() != ISD::SELECT &&
15239 N->getOperand(Num: 1).getOpcode() != ISD::SELECT_CC &&
15240 N->getOperand(Num: 1).getOpcode() != ISD::TRUNCATE &&
15241 N->getOperand(Num: 1).getOpcode() != ISD::SIGN_EXTEND &&
15242 N->getOperand(Num: 1).getOpcode() != ISD::ZERO_EXTEND &&
15243 N->getOperand(Num: 1).getOpcode() != ISD::ANY_EXTEND)
15244 return SDValue();
15245
15246 SmallVector<SDValue, 4> Inputs;
15247 SmallVector<SDValue, 8> BinOps, PromOps;
15248 SmallPtrSet<SDNode *, 16> Visited;
15249
15250 for (unsigned i = 0; i < 2; ++i) {
15251 if (((N->getOperand(Num: i).getOpcode() == ISD::SIGN_EXTEND ||
15252 N->getOperand(Num: i).getOpcode() == ISD::ZERO_EXTEND ||
15253 N->getOperand(Num: i).getOpcode() == ISD::ANY_EXTEND) &&
15254 N->getOperand(Num: i).getOperand(i: 0).getValueType() == MVT::i1) ||
15255 isa<ConstantSDNode>(Val: N->getOperand(Num: i)))
15256 Inputs.push_back(Elt: N->getOperand(Num: i));
15257 else
15258 BinOps.push_back(Elt: N->getOperand(Num: i));
15259
15260 if (N->getOpcode() == ISD::TRUNCATE)
15261 break;
15262 }
15263
15264 // Visit all inputs, collect all binary operations (and, or, xor and
15265 // select) that are all fed by extensions.
15266 while (!BinOps.empty()) {
15267 SDValue BinOp = BinOps.pop_back_val();
15268
15269 if (!Visited.insert(Ptr: BinOp.getNode()).second)
15270 continue;
15271
15272 PromOps.push_back(Elt: BinOp);
15273
15274 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15275 // The condition of the select is not promoted.
15276 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15277 continue;
15278 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15279 continue;
15280
15281 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15282 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15283 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15284 BinOp.getOperand(i).getOperand(i: 0).getValueType() == MVT::i1) ||
15285 isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
15286 Inputs.push_back(Elt: BinOp.getOperand(i));
15287 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15288 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15289 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15290 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15291 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15292 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15293 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15294 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15295 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15296 BinOps.push_back(Elt: BinOp.getOperand(i));
15297 } else {
15298 // We have an input that is not an extension or another binary
15299 // operation; we'll abort this transformation.
15300 return SDValue();
15301 }
15302 }
15303 }
15304
15305 // Make sure that this is a self-contained cluster of operations (which
15306 // is not quite the same thing as saying that everything has only one
15307 // use).
15308 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15309 if (isa<ConstantSDNode>(Val: Inputs[i]))
15310 continue;
15311
15312 for (const SDNode *User : Inputs[i].getNode()->users()) {
15313 if (User != N && !Visited.count(Ptr: User))
15314 return SDValue();
15315
15316 // Make sure that we're not going to promote the non-output-value
15317 // operand(s) or SELECT or SELECT_CC.
15318 // FIXME: Although we could sometimes handle this, and it does occur in
15319 // practice that one of the condition inputs to the select is also one of
15320 // the outputs, we currently can't deal with this.
15321 if (User->getOpcode() == ISD::SELECT) {
15322 if (User->getOperand(Num: 0) == Inputs[i])
15323 return SDValue();
15324 } else if (User->getOpcode() == ISD::SELECT_CC) {
15325 if (User->getOperand(Num: 0) == Inputs[i] ||
15326 User->getOperand(Num: 1) == Inputs[i])
15327 return SDValue();
15328 }
15329 }
15330 }
15331
15332 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15333 for (const SDNode *User : PromOps[i].getNode()->users()) {
15334 if (User != N && !Visited.count(Ptr: User))
15335 return SDValue();
15336
15337 // Make sure that we're not going to promote the non-output-value
15338 // operand(s) or SELECT or SELECT_CC.
15339 // FIXME: Although we could sometimes handle this, and it does occur in
15340 // practice that one of the condition inputs to the select is also one of
15341 // the outputs, we currently can't deal with this.
15342 if (User->getOpcode() == ISD::SELECT) {
15343 if (User->getOperand(Num: 0) == PromOps[i])
15344 return SDValue();
15345 } else if (User->getOpcode() == ISD::SELECT_CC) {
15346 if (User->getOperand(Num: 0) == PromOps[i] ||
15347 User->getOperand(Num: 1) == PromOps[i])
15348 return SDValue();
15349 }
15350 }
15351 }
15352
15353 // Replace all inputs with the extension operand.
15354 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15355 // Constants may have users outside the cluster of to-be-promoted nodes,
15356 // and so we need to replace those as we do the promotions.
15357 if (isa<ConstantSDNode>(Val: Inputs[i]))
15358 continue;
15359 else
15360 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i], To: Inputs[i].getOperand(i: 0));
15361 }
15362
15363 std::list<HandleSDNode> PromOpHandles;
15364 for (auto &PromOp : PromOps)
15365 PromOpHandles.emplace_back(args&: PromOp);
15366
15367 // Replace all operations (these are all the same, but have a different
15368 // (i1) return type). DAG.getNode will validate that the types of
15369 // a binary operator match, so go through the list in reverse so that
15370 // we've likely promoted both operands first. Any intermediate truncations or
15371 // extensions disappear.
15372 while (!PromOpHandles.empty()) {
15373 SDValue PromOp = PromOpHandles.back().getValue();
15374 PromOpHandles.pop_back();
15375
15376 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15377 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15378 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15379 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15380 if (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: 0)) &&
15381 PromOp.getOperand(i: 0).getValueType() != MVT::i1) {
15382 // The operand is not yet ready (see comment below).
15383 PromOpHandles.emplace_front(args&: PromOp);
15384 continue;
15385 }
15386
15387 SDValue RepValue = PromOp.getOperand(i: 0);
15388 if (isa<ConstantSDNode>(Val: RepValue))
15389 RepValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: RepValue);
15390
15391 DAG.ReplaceAllUsesOfValueWith(From: PromOp, To: RepValue);
15392 continue;
15393 }
15394
15395 unsigned C;
15396 switch (PromOp.getOpcode()) {
15397 default: C = 0; break;
15398 case ISD::SELECT: C = 1; break;
15399 case ISD::SELECT_CC: C = 2; break;
15400 }
15401
15402 if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
15403 PromOp.getOperand(i: C).getValueType() != MVT::i1) ||
15404 (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+1)) &&
15405 PromOp.getOperand(i: C+1).getValueType() != MVT::i1)) {
15406 // The to-be-promoted operands of this node have not yet been
15407 // promoted (this should be rare because we're going through the
15408 // list backward, but if one of the operands has several users in
15409 // this cluster of to-be-promoted nodes, it is possible).
15410 PromOpHandles.emplace_front(args&: PromOp);
15411 continue;
15412 }
15413
15414 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15415
15416 // If there are any constant inputs, make sure they're replaced now.
15417 for (unsigned i = 0; i < 2; ++i)
15418 if (isa<ConstantSDNode>(Val: Ops[C+i]))
15419 Ops[C+i] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: Ops[C+i]);
15420
15421 DAG.ReplaceAllUsesOfValueWith(From: PromOp,
15422 To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: MVT::i1, Ops));
15423 }
15424
15425 // Now we're left with the initial truncation itself.
15426 if (N->getOpcode() == ISD::TRUNCATE)
15427 return N->getOperand(Num: 0);
15428
15429 // Otherwise, this is a comparison. The operands to be compared have just
15430 // changed type (to i1), but everything else is the same.
15431 return SDValue(N, 0);
15432}
15433
/// Combine a sign/zero/any extension of a cluster of logical operations
/// (and/or/xor/select/select_cc) whose inputs are truncations, so that the
/// logic is performed directly in the wider (extended) type and the
/// intermediate truncations and the final extension can (usually) be
/// removed.  Returns the replacement value, or an empty SDValue if the
/// pattern does not apply.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  // zext(binary-ops(trunc(x), trunc(y)))
  // or
  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(ResNo: 0) != MVT::i32 &&
      N->getValueType(ResNo: 0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(Num: 0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(Num: 0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(Num: 0).getOpcode() != ISD::AND &&
      N->getOperand(Num: 0).getOpcode() != ISD::OR &&
      N->getOperand(Num: 0).getOpcode() != ISD::XOR &&
      N->getOperand(Num: 0).getOpcode() != ISD::SELECT &&
      N->getOperand(Num: 0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(Num: 0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(Ptr: BinOp.getNode()).second)
      continue;

    PromOps.push_back(Elt: BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
        Inputs.push_back(Elt: BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(Elt: BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Val: Inputs[i]))
      continue;

    for (SDNode *User : Inputs[i].getNode()->users()) {
      if (User != N && !Visited.count(Ptr: User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(Num: 0) == Inputs[i])
          SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
                                  y: User->getOperand(Num: 0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(Num: 0) == Inputs[i])
          SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
                                  y: User->getOperand(Num: 0).getValueType()));
        if (User->getOperand(Num: 1) == Inputs[i])
          SelectTruncOp[1].insert(KV: std::make_pair(x&: User,
                                  y: User->getOperand(Num: 1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode *User : PromOps[i].getNode()->users()) {
      if (User != N && !Visited.count(Ptr: User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(Num: 0) == PromOps[i])
          SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
                                  y: User->getOperand(Num: 0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(Num: 0) == PromOps[i])
          SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
                                  y: User->getOperand(Num: 0).getValueType()));
        if (User->getOperand(Num: 1) == PromOps[i])
          SelectTruncOp[1].insert(KV: std::make_pair(x&: User,
                                  y: User->getOperand(Num: 1).getValueType()));
      }
    }
  }

  // Width of the pre-extension value; used below to decide whether the final
  // masking (zext) or shift pair (sext) can be elided.
  unsigned PromBits = N->getOperand(Num: 0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Val: Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(i: 0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Op: Inputs[i].getOperand(i: 0),
                                  Mask: APInt::getHighBitsSet(numBits: OpBits,
                                                        hiBitsSet: OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Op: Inputs[i].getOperand(i: 0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Convert PromOps to handles before doing any RAUW operations, as these
  // may CSE with existing nodes, deleting the originals.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(args&: PromOp);

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Val: Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(i: 0);
    if (Inputs[i].getValueType() == N->getValueType(ResNo: 0))
      DAG.ReplaceAllUsesOfValueWith(From: Inputs[i], To: InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
        To: DAG.getSExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
        To: DAG.getZExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
    else
      DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
        To: DAG.getAnyExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
  }

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
         PromOp.getOperand(i: C).getValueType() != N->getValueType(ResNo: 0)) ||
        (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+1)) &&
         PromOp.getOperand(i: C+1).getValueType() != N->getValueType(ResNo: 0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(args&: PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(Val: PromOp.getNode()) &&
           PromOp.getOperand(i: 0).getValueType() != N->getValueType(ResNo: 0)) ||
          (SelectTruncOp[1].count(Val: PromOp.getNode()) &&
           PromOp.getOperand(i: 1).getValueType() != N->getValueType(ResNo: 0))) {
        PromOpHandles.emplace_front(args&: PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Val: Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(ResNo: 0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(Val: PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI0->second, Operand: Ops[0]);
      auto SI1 = SelectTruncOp[1].find(Val: PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI1->second, Operand: Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(From: PromOp,
      To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: N->getValueType(ResNo: 0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(Num: 0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 0),
                       N2: DAG.getConstant(Val: APInt::getLowBitsSet(
                                           numBits: N->getValueSizeInBits(ResNo: 0), loBitsSet: PromBits),
                                        DL: dl, VT: N->getValueType(ResNo: 0)));

  // Sign-extend via a shift-left/arithmetic-shift-right pair.
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(LHSTy: N->getValueType(ResNo: 0), DL: DAG.getDataLayout());
  SDValue ShiftCst =
    DAG.getConstant(Val: N->getValueSizeInBits(ResNo: 0) - PromBits, DL: dl, VT: ShiftAmountTy);
  return DAG.getNode(
      Opcode: ISD::SRA, DL: dl, VT: N->getValueType(ResNo: 0),
      N1: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 0), N2: ShiftCst),
      N2: ShiftCst);
}
15710
15711// The function check a i128 load can convert to 16i8 load for Vcmpequb.
15712static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) {
15713
15714 auto isValidForConvert = [](SDValue &Operand) {
15715 if (!Operand.hasOneUse())
15716 return false;
15717
15718 if (Operand.getValueType() != MVT::i128)
15719 return false;
15720
15721 if (Operand.getOpcode() == ISD::Constant)
15722 return true;
15723
15724 auto *LoadNode = dyn_cast<LoadSDNode>(Val&: Operand);
15725 if (!LoadNode)
15726 return false;
15727
15728 // If memory operation is volatile, do not perform any
15729 // optimization or transformation. Volatile operations must be preserved
15730 // as written to ensure correct program behavior, so we return an empty
15731 // SDValue to indicate no action.
15732
15733 if (LoadNode->isVolatile())
15734 return false;
15735
15736 // Only combine loads if both use the unindexed addressing mode.
15737 // PowerPC AltiVec/VMX does not support vector loads or stores with
15738 // pre/post-increment addressing. Indexed modes may imply implicit
15739 // pointer updates, which are not compatible with AltiVec vector
15740 // instructions.
15741 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15742 return false;
15743
15744 // Only combine loads if both are non-extending loads
15745 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15746 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15747 // loaded value's semantics and are not compatible with vector loads.
15748 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15749 return false;
15750
15751 return true;
15752 };
15753
15754 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15755}
15756
15757SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
15758 const SDLoc &DL) {
15759
15760 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15761
15762 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15763 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15764 "CC mus be ISD::SETNE or ISD::SETEQ");
15765
15766 auto getV16i8Load = [&](const SDValue &Operand) {
15767 if (Operand.getOpcode() == ISD::Constant)
15768 return DAG.getBitcast(VT: MVT::v16i8, V: Operand);
15769
15770 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15771
15772 auto *LoadNode = cast<LoadSDNode>(Val: Operand);
15773 SDValue NewLoad =
15774 DAG.getLoad(VT: MVT::v16i8, dl: DL, Chain: LoadNode->getChain(),
15775 Ptr: LoadNode->getBasePtr(), MMO: LoadNode->getMemOperand());
15776 DAG.ReplaceAllUsesOfValueWith(From: Operand.getValue(R: 1), To: NewLoad.getValue(R: 1));
15777 return NewLoad;
15778 };
15779
15780 // Following code transforms the DAG
15781 // t0: ch,glue = EntryToken
15782 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15783 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15784 // undef:i64
15785 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15786 // t5: i128,ch =
15787 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15788 // setcc t3, t5, setne:ch
15789 //
15790 // ---->
15791 //
15792 // t0: ch,glue = EntryToken
15793 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15794 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15795 // undef:i64
15796 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15797 // t5: v16i8,ch =
15798 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15799 // t6: i32 =
15800 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15801 // Constant:i32<2>, t3, t5
15802 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15803
15804 // Or transforms the DAG
15805 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15806 // t8: i1 =
15807 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15808 //
15809 // --->
15810 //
15811 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15812 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15813 // t7: i32 =
15814 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15815
15816 SDValue LHSVec = getV16i8Load(N->getOperand(Num: 0));
15817 SDValue RHSVec = getV16i8Load(N->getOperand(Num: 1));
15818
15819 SDValue IntrID =
15820 DAG.getConstant(Val: Intrinsic::ppc_altivec_vcmpequb_p, DL, VT: MVT::i32);
15821 SDValue CRSel = DAG.getConstant(Val: 2, DL, VT: MVT::i32); // which CR6 predicate field
15822 SDValue PredResult = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
15823 N1: IntrID, N2: CRSel, N3: LHSVec, N4: RHSVec);
15824 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15825 // so we need to invert the CC opcode.
15826 return DAG.getSetCC(DL, VT: N->getValueType(ResNo: 0), LHS: PredResult,
15827 RHS: DAG.getConstant(Val: 0, DL, VT: MVT::i32),
15828 Cond: CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15829}
15830
15831// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15832// If it is , return true; otherwise return false.
15833static bool canConvertSETCCToXori(SDNode *N) {
15834 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15835
15836 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15837 if (CC != ISD::SETEQ)
15838 return false;
15839
15840 SDValue LHS = N->getOperand(Num: 0);
15841 SDValue RHS = N->getOperand(Num: 1);
15842
15843 // Check the `SDValue &V` is from `and` with `1`.
15844 auto IsAndWithOne = [](SDValue &V) {
15845 if (V.getOpcode() == ISD::AND) {
15846 for (const SDValue &Op : V->ops())
15847 if (auto *C = dyn_cast<ConstantSDNode>(Val: Op))
15848 if (C->isOne())
15849 return true;
15850 }
15851 return false;
15852 };
15853
15854 // Check whether the SETCC compare with zero.
15855 auto IsCompareWithZero = [](SDValue &V) {
15856 if (auto *C = dyn_cast<ConstantSDNode>(Val&: V))
15857 if (C->isZero())
15858 return true;
15859 return false;
15860 };
15861
15862 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15863 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15864}
15865
15866// You must check whether the `SDNode* N` can be converted to Xori using
15867// the function `static bool canConvertSETCCToXori(SDNode *N)`
15868// before calling the function; otherwise, it may produce incorrect results.
15869static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG) {
15870
15871 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15872 SDValue LHS = N->getOperand(Num: 0);
15873 SDValue RHS = N->getOperand(Num: 1);
15874 SDLoc DL(N);
15875
15876 [[maybe_unused]] ISD::CondCode CC =
15877 cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15878 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15879 // Rewrite it as XORI (and X, 1), 1.
15880 auto MakeXor1 = [&](SDValue V) {
15881 EVT VT = V.getValueType();
15882 SDValue One = DAG.getConstant(Val: 1, DL, VT);
15883 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: V, N2: One);
15884 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: Xor);
15885 };
15886
15887 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15888 return MakeXor1(LHS);
15889
15890 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15891 return MakeXor1(RHS);
15892
15893 llvm_unreachable("Should not reach here.");
15894}
15895
15896SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15897 DAGCombinerInfo &DCI) const {
15898 assert(N->getOpcode() == ISD::SETCC &&
15899 "Should be called with a SETCC node");
15900
15901 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
15902 // If it is, rewrite it as XORI (and X, 1), 1.
15903 if (canConvertSETCCToXori(N))
15904 return ConvertSETCCToXori(N, DAG&: DCI.DAG);
15905
15906 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15907 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15908 SDValue LHS = N->getOperand(Num: 0);
15909 SDValue RHS = N->getOperand(Num: 1);
15910
15911 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15912 if (LHS.getOpcode() == ISD::SUB && isNullConstant(V: LHS.getOperand(i: 0)) &&
15913 LHS.hasOneUse())
15914 std::swap(a&: LHS, b&: RHS);
15915
15916 // x == 0-y --> x+y == 0
15917 // x != 0-y --> x+y != 0
15918 if (RHS.getOpcode() == ISD::SUB && isNullConstant(V: RHS.getOperand(i: 0)) &&
15919 RHS.hasOneUse()) {
15920 SDLoc DL(N);
15921 SelectionDAG &DAG = DCI.DAG;
15922 EVT VT = N->getValueType(ResNo: 0);
15923 EVT OpVT = LHS.getValueType();
15924 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: OpVT, N1: LHS, N2: RHS.getOperand(i: 1));
15925 return DAG.getSetCC(DL, VT, LHS: Add, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond: CC);
15926 }
15927
15928 // Optimization: Fold i128 equality/inequality compares of two loads into a
15929 // vectorized compare using vcmpequb.p when Altivec is available.
15930 //
15931 // Rationale:
15932 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15933 // On VSX-capable subtargets, we can instead reinterpret the i128 loads
15934 // as v16i8 vectors and use the Altive vcmpequb.p instruction to
15935 // perform a full 128-bit equality check in a single vector compare.
15936 //
15937 // Example Result:
15938 // This transformation replaces memcmp(a, b, 16) with two vector loads
15939 // and one vector compare instruction.
15940
15941 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15942 return convertTwoLoadsAndCmpToVCMPEQUB(DAG&: DCI.DAG, N, DL: SDLoc(N));
15943 }
15944
15945 return DAGCombineTruncBoolExt(N, DCI);
15946}
15947
15948// Is this an extending load from an f32 to an f64?
15949static bool isFPExtLoad(SDValue Op) {
15950 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: Op.getNode()))
15951 return LD->getExtensionType() == ISD::EXTLOAD &&
15952 Op.getValueType() == MVT::f64;
15953 return false;
15954}
15955
/// Reduces the number of fp-to-int conversion when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
/// single floating to integer conversion of the vector.
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(Num: 0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCSISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(i: 0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    // FCTIWZ/FCTIWUZ produce 32-bit integer results; the 64-bit variants are
    // FCTIDZ/FCTIDUZ.
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(i: 0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(ResNo: 0);
    // First pass: verify every element is the same kind of conversion, and
    // detect whether the whole vector is a splat of the first element.
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(Num: i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(i: 0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(Op: NextOp.getOperand(i: 0).getOperand(i: 0)))
        return SDValue();
      if (N->getOperand(Num: i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(Num: i).getOperand(i: 0);
      if (Is32Bit) {
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
        // here, we know that all inputs are extending loads so this is safe).
        if (In.isUndef())
          Ops.push_back(Elt: DAG.getUNDEF(VT: SrcVT));
        else {
          SDValue Trunc =
              DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: In.getOperand(i: 0),
                          N2: DAG.getIntPtrConstant(Val: 1, DL: dl, /*isTarget=*/true));
          Ops.push_back(Elt: Trunc);
        }
      } else
        Ops.push_back(Elt: In.isUndef() ? DAG.getUNDEF(VT: SrcVT) : In.getOperand(i: 0));
    }

    // FCTIDZ/FCTIWZ are the signed conversions; the *U* variants are
    // unsigned.
    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    // Build one FP vector of matching arity and convert it with a single
    // vector fp-to-int.
    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(VT: NewVT, DL: dl, Ops);
    return DAG.getNode(Opcode, DL: dl, VT: TargetVT, Operand: BV);
  }
  return SDValue();
}
16043
// The LXVKQ instruction loads a VSX vector with a special quadword value
// selected by an immediate operand. This helper returns the details of a
// successful match as a tuple of {LXVKQ unsigned IMM value, right shift
// amount} used to generate the LXVKQ instruction and the subsequent shift
// instruction required to reproduce the original build vector pattern.
16049
16050// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16051using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16052
16053static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16054
16055 // LXVKQ instruction loads the Quadword value:
16056 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16057 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
16058 static const uint32_t Uim = 16;
16059
16060 // Check for direct LXVKQ match (no shift needed)
16061 if (FullVal == BasePattern)
16062 return std::make_tuple(args: Uim, args: uint8_t{0});
16063
16064 // Check if FullValue is 1 (the result of the base pattern >> 127)
16065 if (FullVal == APInt(128, 1))
16066 return std::make_tuple(args: Uim, args: uint8_t{127});
16067
16068 return std::nullopt;
16069}
16070
/// Combine a build vector into a single load (using lxvkq) or into a splat
/// with a shift of a constant (xxspltib + vsrq) by recognising special
/// patterns in the build vector.
/// The LXVKQ instruction loads a VSX vector with a special quadword value
/// based on an immediate operand: if UIM=0b10000 then LXVKQ loads
/// VSR[32×TX+T] with the value
/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
/// This can be used to inline build vector constants that match the
/// following patterns:
///
/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
/// The MSB pattern can be loaded directly with LXVKQ, while the LSB pattern
/// is produced by a combination of splatting and right-shift instructions.
16083
SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
                                                      SelectionDAG &DAG) const {

  assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
         "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");

  // This transformation is only supported if we are loading either a byte,
  // halfword, word, or doubleword.
  EVT VT = Op.getValueType();
  if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
        VT == MVT::v2i64))
    return SDValue();

  LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
                          << VT.getEVTString() << "): ";
             Op->dump());

  unsigned NumElems = VT.getVectorNumElements();
  unsigned ElemBits = VT.getScalarSizeInBits();

  bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();

  // Check for Non-constant operand in the build vector.
  for (const SDValue &Operand : Op.getNode()->op_values()) {
    if (!isa<ConstantSDNode>(Val: Operand))
      return SDValue();
  }

  // Assemble build vector operands as a 128-bit register value.
  // We need to reconstruct what the 128-bit register pattern would be
  // that produces this vector when interpreted with the current endianness.
  APInt FullVal = APInt::getZero(numBits: 128);

  for (unsigned Index = 0; Index < NumElems; ++Index) {
    auto *C = cast<ConstantSDNode>(Val: Op.getOperand(i: Index));

    // Get element value as raw bits (zero-extended)
    uint64_t ElemValue = C->getZExtValue();

    // Mask to element size to ensure we only get the relevant bits
    if (ElemBits < 64)
      ElemValue &= ((1ULL << ElemBits) - 1);

    // Calculate bit position for this element in the 128-bit register:
    // element 0 occupies the low bits on LE, the high bits on BE.
    unsigned BitPos =
        (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);

    // Create APInt for the element value and shift it to correct position
    APInt ElemAPInt(128, ElemValue);
    ElemAPInt <<= BitPos;

    // Place the element value at the correct bit position
    FullVal |= ElemAPInt;
  }

  // All-zero and all-one vectors are handled by cheaper idioms elsewhere.
  if (FullVal.isZero() || FullVal.isAllOnes())
    return SDValue();

  // See whether the assembled constant is one of the LXVKQ-reachable values.
  if (auto UIMOpt = getPatternInfo(FullVal)) {
    const auto &[Uim, ShiftAmount] = *UIMOpt;
    SDLoc Dl(Op);

    // Generate LXVKQ instruction if the shift amount is zero.
    if (ShiftAmount == 0) {
      SDValue UimVal = DAG.getTargetConstant(Val: Uim, DL: Dl, VT: MVT::i32);
      SDValue LxvkqInstr =
          SDValue(DAG.getMachineNode(Opcode: PPC::LXVKQ, dl: Dl, VT, Op1: UimVal), 0);
      LLVM_DEBUG(llvm::dbgs()
                     << "combineBVLoadsSpecialValue: Instruction Emitted ";
                 LxvkqInstr.dump());
      return LxvkqInstr;
    }

    assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");

    // The right shifted pattern can be constructed using a combination of
    // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
    // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
    // value 255.
    SDValue ShiftAmountVec =
        SDValue(DAG.getMachineNode(Opcode: PPC::XXSPLTIB, dl: Dl, VT: MVT::v4i32,
                                   Op1: DAG.getTargetConstant(Val: 255, DL: Dl, VT: MVT::i32)),
                0);
    // Generate appropriate right shift instruction. Shifting the splat(0xFF)
    // vector right by 127 (255 & 0x7F) leaves only the least significant bit
    // set, which is the LSB pattern we matched.
    SDValue ShiftVec = SDValue(
        DAG.getMachineNode(Opcode: PPC::VSRQ, dl: Dl, VT, Op1: ShiftAmountVec, Op2: ShiftAmountVec),
        0);
    LLVM_DEBUG(llvm::dbgs()
                   << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
               ShiftVec.dump());
    return ShiftVec;
  }
  // No patterns matched for build vectors.
  return SDValue();
}
16179
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
/// of the vector type if the loads are consecutive. If the loads are
/// consecutive but in descending order, a shuffle is added at the end
/// to reorder the vector.
///
/// Returns the wide load (or shuffle of it), or an empty SDValue when the
/// build vector's inputs are not a single run of consecutive loads.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SDLoc dl(N);

  // Return early for non byte-sized type, as they can't be consecutive.
  if (!N->getValueType(ResNo: 0).getVectorElementType().isByteSized())
    return SDValue();

  bool InputsAreConsecutiveLoads = true;
  bool InputsAreReverseConsecutive = true;
  unsigned ElemSize = N->getValueType(ResNo: 0).getScalarType().getStoreSize();
  SDValue FirstInput = N->getOperand(Num: 0);
  bool IsRoundOfExtLoad = false;
  LoadSDNode *FirstLoad = nullptr;

  // Elements may be loads directly, or fp_round of an extending load.
  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
      FirstInput.getOperand(i: 0).getOpcode() == ISD::LOAD) {
    FirstLoad = cast<LoadSDNode>(Val: FirstInput.getOperand(i: 0));
    IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
  }
  // Not a build vector of (possibly fp_rounded) loads.
  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
      N->getNumOperands() == 1)
    return SDValue();

  if (!IsRoundOfExtLoad)
    FirstLoad = cast<LoadSDNode>(Val&: FirstInput);

  SmallVector<LoadSDNode *, 4> InputLoads;
  InputLoads.push_back(Elt: FirstLoad);
  // Walk each adjacent pair of elements, checking consecutiveness in both
  // ascending and descending address order simultaneously.
  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && N->getOperand(Num: i).getOpcode() != ISD::FP_ROUND)
      return SDValue();

    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(Num: i).getOperand(i: 0) :
                                           N->getOperand(Num: i);
    if (NextInput.getOpcode() != ISD::LOAD)
      return SDValue();

    SDValue PreviousInput =
      IsRoundOfExtLoad ? N->getOperand(Num: i-1).getOperand(i: 0) : N->getOperand(Num: i-1);
    LoadSDNode *LD1 = cast<LoadSDNode>(Val&: PreviousInput);
    LoadSDNode *LD2 = cast<LoadSDNode>(Val&: NextInput);

    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
      return SDValue();

    // We only care about regular loads. The PPC-specific load intrinsics
    // will not lead to a merge opportunity.
    if (!DAG.areNonVolatileConsecutiveLoads(LD: LD2, Base: LD1, Bytes: ElemSize, Dist: 1))
      InputsAreConsecutiveLoads = false;
    if (!DAG.areNonVolatileConsecutiveLoads(LD: LD1, Base: LD2, Bytes: ElemSize, Dist: 1))
      InputsAreReverseConsecutive = false;

    // Exit early if the loads are neither consecutive nor reverse consecutive.
    if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
      return SDValue();
    InputLoads.push_back(Elt: LD2);
  }

  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
         "The loads cannot be both consecutive and reverse consecutive.");

  SDValue WideLoad;
  SDValue ReturnSDVal;
  if (InputsAreConsecutiveLoads) {
    assert(FirstLoad && "Input needs to be a LoadSDNode.");
    // Ascending order: the first element has the lowest address, so the wide
    // load starts there.
    WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: 0), dl, Chain: FirstLoad->getChain(),
                           Ptr: FirstLoad->getBasePtr(), PtrInfo: FirstLoad->getPointerInfo(),
                           Alignment: FirstLoad->getAlign());
    ReturnSDVal = WideLoad;
  } else if (InputsAreReverseConsecutive) {
    // Descending order: the last element has the lowest address; load from
    // there and reverse the elements with a shuffle.
    LoadSDNode *LastLoad = InputLoads.back();
    assert(LastLoad && "Input needs to be a LoadSDNode.");
    WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: 0), dl, Chain: LastLoad->getChain(),
                           Ptr: LastLoad->getBasePtr(), PtrInfo: LastLoad->getPointerInfo(),
                           Alignment: LastLoad->getAlign());
    SmallVector<int, 16> Ops;
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
      Ops.push_back(Elt: i);

    ReturnSDVal = DAG.getVectorShuffle(VT: N->getValueType(ResNo: 0), dl, N1: WideLoad,
                                       N2: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)), Mask: Ops);
  } else
    return SDValue();

  // Anything that was chained after the original narrow loads must now be
  // ordered against the wide load instead.
  for (auto *LD : InputLoads)
    DAG.makeEquivalentMemoryOrdering(OldLoad: LD, NewMemOp: WideLoad);
  return ReturnSDVal;
}
16280
16281// This function adds the required vector_shuffle needed to get
16282// the elements of the vector extract in the correct position
16283// as specified by the CorrectElems encoding.
16284static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
16285 SDValue Input, uint64_t Elems,
16286 uint64_t CorrectElems) {
16287 SDLoc dl(N);
16288
16289 unsigned NumElems = Input.getValueType().getVectorNumElements();
16290 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16291
16292 // Knowing the element indices being extracted from the original
16293 // vector and the order in which they're being inserted, just put
16294 // them at element indices required for the instruction.
16295 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16296 if (DAG.getDataLayout().isLittleEndian())
16297 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16298 else
16299 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16300 CorrectElems = CorrectElems >> 8;
16301 Elems = Elems >> 8;
16302 }
16303
16304 SDValue Shuffle =
16305 DAG.getVectorShuffle(VT: Input.getValueType(), dl, N1: Input,
16306 N2: DAG.getUNDEF(VT: Input.getValueType()), Mask: ShuffleMask);
16307
16308 EVT VT = N->getValueType(ResNo: 0);
16309 SDValue Conv = DAG.getBitcast(VT, V: Shuffle);
16310
16311 EVT ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(),
16312 VT: Input.getValueType().getVectorElementType(),
16313 NumElements: VT.getVectorNumElements());
16314 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT, N1: Conv,
16315 N2: DAG.getValueType(ExtVT));
16316}
16317
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create
// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices,
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC  byte->word
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8  byte->double word
  // For LE: the allowed indices are: 0x0,0x8
  // For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  uint64_t Elems = 0;
  int Index;
  SDValue Input;

  // Returns true when Op is a (possibly any_extended) sign-extended extract
  // from the same source vector as all previous operands; records the
  // extract index into Elems (one byte per element) as a side effect.
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
      return false;

    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
    // of the right width.
    SDValue Extract = Op.getOperand(i: 0);
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
      Extract = Extract.getOperand(i: 0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: 1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    if (Input && Input != Extract.getOperand(i: 0))
      return false;

    if (!Input)
      Input = Extract.getOperand(i: 0);

    // Pack this element's index: LE indices live in the low nibble, BE
    // indices in the high nibble, matching the TargetElems encoding.
    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts,
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(Num: i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(ResNo: 0).getScalarSizeInBits();
  // The sum of input and output element widths uniquely identifies the
  // extension kind: 8+32, 8+64, 16+32, 16+64, 32+64.
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  // Keep only the nibbles relevant for the current endianness before
  // comparing against what was actually extracted.
  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}
16416
// Look for the pattern of a load from a narrow width to i128, feeding
// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
// (LXVRZX). This node represents a zero extending load that will be matched
// to the Load VSX Vector Rightmost instructions.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);

  // This combine is only eligible for a BUILD_VECTOR of v1i128.
  if (N->getValueType(ResNo: 0) != MVT::v1i128)
    return SDValue();

  SDValue Operand = N->getOperand(Num: 0);
  // Proceed with the transformation if the operand to the BUILD_VECTOR
  // is a load instruction.
  if (Operand.getOpcode() != ISD::LOAD)
    return SDValue();

  auto *LD = cast<LoadSDNode>(Val&: Operand);
  EVT MemoryType = LD->getMemoryVT();

  // This transformation is only valid if we are loading either a byte,
  // halfword, word, or doubleword.
  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
                     MemoryType == MVT::i32 || MemoryType == MVT::i64;

  // Ensure that the load from the narrow width is being zero extended to i128.
  if (!ValidLDType ||
      (LD->getExtensionType() != ISD::ZEXTLOAD &&
       LD->getExtensionType() != ISD::EXTLOAD))
    return SDValue();

  // The third operand carries the loaded element width in bits.
  SDValue LoadOps[] = {
      LD->getChain(), LD->getBasePtr(),
      DAG.getIntPtrConstant(Val: MemoryType.getScalarSizeInBits(), DL)};

  // Emit as a memory intrinsic so the new node inherits the original load's
  // memory operand (and thus its chain/ordering information).
  return DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVRZX, dl: DL,
                                 VTList: DAG.getVTList(VT1: MVT::v1i128, VT2: MVT::Other),
                                 Ops: LoadOps, MemVT: MemoryType, MMO: LD->getMemOperand());
}
16456
/// DAG combine entry point for BUILD_VECTOR nodes: tries each of the
/// build-vector combines in turn, then handles the v2f64-from-extracts
/// pattern inline.
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(Num: 0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
    if (Reduced)
      return Reduced;
  }

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
    Reduced = combineBVOfVecSExt(N, DAG);
    if (Reduced)
      return Reduced;
  }

  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
  // is a load from <valid narrow width> to i128.
  if (Subtarget.isISA3_1()) {
    SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
    if (BVOfZLoad)
      return BVOfZLoad;
  }

  // The remaining combine only applies to v2f64 built from converted
  // extracts of a v4i32.
  if (N->getValueType(ResNo: 0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
      FirstInput.getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (N->getOperand(Num: 1).getOpcode() != ISD::SINT_TO_FP &&
      N->getOperand(Num: 1).getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  // Both conversions must have the same signedness.
  if (FirstInput.getOpcode() != N->getOperand(Num: 1).getOpcode())
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(i: 0);
  SDValue Ext2 = N->getOperand(Num: 1).getOperand(i: 0);
  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Val: Ext1.getOperand(i: 1));
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Val: Ext2.getOperand(i: 1));
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  // Both extracts must come from the same v4i32 source vector.
  if (Ext1.getOperand(i: 0).getValueType() != MVT::v4i32 ||
      Ext1.getOperand(i: 0) != Ext2.getOperand(i: 0))
    return SDValue();

  // The extracted element pair (0,1) or (2,3) selects which half of the
  // source vector is converted; the half index is endian-dependent.
  int FirstElem = Ext1Op->getZExtValue();
  int SecondElem = Ext2Op->getZExtValue();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
  else
    return SDValue();

  SDValue SrcVec = Ext1.getOperand(i: 0);
  auto NodeType = (N->getOperand(Num: 1).getOpcode() == ISD::SINT_TO_FP) ?
    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(Opcode: NodeType, DL: dl, VT: MVT::v2f64,
                     N1: SrcVec, N2: DAG.getIntPtrConstant(Val: SubvecIdx, DL: dl));
}
16547
/// Combine [su]int_to_fp nodes: fold sub-word integer loads into LXSIZX-based
/// sequences, and elide the store/load round trip for fp->int->fp chains.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (!Op.getOperand(i: 0).getValueType().isSimple())
    return SDValue();
  // Only integer sources wider than i1 and no wider than i64 are handled.
  if (Op.getOperand(i: 0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(i: 0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // Case 1 (P9 only): the integer source is a sub-word (i8/i16) load.
  SDValue FirstOperand(Op.getOperand(i: 0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
                     (FirstOperand.getValueType() == MVT::i8 ||
                      FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // Width operand for LXSIZX: number of bytes loaded (1 or 2).
    SDValue WidthConst =
      DAG.getIntPtrConstant(Val: FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            DL: dl, isTarget: false);
    LoadSDNode *LDN = cast<LoadSDNode>(Val: FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXSIZX, dl,
                                         VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
                                         Ops, MemVT: MVT::i8, MMO: LDN->getMemOperand());
    DAG.makeEquivalentMemoryOrdering(OldLoad: LDN, NewMemOp: Ld);

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(Opcode: PPCISD::VEXTS, DL: dl, VT: MVT::f64, Ops: ExtOps);
      return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ext);
    } else
      return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ld);
  }

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(i: 0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // Case 2: if we're converting from a float, to an int, and back to a float
  // again, then we don't need the store/load pair at all.
  if ((Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(i: 0).getOperand(i: 0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
      DCI.AddToWorklist(N: Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                         PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(Opcode: FCTOp, DL: dl, VT: MVT::f64, Operand: Src);
    SDValue FP = DAG.getNode(Opcode: FCFOp, DL: dl, VT: FCFTy, Operand: Tmp);

    // Without FPCVT we only converted to f64 above; round the result back
    // down to single precision here.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
                       N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
      DCI.AddToWorklist(N: FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
16654
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  // Delay VSX load for LE combine until after LegalizeOps to prioritize other
  // load combines.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  // Extract chain, base pointer and memory operand from either a plain
  // vector load or a load-like target intrinsic.
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Val: N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(Num: 2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(ResNo: 0).getSimpleVT();

  // Emit the LXVD2X load as v2f64, then an XXSWAPD to swap the doublewords.
  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVD2X, dl,
                                         VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other),
                                         Ops: LoadOps, MemVT: MVT::v2f64, MMO);

  DCI.AddToWorklist(N: Load.getNode());
  Chain = Load.getValue(R: 1);
  SDValue Swap = DAG.getNode(
      Opcode: PPCISD::XXSWAPD, DL: dl, VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Load);
  DCI.AddToWorklist(N: Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: VecTy, Operand: Swap);
    DCI.AddToWorklist(N: N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, VTList: DAG.getVTList(VT1: VecTy, VT2: MVT::Other),
                       N1: N, N2: Swap.getValue(R: 1));
  }

  return Swap;
}
16720
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  // Delay VSX store for LE combine until after LegalizeOps to prioritize other
  // store combines.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  // Extract chain, base pointer, stored-value operand index, and memory
  // operand from either a plain store or a store-like target intrinsic.
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(Val: N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(Num: 3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(Num: SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // All stores are done as v2f64 and possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: Src);
    DCI.AddToWorklist(N: Src.getNode());
  }

  // Swap the doublewords with XXSWAPD, then emit the STXVD2X store of the
  // swapped value.
  SDValue Swap = DAG.getNode(Opcode: PPCISD::XXSWAPD, DL: dl,
                             VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Src);
  DCI.AddToWorklist(N: Swap.getNode());
  Chain = Swap.getValue(R: 1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(Opcode: PPCISD::STXVD2X, dl,
                                          VTList: DAG.getVTList(VT: MVT::Other),
                                          Ops: StoreOps, MemVT: VecTy, MMO);
  DCI.AddToWorklist(N: Store.getNode());
  return Store;
}
16784
16785// Handle DAG combine for STORE (FP_TO_INT F).
16786SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16787 DAGCombinerInfo &DCI) const {
16788 SelectionDAG &DAG = DCI.DAG;
16789 SDLoc dl(N);
16790 unsigned Opcode = N->getOperand(Num: 1).getOpcode();
16791 (void)Opcode;
16792 bool Strict = N->getOperand(Num: 1)->isStrictFPOpcode();
16793
16794 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16795 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16796 && "Not a FP_TO_INT Instruction!");
16797
16798 SDValue Val = N->getOperand(Num: 1).getOperand(i: Strict ? 1 : 0);
16799 EVT Op1VT = N->getOperand(Num: 1).getValueType();
16800 EVT ResVT = Val.getValueType();
16801
16802 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(VT: ResVT))
16803 return SDValue();
16804
16805 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16806 bool ValidTypeForStoreFltAsInt =
16807 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16808 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16809
16810 // TODO: Lower conversion from f128 on all VSX targets
16811 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16812 return SDValue();
16813
16814 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16815 cast<StoreSDNode>(Val: N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16816 return SDValue();
16817
16818 Val = convertFPToInt(Op: N->getOperand(Num: 1), DAG, Subtarget);
16819
16820 // Set number of bytes being converted.
16821 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16822 SDValue Ops[] = {N->getOperand(Num: 0), Val, N->getOperand(Num: 2),
16823 DAG.getIntPtrConstant(Val: ByteSize, DL: dl, isTarget: false),
16824 DAG.getValueType(Op1VT)};
16825
16826 Val = DAG.getMemIntrinsicNode(Opcode: PPCISD::ST_VSR_SCAL_INT, dl,
16827 VTList: DAG.getVTList(VT: MVT::Other), Ops,
16828 MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
16829 MMO: cast<StoreSDNode>(Val: N)->getMemOperand());
16830
16831 return Val;
16832}
16833
16834static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16835 // Check that the source of the element keeps flipping
16836 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16837 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16838 for (int i = 1, e = Mask.size(); i < e; i++) {
16839 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16840 return false;
16841 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16842 return false;
16843 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16844 }
16845 return true;
16846}
16847
16848static bool isSplatBV(SDValue Op) {
16849 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16850 return false;
16851 SDValue FirstOp;
16852
16853 // Find first non-undef input.
16854 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16855 FirstOp = Op.getOperand(i);
16856 if (!FirstOp.isUndef())
16857 break;
16858 }
16859
16860 // All inputs are undef or the same as the first non-undef input.
16861 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16862 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16863 return false;
16864 return true;
16865}
16866
16867static SDValue isScalarToVec(SDValue Op) {
16868 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16869 return Op;
16870 if (Op.getOpcode() != ISD::BITCAST)
16871 return SDValue();
16872 Op = Op.getOperand(i: 0);
16873 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16874 return Op;
16875 return SDValue();
16876}
16877
16878// Fix up the shuffle mask to account for the fact that the result of
16879// scalar_to_vector is not in lane zero. This just takes all values in
16880// the ranges specified by the min/max indices and adds the number of
16881// elements required to ensure each element comes from the respective
16882// position in the valid lane.
16883// On little endian, that's just the corresponding element in the other
16884// half of the vector. On big endian, it is in the same half but right
16885// justified rather than left justified in that half.
16886static void fixupShuffleMaskForPermutedSToV(
16887 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16888 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16889 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16890 int LHSEltFixup =
16891 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16892 int RHSEltFixup =
16893 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16894 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16895 int Idx = ShuffV[I];
16896 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16897 ShuffV[I] += LHSEltFixup;
16898 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16899 ShuffV[I] += RHSEltFixup;
16900 }
16901}
16902
16903// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16904// the original is:
16905// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16906// In such a case, just change the shuffle mask to extract the element
16907// from the permuted index.
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
                               const PPCSubtarget &Subtarget) {
  SDLoc dl(OrigSToV);
  EVT VT = OrigSToV.getValueType();
  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         "Expecting a SCALAR_TO_VECTOR here");
  SDValue Input = OrigSToV.getOperand(i: 0);

  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Val: Input.getOperand(i: 1));
    SDValue OrigVector = Input.getOperand(i: 0);

    // Can't handle non-const element indices or different vector types
    // for the input to the extract and the output of the scalar_to_vector.
    if (Idx && VT == OrigVector.getValueType()) {
      unsigned NumElts = VT.getVectorNumElements();
      assert(
          NumElts > 1 &&
          "Cannot produce a permuted scalar_to_vector for one element vector");
      // Build a shuffle that moves the extracted element into the "valid
      // lane": element NumElts/2 on little endian, NumElts/2 - 1 on big
      // endian. All other lanes are left undefined (-1).
      SmallVector<int, 16> NewMask(NumElts, -1);
      unsigned ResultInElt = NumElts / 2;
      ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
      NewMask[ResultInElt] = Idx->getZExtValue();
      return DAG.getVectorShuffle(VT, dl, N1: OrigVector, N2: OrigVector, Mask: NewMask);
    }
  }
  // Not an extract of a constant element: emit the target node that keeps
  // the scalar in its natural (permuted) lane.
  return DAG.getNode(Opcode: PPCISD::SCALAR_TO_VECTOR_PERMUTED, DL: dl, VT,
                     Operand: OrigSToV.getOperand(i: 0));
}
16937
16938static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16939 int HalfVec, int LHSLastElementDefined,
16940 int RHSLastElementDefined) {
16941 for (int Index : ShuffV) {
16942 if (Index < 0) // Skip explicitly undefined mask indices.
16943 continue;
16944 // Handle first input vector of the vector_shuffle.
16945 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16946 (Index > LHSLastElementDefined))
16947 return false;
16948 // Handle second input vector of the vector_shuffle.
16949 if ((RHSLastElementDefined >= 0) &&
16950 (Index > HalfVec + RHSLastElementDefined))
16951 return false;
16952 }
16953 return true;
16954}
16955
// Helper for combineVectorShuffle: produce the permuted form of a shuffle
// operand that originates from a SCALAR_TO_VECTOR. Also computes, through
// the NumValidElts and LastElt out-parameters, the bookkeeping the caller
// needs to fix up its shuffle mask.
static SDValue generateSToVPermutedForVecShuffle(
    int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
    int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
    SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
  EVT VecShuffOperandType = VecShuffOperand.getValueType();
  // Set up the values for the shuffle vector fixup.
  NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
  // The last element depends on if the input comes from the LHS or RHS.
  //
  // For example:
  // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
  //
  // For the LHS: The last element that comes from the LHS is actually 0, not 3
  // because elements 1 and higher of a scalar_to_vector are undefined.
  // For the RHS: The last element that comes from the RHS is actually 5, not 7
  // because elements 1 and higher of a scalar_to_vector are undefined.
  // It is also not 4 because the original scalar_to_vector is wider and
  // actually contains two i32 elements.
  LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
                ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
                : FirstElt;
  SDValue SToVPermuted = getSToVPermuted(OrigSToV: SToVNode, DAG, Subtarget);
  // Bitcast back to the shuffle operand's type if the permuted node's type
  // differs, so the caller can substitute it directly.
  if (SToVPermuted.getValueType() != VecShuffOperandType)
    SToVPermuted = DAG.getBitcast(VT: VecShuffOperandType, V: SToVPermuted);
  return SToVPermuted;
}
16982
16983// On little endian subtargets, combine shuffles such as:
16984// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16985// into:
16986// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16987// because the latter can be matched to a single instruction merge.
16988// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16989// to put the value into element zero. Adjust the shuffle mask so that the
16990// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16991// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16992// nodes with elements smaller than doubleword because all the ways
16993// of getting scalar data into a vector register put the value in the
16994// rightmost element of the left half of the vector.
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  SDValue LHS = SVN->getOperand(Num: 0);
  SDValue RHS = SVN->getOperand(Num: 1);
  auto Mask = SVN->getMask();
  int NumElts = LHS.getValueType().getVectorNumElements();
  SDValue Res(SVN, 0);
  SDLoc dl(SVN);
  bool IsLittleEndian = Subtarget.isLittleEndian();

  // On big endian targets this is only useful for subtargets with direct moves.
  // On little endian targets it would be useful for all subtargets with VSX.
  // However adding special handling for LE subtargets without direct moves
  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
  // which includes direct moves.
  if (!Subtarget.hasDirectMove())
    return Res;

  // If this is not a shuffle of a shuffle and the first element comes from
  // the second vector, canonicalize to the commuted form. This will make it
  // more likely to match one of the single instruction patterns.
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
    std::swap(a&: LHS, b&: RHS);
    Res = DAG.getCommutedVectorShuffle(SV: *SVN);

    // Commuting may have folded the node to something else entirely; if so
    // there is nothing further to do here.
    if (!isa<ShuffleVectorSDNode>(Val: Res))
      return Res;

    Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
  }

  // Adjust the shuffle mask if either input vector comes from a
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
  // form (to prevent the need for a swap).
  SmallVector<int, 16> ShuffV(Mask);
  SDValue SToVLHS = isScalarToVec(Op: LHS);
  SDValue SToVRHS = isScalarToVec(Op: RHS);
  if (SToVLHS || SToVRHS) {
    EVT VT = SVN->getValueType(ResNo: 0);
    uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
    int ShuffleNumElts = ShuffV.size();
    int HalfVec = ShuffleNumElts / 2;
    // The width of the "valid lane" (i.e. the lane that contains the value that
    // is vectorized) needs to be expressed in terms of the number of elements
    // of the shuffle. It is thereby the ratio of the values before and after
    // any bitcast, which will be set later on if the LHS or RHS are
    // SCALAR_TO_VECTOR nodes.
    unsigned LHSNumValidElts = HalfVec;
    unsigned RHSNumValidElts = HalfVec;

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is. Note, that -1 means that all elements
    // are undefined.
    int LHSFirstElt = 0;
    int RHSFirstElt = ShuffleNumElts;
    int LHSLastElt = -1;
    int RHSLastElt = -1;

    // Get the permuted scalar to vector nodes for the source(s) that come from
    // ISD::SCALAR_TO_VECTOR.
    // On big endian systems, this only makes sense for element sizes smaller
    // than 64 bits since for 64-bit elements, all instructions already put
    // the value into element zero. Since scalar size of LHS and RHS may differ
    // after isScalarToVec, this should be checked using their own sizes.
    int LHSScalarSize = 0;
    int RHSScalarSize = 0;
    if (SToVLHS) {
      LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
      if (!IsLittleEndian && LHSScalarSize >= 64)
        return Res;
    }
    if (SToVRHS) {
      RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
      if (!IsLittleEndian && RHSScalarSize >= 64)
        return Res;
    }
    // Replace each scalar_to_vector input with its permuted form; the helper
    // also computes the element-range bookkeeping used for the mask fixup.
    if (LHSScalarSize != 0)
      LHS = generateSToVPermutedForVecShuffle(
          ScalarSize: LHSScalarSize, ShuffleEltWidth, NumValidElts&: LHSNumValidElts, FirstElt: LHSFirstElt,
          LastElt&: LHSLastElt, VecShuffOperand: LHS, SToVNode: SToVLHS, DAG, Subtarget);
    if (RHSScalarSize != 0)
      RHS = generateSToVPermutedForVecShuffle(
          ScalarSize: RHSScalarSize, ShuffleEltWidth, NumValidElts&: RHSNumValidElts, FirstElt: RHSFirstElt,
          LastElt&: RHSLastElt, VecShuffOperand: RHS, SToVNode: SToVRHS, DAG, Subtarget);

    // Bail out if any mask entry references an element the permuted inputs
    // do not define.
    if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElementDefined: LHSLastElt, RHSLastElementDefined: RHSLastElt))
      return Res;

    // Fix up the shuffle mask to reflect where the desired element actually is.
    // The minimum and maximum indices that correspond to element zero for both
    // the LHS and RHS are computed and will control which shuffle mask entries
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
    // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
    fixupShuffleMaskForPermutedSToV(
        ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
        LHSNumValidElts, RHSNumValidElts, Subtarget);
    Res = DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV);

    // We may have simplified away the shuffle. We won't be able to do anything
    // further with it here.
    if (!isa<ShuffleVectorSDNode>(Val: Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
  }

  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
  // The common case after we commuted the shuffle is that the RHS is a splat
  // and we have elements coming in from the splat at indices that are not
  // conducive to using a merge.
  // Example:
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
  if (!isSplatBV(Op: TheSplat))
    return Res;

  // We are looking for a mask such that all even elements are from
  // one vector and all odd elements from the other.
  if (!isAlternatingShuffMask(Mask, NumElts))
    return Res;

  // Adjust the mask so we are pulling in the same index from the splat
  // as the index from the interesting vector in consecutive elements.
  if (IsLittleEndian) {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
    if (Mask[0] < NumElts)
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
    else
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
      }
  } else {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
    if (Mask[0] < NumElts)
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
    else
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        // If element from non-splat is undef, pick first element from splat.
        ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
      }
  }

  // If the RHS has undefs, we need to remove them since we may have created
  // a shuffle that adds those instead of the splat value.
  SDValue SplatVal =
      cast<BuildVectorSDNode>(Val: TheSplat.getNode())->getSplatValue();
  TheSplat = DAG.getSplatBuildVector(VT: TheSplat.getValueType(), DL: dl, Op: SplatVal);

  if (IsLittleEndian)
    RHS = TheSplat;
  else
    LHS = TheSplat;
  return DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV);
}
17169
// Turn an element-reversing vector_shuffle fed by (or feeding) a normal
// vector load/store into a single PPCISD::LOAD_VEC_BE / STORE_VEC_BE memory
// node, avoiding a separate reversal. Only applies on little-endian
// subtargets with P9 vector support.
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
         "Not a reverse memop pattern!");

  // Returns true iff the mask is <N-1, N-2, ..., 1, 0>, i.e. a full element
  // reversal of one vector (walk the mask backwards and expect 0, 1, 2...).
  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
    auto Mask = SVN->getMask();
    int i = 0;
    auto I = Mask.rbegin();
    auto E = Mask.rend();

    for (; I != E; ++I) {
      if (*I != i)
        return false;
      i++;
    }
    return true;
  };

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = SVN->getValueType(ResNo: 0);

  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return SDValue();

  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
  // See comment in PPCVSXSwapRemoval.cpp.
  // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
  if (!Subtarget.hasP9Vector())
    return SDValue();

  if(!IsElementReverse(SVN))
    return SDValue();

  if (LSBase->getOpcode() == ISD::LOAD) {
    // If the load return value 0 has more than one user except the
    // shufflevector instruction, it is not profitable to replace the
    // shufflevector with a reverse load.
    for (SDUse &Use : LSBase->uses())
      if (Use.getResNo() == 0 &&
          Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
        return SDValue();

    SDLoc dl(LSBase);
    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        Opcode: PPCISD::LOAD_VEC_BE, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops: LoadOps,
        MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
  }

  if (LSBase->getOpcode() == ISD::STORE) {
    // If there are other uses of the shuffle, the swap cannot be avoided.
    // Forcing the use of an X-Form (since swapped stores only have
    // X-Forms) without removing the swap is unprofitable.
    if (!SVN->hasOneUse())
      return SDValue();

    SDLoc dl(LSBase);
    // Store the shuffle's *input* (operand 0); the reversal is folded into
    // the byte-reversed store itself.
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(Num: 0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        Opcode: PPCISD::STORE_VEC_BE, dl, VTList: DAG.getVTList(VT: MVT::Other), Ops: StoreOps,
        MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
  }

  llvm_unreachable("Expected a load or store node here");
}
17238
17239static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17240 unsigned IntrinsicID = Intrin.getConstantOperandVal(i: 1);
17241 if (IntrinsicID == Intrinsic::ppc_stdcx)
17242 StoreWidth = 8;
17243 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17244 StoreWidth = 4;
17245 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17246 StoreWidth = 2;
17247 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17248 StoreWidth = 1;
17249 else
17250 return false;
17251 return true;
17252}
17253
17254static SDValue DAGCombineAddc(SDNode *N,
17255 llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
17256 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(Value: 1)) {
17257 // (ADDC (ADDE 0, 0, C), -1) -> C
17258 SDValue LHS = N->getOperand(Num: 0);
17259 SDValue RHS = N->getOperand(Num: 1);
17260 if (LHS->getOpcode() == PPCISD::ADDE &&
17261 isNullConstant(V: LHS->getOperand(Num: 0)) &&
17262 isNullConstant(V: LHS->getOperand(Num: 1)) && isAllOnesConstant(V: RHS)) {
17263 return DCI.CombineTo(N, Res0: SDValue(N, 0), Res1: LHS->getOperand(Num: 2));
17264 }
17265 }
17266 return SDValue();
17267}
17268
17269// Optimize zero-extension of setcc when the compared value is known to be 0
17270// or 1.
17271//
17272// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
17273// -> zext(xor(Value, 1)) for seteq
17274// -> zext(Value) for setne
17275//
17276// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
17277// by keeping the value in its original i32 type throughout.
17278//
17279// Example:
17280// Before: zext(setcc(test_data_class(...), 0, seteq))
17281// // test_data_class returns 0 or 1 in i32
17282// // setcc converts i32 -> i1
17283// // zext converts i1 -> i64
17284// After: zext(xor(test_data_class(...), 1))
17285// // Stays in i32, then extends to i64
17286//
17287// This is beneficial because:
17288// 1. Eliminates the setcc instruction
17289// 2. Avoids i32 -> i1 truncation
17290// 3. Keeps computation in native integer width
17291
17292static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
17293 // Check if this is a zero_extend
17294 if (N->getOpcode() != ISD::ZERO_EXTEND)
17295 return SDValue();
17296
17297 SDValue Src = N->getOperand(Num: 0);
17298
17299 // Check if the source is a setcc
17300 if (Src.getOpcode() != ISD::SETCC)
17301 return SDValue();
17302
17303 SDValue LHS = Src.getOperand(i: 0);
17304 SDValue RHS = Src.getOperand(i: 1);
17305 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Src.getOperand(i: 2))->get();
17306
17307 if (!isNullConstant(V: RHS) && !isNullConstant(V: LHS))
17308 return SDValue();
17309
17310 SDValue NonNullConstant = isNullConstant(V: RHS) ? LHS : RHS;
17311
17312 auto isZeroOrOne = [=](SDValue &V) {
17313 if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17314 V.getConstantOperandVal(i: 0) == Intrinsic::ppc_test_data_class)
17315 return true;
17316 return false;
17317 };
17318
17319 if (!isZeroOrOne(NonNullConstant))
17320 return SDValue();
17321
17322 // Check for pattern: zext(setcc (Value), 0, seteq)) or
17323 // zext(setcc (Value), 0, setne))
17324 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
17325 // Replace with: zext(xor(Value, 1)) for seteq
17326 // or: zext(Value) for setne
17327 // This keeps the value in i32 instead of converting to i1
17328 SDLoc DL(N);
17329 EVT VType = N->getValueType(ResNo: 0);
17330 SDValue NewNonNullConstant = DAG.getZExtOrTrunc(Op: NonNullConstant, DL, VT: VType);
17331
17332 if (CC == ISD::SETNE)
17333 return NewNonNullConstant;
17334
17335 SDValue One = DAG.getConstant(Val: 1, DL, VT: VType);
17336 return DAG.getNode(Opcode: ISD::XOR, DL, VT: VType, N1: NewNonNullConstant, N2: One);
17337 }
17338
17339 return SDValue();
17340}
17341
17342// Combine XOR patterns with SELECT_CC_I4/I8, for Example:
17343// 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
17344// 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
17345// 1, cc))
17346// 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
17347// 0, 1, cc))
17348// 4. etc
static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::XOR && "Expected XOR node");

  EVT XorVT = N->getValueType(ResNo: 0);
  if ((XorVT != MVT::i32 && XorVT != MVT::i64))
    return SDValue();

  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);

  // Check for XOR with constant 1
  ConstantSDNode *XorConst = dyn_cast<ConstantSDNode>(Val&: RHS);
  if (!XorConst || !XorConst->isOne()) {
    XorConst = dyn_cast<ConstantSDNode>(Val&: LHS);
    if (!XorConst || !XorConst->isOne())
      return SDValue();
    // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
    std::swap(a&: LHS, b&: RHS);
  }

  // Check if LHS has only one use
  // (otherwise the original SELECT_CC must be kept anyway and nothing is
  // saved by rewriting it).
  if (!LHS.hasOneUse())
    return SDValue();

  // Handle extensions: ZEXT, ANYEXT
  SDValue SelectNode = LHS;

  if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
      LHS.getOpcode() == ISD::ANY_EXTEND) {
    SelectNode = LHS.getOperand(i: 0);

    // Check if the extension input has only one use
    if (!SelectNode.hasOneUse())
      return SDValue();
  }

  // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
  // (these pseudos are created during instruction selection, so this combine
  // runs on already-selected nodes).
  if (!SelectNode.isMachineOpcode())
    return SDValue();

  unsigned MachineOpc = SelectNode.getMachineOpcode();

  // Handle both SELECT_CC_I4 and SELECT_CC_I8
  if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
    return SDValue();

  // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
  if (SelectNode.getNumOperands() != 4)
    return SDValue();

  ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(Val: SelectNode.getOperand(i: 1));
  ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(Val: SelectNode.getOperand(i: 2));

  if (!ConstOp1 || !ConstOp2)
    return SDValue();

  // Only optimize if operands are {0, 1} or {1, 0}
  if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
        (ConstOp1->isZero() && ConstOp2->isOne())))
    return SDValue();

  // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
  // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
  // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
  // create SELECT_CC(cond, 1, 0, pred).
  SDLoc DL(N);
  // Pick the pseudo matching the XOR's result width so any zext/anyext that
  // sat between the SELECT_CC and the XOR is absorbed as well.
  MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;

  bool ConstOp1IsOne = ConstOp1->isOne();
  return SDValue(
      DAG.getMachineNode(Opcode: MachineOpc, dl: DL, VT: XorVT,
                         Ops: {SelectNode.getOperand(i: 0),
                          DAG.getConstant(Val: ConstOp1IsOne ? 0 : 1, DL, VT: XorVT),
                          DAG.getConstant(Val: ConstOp1IsOne ? 1 : 0, DL, VT: XorVT),
                          SelectNode.getOperand(i: 3)}),
      0);
}
17426
17427SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
17428 DAGCombinerInfo &DCI) const {
17429 SelectionDAG &DAG = DCI.DAG;
17430 SDLoc dl(N);
17431 switch (N->getOpcode()) {
17432 default: break;
17433 case ISD::ADD:
17434 return combineADD(N, DCI);
17435 case ISD::AND: {
17436 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17437 // original input as that will prevent us from selecting optimal rotates.
17438 // This only matters if the input to the extend is i32 widened to i64.
17439 SDValue Op1 = N->getOperand(Num: 0);
17440 SDValue Op2 = N->getOperand(Num: 1);
17441 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17442 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17443 !isa<ConstantSDNode>(Val: Op2) || N->getValueType(ResNo: 0) != MVT::i64 ||
17444 Op1.getOperand(i: 0).getValueType() != MVT::i32)
17445 break;
17446 SDValue NarrowOp = Op1.getOperand(i: 0);
17447 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17448 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17449 break;
17450
17451 uint64_t Imm = Op2->getAsZExtVal();
17452 // Make sure that the constant is narrow enough to fit in the narrow type.
17453 if (!isUInt<32>(x: Imm))
17454 break;
17455 SDValue ConstOp = DAG.getConstant(Val: Imm, DL: dl, VT: MVT::i32);
17456 SDValue NarrowAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: NarrowOp, N2: ConstOp);
17457 return DAG.getZExtOrTrunc(Op: NarrowAnd, DL: dl, VT: N->getValueType(ResNo: 0));
17458 }
17459 case ISD::XOR: {
17460 // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17461 if (SDValue V = combineXorSelectCC(N, DAG))
17462 return V;
17463 break;
17464 }
17465 case ISD::SHL:
17466 return combineSHL(N, DCI);
17467 case ISD::SRA:
17468 return combineSRA(N, DCI);
17469 case ISD::SRL:
17470 return combineSRL(N, DCI);
17471 case ISD::MUL:
17472 return combineMUL(N, DCI);
17473 case ISD::FMA:
17474 case PPCISD::FNMSUB:
17475 return combineFMALike(N, DCI);
17476 case PPCISD::SHL:
17477 if (isNullConstant(V: N->getOperand(Num: 0))) // 0 << V -> 0.
17478 return N->getOperand(Num: 0);
17479 break;
17480 case PPCISD::SRL:
17481 if (isNullConstant(V: N->getOperand(Num: 0))) // 0 >>u V -> 0.
17482 return N->getOperand(Num: 0);
17483 break;
17484 case PPCISD::SRA:
17485 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0))) {
17486 if (C->isZero() || // 0 >>s V -> 0.
17487 C->isAllOnes()) // -1 >>s V -> -1.
17488 return N->getOperand(Num: 0);
17489 }
17490 break;
17491 case ISD::ZERO_EXTEND:
17492 if (SDValue RetV = combineZextSetccWithZero(N, DAG&: DCI.DAG))
17493 return RetV;
17494 [[fallthrough]];
17495 case ISD::SIGN_EXTEND:
17496 case ISD::ANY_EXTEND:
17497 return DAGCombineExtBoolTrunc(N, DCI);
17498 case ISD::TRUNCATE:
17499 return combineTRUNCATE(N, DCI);
17500 case ISD::SETCC:
17501 if (SDValue CSCC = combineSetCC(N, DCI))
17502 return CSCC;
17503 [[fallthrough]];
17504 case ISD::SELECT_CC:
17505 return DAGCombineTruncBoolExt(N, DCI);
17506 case ISD::SINT_TO_FP:
17507 case ISD::UINT_TO_FP:
17508 return combineFPToIntToFP(N, DCI);
17509 case ISD::VECTOR_SHUFFLE:
17510 if (ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode())) {
17511 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(Val: N->getOperand(Num: 0));
17512 return combineVReverseMemOP(SVN: cast<ShuffleVectorSDNode>(Val: N), LSBase, DCI);
17513 }
17514 return combineVectorShuffle(SVN: cast<ShuffleVectorSDNode>(Val: N), DAG&: DCI.DAG);
17515 case ISD::STORE: {
17516
17517 EVT Op1VT = N->getOperand(Num: 1).getValueType();
17518 unsigned Opcode = N->getOperand(Num: 1).getOpcode();
17519
17520 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17521 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17522 SDValue Val = combineStoreFPToInt(N, DCI);
17523 if (Val)
17524 return Val;
17525 }
17526
17527 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17528 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N->getOperand(Num: 1));
17529 SDValue Val= combineVReverseMemOP(SVN, LSBase: cast<LSBaseSDNode>(Val: N), DCI);
17530 if (Val)
17531 return Val;
17532 }
17533
17534 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17535 if (cast<StoreSDNode>(Val: N)->isUnindexed() && Opcode == ISD::BSWAP &&
17536 N->getOperand(Num: 1).getNode()->hasOneUse() &&
17537 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17538 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17539
17540 // STBRX can only handle simple types and it makes no sense to store less
17541 // two bytes in byte-reversed order.
17542 EVT mVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
17543 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17544 break;
17545
17546 SDValue BSwapOp = N->getOperand(Num: 1).getOperand(i: 0);
17547 // Do an any-extend to 32-bits if this is a half-word input.
17548 if (BSwapOp.getValueType() == MVT::i16)
17549 BSwapOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: BSwapOp);
17550
17551 // If the type of BSWAP operand is wider than stored memory width
17552 // it need to be shifted to the right side before STBRX.
17553 if (Op1VT.bitsGT(VT: mVT)) {
17554 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17555 BSwapOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: Op1VT, N1: BSwapOp,
17556 N2: DAG.getConstant(Val: Shift, DL: dl, VT: MVT::i32));
17557 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17558 if (Op1VT == MVT::i64)
17559 BSwapOp = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: BSwapOp);
17560 }
17561
17562 SDValue Ops[] = {
17563 N->getOperand(Num: 0), BSwapOp, N->getOperand(Num: 2), DAG.getValueType(mVT)
17564 };
17565 return
17566 DAG.getMemIntrinsicNode(Opcode: PPCISD::STBRX, dl, VTList: DAG.getVTList(VT: MVT::Other),
17567 Ops, MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
17568 MMO: cast<StoreSDNode>(Val: N)->getMemOperand());
17569 }
17570
17571 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17572 // So it can increase the chance of CSE constant construction.
17573 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17574 isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) && Op1VT == MVT::i32) {
17575 // Need to sign-extended to 64-bits to handle negative values.
17576 EVT MemVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
17577 uint64_t Val64 = SignExtend64(X: N->getConstantOperandVal(Num: 1),
17578 B: MemVT.getSizeInBits());
17579 SDValue Const64 = DAG.getConstant(Val: Val64, DL: dl, VT: MVT::i64);
17580
17581 auto *ST = cast<StoreSDNode>(Val: N);
17582 SDValue NewST = DAG.getStore(Chain: ST->getChain(), dl, Val: Const64,
17583 Ptr: ST->getBasePtr(), Offset: ST->getOffset(), SVT: MemVT,
17584 MMO: ST->getMemOperand(), AM: ST->getAddressingMode(),
17585 /*IsTruncating=*/true);
17586 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17587 // new store which will change the constant by removing non-demanded bits.
17588 return ST->isUnindexed()
17589 ? DCI.CombineTo(N, Res: NewST, /*AddTo=*/false)
17590 : DCI.CombineTo(N, Res0: NewST, Res1: NewST.getValue(R: 1), /*AddTo=*/false);
17591 }
17592
17593 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17594 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17595 if (Op1VT.isSimple()) {
17596 MVT StoreVT = Op1VT.getSimpleVT();
17597 if (Subtarget.needsSwapsForVSXMemOps() &&
17598 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17599 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17600 return expandVSXStoreForLE(N, DCI);
17601 }
17602 break;
17603 }
17604 case ISD::LOAD: {
17605 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
17606 EVT VT = LD->getValueType(ResNo: 0);
17607
17608 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17609 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17610 if (VT.isSimple()) {
17611 MVT LoadVT = VT.getSimpleVT();
17612 if (Subtarget.needsSwapsForVSXMemOps() &&
17613 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17614 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17615 return expandVSXLoadForLE(N, DCI);
17616 }
17617
17618 // We sometimes end up with a 64-bit integer load, from which we extract
17619 // two single-precision floating-point numbers. This happens with
17620 // std::complex<float>, and other similar structures, because of the way we
17621 // canonicalize structure copies. However, if we lack direct moves,
17622 // then the final bitcasts from the extracted integer values to the
17623 // floating-point numbers turn into store/load pairs. Even with direct moves,
17624 // just loading the two floating-point numbers is likely better.
17625 auto ReplaceTwoFloatLoad = [&]() {
17626 if (VT != MVT::i64)
17627 return false;
17628
17629 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17630 LD->isVolatile())
17631 return false;
17632
17633 // We're looking for a sequence like this:
17634 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17635 // t16: i64 = srl t13, Constant:i32<32>
17636 // t17: i32 = truncate t16
17637 // t18: f32 = bitcast t17
17638 // t19: i32 = truncate t13
17639 // t20: f32 = bitcast t19
17640
17641 if (!LD->hasNUsesOfValue(NUses: 2, Value: 0))
17642 return false;
17643
17644 auto UI = LD->user_begin();
17645 while (UI.getUse().getResNo() != 0) ++UI;
17646 SDNode *Trunc = *UI++;
17647 while (UI.getUse().getResNo() != 0) ++UI;
17648 SDNode *RightShift = *UI;
17649 if (Trunc->getOpcode() != ISD::TRUNCATE)
17650 std::swap(a&: Trunc, b&: RightShift);
17651
17652 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17653 Trunc->getValueType(ResNo: 0) != MVT::i32 ||
17654 !Trunc->hasOneUse())
17655 return false;
17656 if (RightShift->getOpcode() != ISD::SRL ||
17657 !isa<ConstantSDNode>(Val: RightShift->getOperand(Num: 1)) ||
17658 RightShift->getConstantOperandVal(Num: 1) != 32 ||
17659 !RightShift->hasOneUse())
17660 return false;
17661
17662 SDNode *Trunc2 = *RightShift->user_begin();
17663 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17664 Trunc2->getValueType(ResNo: 0) != MVT::i32 ||
17665 !Trunc2->hasOneUse())
17666 return false;
17667
17668 SDNode *Bitcast = *Trunc->user_begin();
17669 SDNode *Bitcast2 = *Trunc2->user_begin();
17670
17671 if (Bitcast->getOpcode() != ISD::BITCAST ||
17672 Bitcast->getValueType(ResNo: 0) != MVT::f32)
17673 return false;
17674 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17675 Bitcast2->getValueType(ResNo: 0) != MVT::f32)
17676 return false;
17677
17678 if (Subtarget.isLittleEndian())
17679 std::swap(a&: Bitcast, b&: Bitcast2);
17680
17681 // Bitcast has the second float (in memory-layout order) and Bitcast2
17682 // has the first one.
17683
17684 SDValue BasePtr = LD->getBasePtr();
17685 if (LD->isIndexed()) {
17686 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17687 "Non-pre-inc AM on PPC?");
17688 BasePtr =
17689 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
17690 N2: LD->getOffset());
17691 }
17692
17693 auto MMOFlags =
17694 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17695 SDValue FloatLoad = DAG.getLoad(VT: MVT::f32, dl, Chain: LD->getChain(), Ptr: BasePtr,
17696 PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign(),
17697 MMOFlags, AAInfo: LD->getAAInfo());
17698 SDValue AddPtr =
17699 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(),
17700 N1: BasePtr, N2: DAG.getIntPtrConstant(Val: 4, DL: dl));
17701 SDValue FloatLoad2 = DAG.getLoad(
17702 VT: MVT::f32, dl, Chain: SDValue(FloatLoad.getNode(), 1), Ptr: AddPtr,
17703 PtrInfo: LD->getPointerInfo().getWithOffset(O: 4),
17704 Alignment: commonAlignment(A: LD->getAlign(), Offset: 4), MMOFlags, AAInfo: LD->getAAInfo());
17705
17706 if (LD->isIndexed()) {
17707 // Note that DAGCombine should re-form any pre-increment load(s) from
17708 // what is produced here if that makes sense.
17709 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: BasePtr);
17710 }
17711
17712 DCI.CombineTo(N: Bitcast2, Res: FloatLoad);
17713 DCI.CombineTo(N: Bitcast, Res: FloatLoad2);
17714
17715 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, LD->isIndexed() ? 2 : 1),
17716 To: SDValue(FloatLoad2.getNode(), 1));
17717 return true;
17718 };
17719
17720 if (ReplaceTwoFloatLoad())
17721 return SDValue(N, 0);
17722
17723 EVT MemVT = LD->getMemoryVT();
17724 Type *Ty = MemVT.getTypeForEVT(Context&: *DAG.getContext());
17725 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17726 if (LD->isUnindexed() && VT.isVector() &&
17727 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17728 // P8 and later hardware should just use LOAD.
17729 !Subtarget.hasP8Vector() &&
17730 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17731 VT == MVT::v4f32))) &&
17732 LD->getAlign() < ABIAlignment) {
17733 // This is a type-legal unaligned Altivec load.
17734 SDValue Chain = LD->getChain();
17735 SDValue Ptr = LD->getBasePtr();
17736 bool isLittleEndian = Subtarget.isLittleEndian();
17737
17738 // This implements the loading of unaligned vectors as described in
17739 // the venerable Apple Velocity Engine overview. Specifically:
17740 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17741 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17742 //
17743 // The general idea is to expand a sequence of one or more unaligned
17744 // loads into an alignment-based permutation-control instruction (lvsl
17745 // or lvsr), a series of regular vector loads (which always truncate
17746 // their input address to an aligned address), and a series of
17747 // permutations. The results of these permutations are the requested
17748 // loaded values. The trick is that the last "extra" load is not taken
17749 // from the address you might suspect (sizeof(vector) bytes after the
17750 // last requested load), but rather sizeof(vector) - 1 bytes after the
17751 // last requested vector. The point of this is to avoid a page fault if
17752 // the base address happened to be aligned. This works because if the
17753 // base address is aligned, then adding less than a full vector length
17754 // will cause the last vector in the sequence to be (re)loaded.
17755 // Otherwise, the next vector will be fetched as you might suspect was
17756 // necessary.
17757
17758 // We might be able to reuse the permutation generation from
17759 // a different base address offset from this one by an aligned amount.
17760 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17761 // optimization later.
17762 Intrinsic::ID Intr, IntrLD, IntrPerm;
17763 MVT PermCntlTy, PermTy, LDTy;
17764 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17765 : Intrinsic::ppc_altivec_lvsl;
17766 IntrLD = Intrinsic::ppc_altivec_lvx;
17767 IntrPerm = Intrinsic::ppc_altivec_vperm;
17768 PermCntlTy = MVT::v16i8;
17769 PermTy = MVT::v4i32;
17770 LDTy = MVT::v4i32;
17771
17772 SDValue PermCntl = BuildIntrinsicOp(IID: Intr, Op: Ptr, DAG, dl, DestVT: PermCntlTy);
17773
17774 // Create the new MMO for the new base load. It is like the original MMO,
17775 // but represents an area in memory almost twice the vector size centered
17776 // on the original address. If the address is unaligned, we might start
17777 // reading up to (sizeof(vector)-1) bytes below the address of the
17778 // original unaligned load.
17779 MachineFunction &MF = DAG.getMachineFunction();
17780 MachineMemOperand *BaseMMO =
17781 MF.getMachineMemOperand(MMO: LD->getMemOperand(),
17782 Offset: -(int64_t)MemVT.getStoreSize()+1,
17783 Size: 2*MemVT.getStoreSize()-1);
17784
17785 // Create the new base load.
17786 SDValue LDXIntID =
17787 DAG.getTargetConstant(Val: IntrLD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
17788 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17789 SDValue BaseLoad =
17790 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
17791 VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
17792 Ops: BaseLoadOps, MemVT: LDTy, MMO: BaseMMO);
17793
17794 // Note that the value of IncOffset (which is provided to the next
17795 // load's pointer info offset value, and thus used to calculate the
17796 // alignment), and the value of IncValue (which is actually used to
17797 // increment the pointer value) are different! This is because we
17798 // require the next load to appear to be aligned, even though it
17799 // is actually offset from the base pointer by a lesser amount.
17800 int IncOffset = VT.getSizeInBits() / 8;
17801 int IncValue = IncOffset;
17802
17803 // Walk (both up and down) the chain looking for another load at the real
17804 // (aligned) offset (the alignment of the other load does not matter in
17805 // this case). If found, then do not use the offset reduction trick, as
17806 // that will prevent the loads from being later combined (as they would
17807 // otherwise be duplicates).
17808 if (!findConsecutiveLoad(LD, DAG))
17809 --IncValue;
17810
17811 SDValue Increment =
17812 DAG.getConstant(Val: IncValue, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
17813 Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Ptr.getValueType(), N1: Ptr, N2: Increment);
17814
17815 MachineMemOperand *ExtraMMO =
17816 MF.getMachineMemOperand(MMO: LD->getMemOperand(),
17817 Offset: 1, Size: 2*MemVT.getStoreSize()-1);
17818 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17819 SDValue ExtraLoad =
17820 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
17821 VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
17822 Ops: ExtraLoadOps, MemVT: LDTy, MMO: ExtraMMO);
17823
17824 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
17825 N1: BaseLoad.getValue(R: 1), N2: ExtraLoad.getValue(R: 1));
17826
17827 // Because vperm has a big-endian bias, we must reverse the order
17828 // of the input vectors and complement the permute control vector
17829 // when generating little endian code. We have already handled the
17830 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17831 // and ExtraLoad here.
17832 SDValue Perm;
17833 if (isLittleEndian)
17834 Perm = BuildIntrinsicOp(IID: IntrPerm,
17835 Op0: ExtraLoad, Op1: BaseLoad, Op2: PermCntl, DAG, dl);
17836 else
17837 Perm = BuildIntrinsicOp(IID: IntrPerm,
17838 Op0: BaseLoad, Op1: ExtraLoad, Op2: PermCntl, DAG, dl);
17839
17840 if (VT != PermTy)
17841 Perm = Subtarget.hasAltivec()
17842 ? DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Perm)
17843 : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Perm,
17844 N2: DAG.getTargetConstant(Val: 1, DL: dl, VT: MVT::i64));
17845 // second argument is 1 because this rounding
17846 // is always exact.
17847
17848 // The output of the permutation is our loaded result, the TokenFactor is
17849 // our new chain.
17850 DCI.CombineTo(N, Res0: Perm, Res1: TF);
17851 return SDValue(N, 0);
17852 }
17853 }
17854 break;
17855 case ISD::INTRINSIC_WO_CHAIN: {
17856 bool isLittleEndian = Subtarget.isLittleEndian();
17857 unsigned IID = N->getConstantOperandVal(Num: 0);
17858 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17859 : Intrinsic::ppc_altivec_lvsl);
17860 if (IID == Intr && N->getOperand(Num: 1)->getOpcode() == ISD::ADD) {
17861 SDValue Add = N->getOperand(Num: 1);
17862
17863 int Bits = 4 /* 16 byte alignment */;
17864
17865 if (DAG.MaskedValueIsZero(Op: Add->getOperand(Num: 1),
17866 Mask: APInt::getAllOnes(numBits: Bits /* alignment */)
17867 .zext(width: Add.getScalarValueSizeInBits()))) {
17868 SDNode *BasePtr = Add->getOperand(Num: 0).getNode();
17869 for (SDNode *U : BasePtr->users()) {
17870 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17871 U->getConstantOperandVal(Num: 0) == IID) {
17872 // We've found another LVSL/LVSR, and this address is an aligned
17873 // multiple of that one. The results will be the same, so use the
17874 // one we've just found instead.
17875
17876 return SDValue(U, 0);
17877 }
17878 }
17879 }
17880
17881 if (isa<ConstantSDNode>(Val: Add->getOperand(Num: 1))) {
17882 SDNode *BasePtr = Add->getOperand(Num: 0).getNode();
17883 for (SDNode *U : BasePtr->users()) {
17884 if (U->getOpcode() == ISD::ADD &&
17885 isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) &&
17886 (Add->getConstantOperandVal(Num: 1) - U->getConstantOperandVal(Num: 1)) %
17887 (1ULL << Bits) ==
17888 0) {
17889 SDNode *OtherAdd = U;
17890 for (SDNode *V : OtherAdd->users()) {
17891 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17892 V->getConstantOperandVal(Num: 0) == IID) {
17893 return SDValue(V, 0);
17894 }
17895 }
17896 }
17897 }
17898 }
17899 }
17900
17901 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17902 // Expose the vabsduw/h/b opportunity for down stream
17903 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17904 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17905 IID == Intrinsic::ppc_altivec_vmaxsh ||
17906 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17907 SDValue V1 = N->getOperand(Num: 1);
17908 SDValue V2 = N->getOperand(Num: 2);
17909 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17910 V1.getSimpleValueType() == MVT::v8i16 ||
17911 V1.getSimpleValueType() == MVT::v16i8) &&
17912 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17913 // (0-a, a)
17914 if (V1.getOpcode() == ISD::SUB &&
17915 ISD::isBuildVectorAllZeros(N: V1.getOperand(i: 0).getNode()) &&
17916 V1.getOperand(i: 1) == V2) {
17917 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V2.getValueType(), Operand: V2);
17918 }
17919 // (a, 0-a)
17920 if (V2.getOpcode() == ISD::SUB &&
17921 ISD::isBuildVectorAllZeros(N: V2.getOperand(i: 0).getNode()) &&
17922 V2.getOperand(i: 1) == V1) {
17923 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17924 }
17925 // (x-y, y-x)
17926 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17927 V1.getOperand(i: 0) == V2.getOperand(i: 1) &&
17928 V1.getOperand(i: 1) == V2.getOperand(i: 0)) {
17929 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17930 }
17931 }
17932 }
17933 }
17934
17935 break;
17936 case ISD::INTRINSIC_W_CHAIN:
17937 switch (N->getConstantOperandVal(Num: 1)) {
17938 default:
17939 break;
17940 case Intrinsic::ppc_altivec_vsum4sbs:
17941 case Intrinsic::ppc_altivec_vsum4shs:
17942 case Intrinsic::ppc_altivec_vsum4ubs: {
17943 // These sum-across intrinsics only have a chain due to the side effect
17944 // that they may set the SAT bit. If we know the SAT bit will not be set
17945 // for some inputs, we can replace any uses of their chain with the
17946 // input chain.
17947 if (BuildVectorSDNode *BVN =
17948 dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 3))) {
17949 APInt APSplatBits, APSplatUndef;
17950 unsigned SplatBitSize;
17951 bool HasAnyUndefs;
17952 bool BVNIsConstantSplat = BVN->isConstantSplat(
17953 SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 0,
17954 isBigEndian: !Subtarget.isLittleEndian());
17955 // If the constant splat vector is 0, the SAT bit will not be set.
17956 if (BVNIsConstantSplat && APSplatBits == 0)
17957 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: N->getOperand(Num: 0));
17958 }
17959 return SDValue();
17960 }
17961 case Intrinsic::ppc_vsx_lxvw4x:
17962 case Intrinsic::ppc_vsx_lxvd2x:
17963 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17964 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17965 if (Subtarget.needsSwapsForVSXMemOps())
17966 return expandVSXLoadForLE(N, DCI);
17967 break;
17968 }
17969 break;
17970 case ISD::INTRINSIC_VOID:
17971 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17972 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17973 if (Subtarget.needsSwapsForVSXMemOps()) {
17974 switch (N->getConstantOperandVal(Num: 1)) {
17975 default:
17976 break;
17977 case Intrinsic::ppc_vsx_stxvw4x:
17978 case Intrinsic::ppc_vsx_stxvd2x:
17979 return expandVSXStoreForLE(N, DCI);
17980 }
17981 }
17982 break;
17983 case ISD::BSWAP: {
17984 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17985 // For subtargets without LDBRX, we can still do better than the default
17986 // expansion even for 64-bit BSWAP (LOAD).
17987 bool Is64BitBswapOn64BitTgt =
17988 Subtarget.isPPC64() && N->getValueType(ResNo: 0) == MVT::i64;
17989 bool IsSingleUseNormalLd = ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode()) &&
17990 N->getOperand(Num: 0).hasOneUse();
17991 if (IsSingleUseNormalLd &&
17992 (N->getValueType(ResNo: 0) == MVT::i32 || N->getValueType(ResNo: 0) == MVT::i16 ||
17993 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17994 SDValue Load = N->getOperand(Num: 0);
17995 LoadSDNode *LD = cast<LoadSDNode>(Val&: Load);
17996 // Create the byte-swapping load.
17997 SDValue Ops[] = {
17998 LD->getChain(), // Chain
17999 LD->getBasePtr(), // Ptr
18000 DAG.getValueType(N->getValueType(ResNo: 0)) // VT
18001 };
18002 SDValue BSLoad =
18003 DAG.getMemIntrinsicNode(Opcode: PPCISD::LBRX, dl,
18004 VTList: DAG.getVTList(VT1: N->getValueType(ResNo: 0) == MVT::i64 ?
18005 MVT::i64 : MVT::i32, VT2: MVT::Other),
18006 Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
18007
18008 // If this is an i16 load, insert the truncate.
18009 SDValue ResVal = BSLoad;
18010 if (N->getValueType(ResNo: 0) == MVT::i16)
18011 ResVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i16, Operand: BSLoad);
18012
18013 // First, combine the bswap away. This makes the value produced by the
18014 // load dead.
18015 DCI.CombineTo(N, Res: ResVal);
18016
18017 // Next, combine the load away, we give it a bogus result value but a real
18018 // chain result. The result value is dead because the bswap is dead.
18019 DCI.CombineTo(N: Load.getNode(), Res0: ResVal, Res1: BSLoad.getValue(R: 1));
18020
18021 // Return N so it doesn't get rechecked!
18022 return SDValue(N, 0);
18023 }
18024 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18025 // before legalization so that the BUILD_PAIR is handled correctly.
18026 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
18027 !IsSingleUseNormalLd)
18028 return SDValue();
18029 LoadSDNode *LD = cast<LoadSDNode>(Val: N->getOperand(Num: 0));
18030
18031 // Can't split volatile or atomic loads.
18032 if (!LD->isSimple())
18033 return SDValue();
18034 SDValue BasePtr = LD->getBasePtr();
18035 SDValue Lo = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr,
18036 PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign());
18037 Lo = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Lo);
18038 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
18039 N2: DAG.getIntPtrConstant(Val: 4, DL: dl));
18040 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
18041 MMO: LD->getMemOperand(), Offset: 4, Size: 4);
18042 SDValue Hi = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr, MMO: NewMMO);
18043 Hi = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Hi);
18044 SDValue Res;
18045 if (Subtarget.isLittleEndian())
18046 Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Hi, N2: Lo);
18047 else
18048 Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
18049 SDValue TF =
18050 DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
18051 N1: Hi.getOperand(i: 0).getValue(R: 1), N2: Lo.getOperand(i: 0).getValue(R: 1));
18052 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: TF);
18053 return Res;
18054 }
18055 case PPCISD::VCMP:
18056 // If a VCMP_rec node already exists with exactly the same operands as this
18057 // node, use its result instead of this node (VCMP_rec computes both a CR6
18058 // and a normal output).
18059 //
18060 if (!N->getOperand(Num: 0).hasOneUse() &&
18061 !N->getOperand(Num: 1).hasOneUse() &&
18062 !N->getOperand(Num: 2).hasOneUse()) {
18063
18064 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
18065 SDNode *VCMPrecNode = nullptr;
18066
18067 SDNode *LHSN = N->getOperand(Num: 0).getNode();
18068 for (SDNode *User : LHSN->users())
18069 if (User->getOpcode() == PPCISD::VCMP_rec &&
18070 User->getOperand(Num: 1) == N->getOperand(Num: 1) &&
18071 User->getOperand(Num: 2) == N->getOperand(Num: 2) &&
18072 User->getOperand(Num: 0) == N->getOperand(Num: 0)) {
18073 VCMPrecNode = User;
18074 break;
18075 }
18076
18077 // If there is no VCMP_rec node, or if the flag value has a single use,
18078 // don't transform this.
18079 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(NUses: 0, Value: 1))
18080 break;
18081
18082 // Look at the (necessarily single) use of the flag value. If it has a
18083 // chain, this transformation is more complex. Note that multiple things
18084 // could use the value result, which we should ignore.
18085 SDNode *FlagUser = nullptr;
18086 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18087 FlagUser == nullptr; ++UI) {
18088 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18089 SDNode *User = UI->getUser();
18090 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
18091 if (User->getOperand(Num: i) == SDValue(VCMPrecNode, 1)) {
18092 FlagUser = User;
18093 break;
18094 }
18095 }
18096 }
18097
18098 // If the user is a MFOCRF instruction, we know this is safe.
18099 // Otherwise we give up for right now.
18100 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18101 return SDValue(VCMPrecNode, 0);
18102 }
18103 break;
18104 case ISD::BR_CC: {
18105 // If this is a branch on an altivec predicate comparison, lower this so
18106 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18107 // lowering is done pre-legalize, because the legalizer lowers the predicate
18108 // compare down to code that is difficult to reassemble.
18109 // This code also handles branches that depend on the result of a store
18110 // conditional.
18111 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 1))->get();
18112 SDValue LHS = N->getOperand(Num: 2), RHS = N->getOperand(Num: 3);
18113
18114 int CompareOpc;
18115 bool isDot;
18116
18117 if (!isa<ConstantSDNode>(Val: RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
18118 break;
18119
18120 // Since we are doing this pre-legalize, the RHS can be a constant of
18121 // arbitrary bitwidth which may cause issues when trying to get the value
18122 // from the underlying APInt.
18123 auto RHSAPInt = RHS->getAsAPIntVal();
18124 if (!RHSAPInt.isIntN(N: 64))
18125 break;
18126
18127 unsigned Val = RHSAPInt.getZExtValue();
18128 auto isImpossibleCompare = [&]() {
18129 // If this is a comparison against something other than 0/1, then we know
18130 // that the condition is never/always true.
18131 if (Val != 0 && Val != 1) {
18132 if (CC == ISD::SETEQ) // Cond never true, remove branch.
18133 return N->getOperand(Num: 0);
18134 // Always !=, turn it into an unconditional branch.
18135 return DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other,
18136 N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 4));
18137 }
18138 return SDValue();
18139 };
18140 // Combine branches fed by store conditional instructions (st[bhwd]cx).
18141 unsigned StoreWidth = 0;
18142 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18143 isStoreConditional(Intrin: LHS, StoreWidth)) {
18144 if (SDValue Impossible = isImpossibleCompare())
18145 return Impossible;
18146 PPC::Predicate CompOpc;
18147 // eq 0 => ne
18148 // ne 0 => eq
18149 // eq 1 => eq
18150 // ne 1 => ne
18151 if (Val == 0)
18152 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18153 else
18154 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18155
18156 SDValue Ops[] = {LHS.getOperand(i: 0), LHS.getOperand(i: 2), LHS.getOperand(i: 3),
18157 DAG.getConstant(Val: StoreWidth, DL: dl, VT: MVT::i32)};
18158 auto *MemNode = cast<MemSDNode>(Val&: LHS);
18159 SDValue ConstSt = DAG.getMemIntrinsicNode(
18160 Opcode: PPCISD::STORE_COND, dl,
18161 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other, VT3: MVT::Glue), Ops,
18162 MemVT: MemNode->getMemoryVT(), MMO: MemNode->getMemOperand());
18163
18164 SDValue InChain;
18165 // Unchain the branch from the original store conditional.
18166 if (N->getOperand(Num: 0) == LHS.getValue(R: 1))
18167 InChain = LHS.getOperand(i: 0);
18168 else if (N->getOperand(Num: 0).getOpcode() == ISD::TokenFactor) {
18169 SmallVector<SDValue, 4> InChains;
18170 SDValue InTF = N->getOperand(Num: 0);
18171 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
18172 if (InTF.getOperand(i) != LHS.getValue(R: 1))
18173 InChains.push_back(Elt: InTF.getOperand(i));
18174 InChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: InChains);
18175 }
18176
18177 return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: InChain,
18178 N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
18179 N3: DAG.getRegister(Reg: PPC::CR0, VT: MVT::i32), N4: N->getOperand(Num: 4),
18180 N5: ConstSt.getValue(R: 2));
18181 }
18182
18183 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18184 getVectorCompareInfo(Intrin: LHS, CompareOpc, isDot, Subtarget)) {
18185 assert(isDot && "Can't compare against a vector result!");
18186
18187 if (SDValue Impossible = isImpossibleCompare())
18188 return Impossible;
18189
18190 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
18191 // Create the PPCISD altivec 'dot' comparison node.
18192 SDValue Ops[] = {
18193 LHS.getOperand(i: 2), // LHS of compare
18194 LHS.getOperand(i: 3), // RHS of compare
18195 DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
18196 };
18197 EVT VTs[] = { LHS.getOperand(i: 2).getValueType(), MVT::Glue };
18198 SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);
18199
18200 // Unpack the result based on how the target uses it.
18201 PPC::Predicate CompOpc;
18202 switch (LHS.getConstantOperandVal(i: 1)) {
18203 default: // Can't happen, don't crash on invalid number though.
18204 case 0: // Branch on the value of the EQ bit of CR6.
18205 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18206 break;
18207 case 1: // Branch on the inverted value of the EQ bit of CR6.
18208 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18209 break;
18210 case 2: // Branch on the value of the LT bit of CR6.
18211 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18212 break;
18213 case 3: // Branch on the inverted value of the LT bit of CR6.
18214 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18215 break;
18216 }
18217
18218 return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: N->getOperand(Num: 0),
18219 N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
18220 N3: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32),
18221 N4: N->getOperand(Num: 4), N5: CompNode.getValue(R: 1));
18222 }
18223 break;
18224 }
18225 case ISD::BUILD_VECTOR:
18226 return DAGCombineBuildVector(N, DCI);
18227 case PPCISD::ADDC:
18228 return DAGCombineAddc(N, DCI);
18229 }
18230
18231 return SDValue();
18232}
18233
18234SDValue
18235PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
18236 SelectionDAG &DAG,
18237 SmallVectorImpl<SDNode *> &Created) const {
18238 // fold (sdiv X, pow2)
18239 EVT VT = N->getValueType(ResNo: 0);
18240 if (VT == MVT::i64 && !Subtarget.isPPC64())
18241 return SDValue();
18242 if ((VT != MVT::i32 && VT != MVT::i64) ||
18243 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
18244 return SDValue();
18245
18246 SDLoc DL(N);
18247 SDValue N0 = N->getOperand(Num: 0);
18248
18249 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
18250 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18251 SDValue ShiftAmt = DAG.getConstant(Val: Lg2, DL, VT);
18252
18253 SDValue Op = DAG.getNode(Opcode: PPCISD::SRA_ADDZE, DL, VT, N1: N0, N2: ShiftAmt);
18254 Created.push_back(Elt: Op.getNode());
18255
18256 if (IsNegPow2) {
18257 Op = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
18258 Created.push_back(Elt: Op.getNode());
18259 }
18260
18261 return Op;
18262}
18263
18264//===----------------------------------------------------------------------===//
18265// Inline Assembly Support
18266//===----------------------------------------------------------------------===//
18267
/// Report which result bits of PPC-specific nodes are known to be zero so
/// generic DAG combines can exploit them. Only nodes with provably-clear
/// high bits are handled; everything else conservatively reports nothing.
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  // Start from "nothing known"; each case below only sets Known.Zero.
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Val: Op.getOperand(i: 2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case PPCISD::ADDE: {
    if (Op.getResNo() == 0) {
      // (0|1), _ = ADDE 0, 0, CARRY
      // Adding two zero operands plus a carry can only yield 0 or 1, so
      // every bit above bit 0 is known zero.
      SDValue LHS = Op.getOperand(i: 0);
      SDValue RHS = Op.getOperand(i: 1);
      if (isNullConstant(V: LHS) && isNullConstant(V: RHS))
        Known.Zero = ~1ULL;
    }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (Op.getConstantOperandVal(i: 0)) {
    default: break;
    // All AltiVec compare-predicate intrinsics produce a 0/1 result.
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpequq_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      Known.Zero = ~1U; // All bits but the low one are known to be zero.
      break;
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Note: operand 0 is the chain here, so the intrinsic ID is operand 1.
    switch (Op.getConstantOperandVal(i: 1)) {
    default:
      break;
    case Intrinsic::ppc_load2r:
      // Top bits are cleared for load2r (which is the same as lhbrx).
      Known.Zero = 0xFFFF0000;
      break;
    }
    break;
  }
  }
}
18332
/// Return the preferred alignment for the header of loop \p ML. On the
/// listed cores, small loops and inner loops of a nest get 32-byte
/// alignment so they can sit in one instruction-cache line; otherwise the
/// generic TargetLowering preference is used.
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR11:
  case PPC::DIR_PWR_FUTURE: {
    // Without loop info we cannot do better than the generic answer.
    if (!ML)
      break;

    if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
      // so that we can decrease cache misses and branch-prediction misses.
      // Actual alignment of the loop will depend on the hotness check and other
      // logic in alignBlocks.
      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
        return Align(32);
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (const MachineInstr &J : **I) {
        LoopSize += TII->getInstSizeInBytes(MI: J);
        // NOTE(review): this break only exits the inner (instruction) loop;
        // the outer block walk continues. Harmless for correctness, since
        // once LoopSize exceeds 32 the check below cannot succeed.
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return Align(32);

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}
18381
18382/// getConstraintType - Given a constraint, return the type of
18383/// constraint it is for this target.
18384PPCTargetLowering::ConstraintType
18385PPCTargetLowering::getConstraintType(StringRef Constraint) const {
18386 if (Constraint.size() == 1) {
18387 switch (Constraint[0]) {
18388 default: break;
18389 case 'b':
18390 case 'r':
18391 case 'f':
18392 case 'd':
18393 case 'v':
18394 case 'y':
18395 return C_RegisterClass;
18396 case 'Z':
18397 // FIXME: While Z does indicate a memory constraint, it specifically
18398 // indicates an r+r address (used in conjunction with the 'y' modifier
18399 // in the replacement string). Currently, we're forcing the base
18400 // register to be r0 in the asm printer (which is interpreted as zero)
18401 // and forming the complete address in the second register. This is
18402 // suboptimal.
18403 return C_Memory;
18404 }
18405 } else if (Constraint == "wc") { // individual CR bits.
18406 return C_RegisterClass;
18407 } else if (Constraint == "wa" || Constraint == "wd" ||
18408 Constraint == "wf" || Constraint == "ws" ||
18409 Constraint == "wi" || Constraint == "ww") {
18410 return C_RegisterClass; // VSX registers.
18411 }
18412 return TargetLowering::getConstraintType(Constraint);
18413}
18414
18415/// Examine constraint type and operand type and determine a weight value.
18416/// This object must already have been set up with the operand type
18417/// and the current alternative constraint selected.
18418TargetLowering::ConstraintWeight
18419PPCTargetLowering::getSingleConstraintMatchWeight(
18420 AsmOperandInfo &info, const char *constraint) const {
18421 ConstraintWeight weight = CW_Invalid;
18422 Value *CallOperandVal = info.CallOperandVal;
18423 // If we don't have a value, we can't do a match,
18424 // but allow it at the lowest weight.
18425 if (!CallOperandVal)
18426 return CW_Default;
18427 Type *type = CallOperandVal->getType();
18428
18429 // Look at the constraint type.
18430 if (StringRef(constraint) == "wc" && type->isIntegerTy(Bitwidth: 1))
18431 return CW_Register; // an individual CR bit.
18432 else if ((StringRef(constraint) == "wa" ||
18433 StringRef(constraint) == "wd" ||
18434 StringRef(constraint) == "wf") &&
18435 type->isVectorTy())
18436 return CW_Register;
18437 else if (StringRef(constraint) == "wi" && type->isIntegerTy(Bitwidth: 64))
18438 return CW_Register; // just hold 64-bit integers data.
18439 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18440 return CW_Register;
18441 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18442 return CW_Register;
18443
18444 switch (*constraint) {
18445 default:
18446 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
18447 break;
18448 case 'b':
18449 if (type->isIntegerTy())
18450 weight = CW_Register;
18451 break;
18452 case 'f':
18453 if (type->isFloatTy())
18454 weight = CW_Register;
18455 break;
18456 case 'd':
18457 if (type->isDoubleTy())
18458 weight = CW_Register;
18459 break;
18460 case 'v':
18461 if (type->isVectorTy())
18462 weight = CW_Register;
18463 break;
18464 case 'y':
18465 weight = CW_Register;
18466 break;
18467 case 'Z':
18468 weight = CW_Memory;
18469 break;
18470 }
18471 return weight;
18472}
18473
/// Map an inline-asm register constraint to a (register, register-class)
/// pair for this target. Handles the single-letter GCC RS6000 letters, the
/// PPC-specific "w*"/"lr" constraints, explicit {vsN}/{fN} register names,
/// and finally defers to the generic TargetLowering implementation.
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b': // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(x: 0U, y: &PPC::G8RC_NOX0RegClass);
      return std::make_pair(x: 0U, y: &PPC::GPRC_NOR0RegClass);
    case 'r': // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(x: 0U, y: &PPC::G8RCRegClass);
      return std::make_pair(x: 0U, y: &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      // Under SPE, FP values live in GPRs (f32) or SPE registers (f64).
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(x: 0U, y: &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(x: 0U, y: &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(x: 0U, y: &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(x: 0U, y: &PPC::F8RCRegClass);
      }
      break;
    case 'v':
      if (Subtarget.hasAltivec() && VT.isVector())
        return std::make_pair(x: 0U, y: &PPC::VRRCRegClass);
      else if (Subtarget.hasVSX())
        // Scalars in Altivec registers only make sense with VSX.
        return std::make_pair(x: 0U, y: &PPC::VFRCRegClass);
      break;
    case 'y': // crrc
      return std::make_pair(x: 0U, y: &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(x: 0U, y: &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    // A VSX register for either a scalar (FP) or vector. There is no
    // support for single precision scalars on subtargets prior to Power8.
    if (VT.isVector())
      return std::make_pair(x: 0U, y: &PPC::VSRCRegClass);
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(x: 0U, y: &PPC::VSSRCRegClass);
    return std::make_pair(x: 0U, y: &PPC::VSFRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(x: 0U, y: &PPC::VSSRCRegClass);
    else
      return std::make_pair(x: 0U, y: &PPC::VSFRCRegClass);
  } else if (Constraint == "lr") {
    if (VT == MVT::i64)
      return std::make_pair(x: 0U, y: &PPC::LR8RCRegClass);
    else
      return std::make_pair(x: 0U, y: &PPC::LRRCRegClass);
  }

  // Handle special cases of physical registers that are not properly handled
  // by the base class.
  // NOTE(review): assumes Constraint is non-empty here — presumably
  // guaranteed by the inline-asm parser; confirm against callers.
  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
    // If we name a VSX register, we can't defer to the base class because it
    // will not recognize the correct register (their names will be VSL{0-31}
    // and V{0-31} so they won't match). So we match them here.
    if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
      int VSNum = atoi(nptr: Constraint.data() + 3);
      assert(VSNum >= 0 && VSNum <= 63 &&
             "Attempted to access a vsr out of range");
      // vs0-vs31 alias the FP registers (VSL0-31); vs32-vs63 alias the
      // AltiVec registers (V0-31).
      if (VSNum < 32)
        return std::make_pair(x: PPC::VSL0 + VSNum, y: &PPC::VSRCRegClass);
      return std::make_pair(x: PPC::V0 + VSNum - 32, y: &PPC::VSRCRegClass);
    }

    // For float registers, we can't defer to the base class as it will match
    // the SPILLTOVSRRC class.
    if (Constraint.size() > 3 && Constraint[1] == 'f') {
      int RegNum = atoi(nptr: Constraint.data() + 2);
      if (RegNum > 31 || RegNum < 0)
        report_fatal_error(reason: "Invalid floating point register number");
      if (VT == MVT::f32 || VT == MVT::i32)
        return Subtarget.hasSPE()
                   ? std::make_pair(x: PPC::R0 + RegNum, y: &PPC::GPRCRegClass)
                   : std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return Subtarget.hasSPE()
                   ? std::make_pair(x: PPC::S0 + RegNum, y: &PPC::SPERCRegClass)
                   : std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F8RCRegClass);
    }
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(Reg: R.first))
    return std::make_pair(x: TRI->getMatchingSuperReg(Reg: R.first,
                            SubIdx: PPC::sub_32, RC: &PPC::G8RCRegClass),
                          y: &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_insensitive(RHS: Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }
  // FIXME: This warning should ideally be emitted in the front end.
  const auto &TM = getTargetMachine();
  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
    if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
         (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
        (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
      errs() << "warning: vector registers 20 to 32 are reserved in the "
                "default AIX AltiVec ABI and cannot be used\n";
  }

  return R;
}
18605
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
///
/// Handles the PPC immediate-range constraint letters (I/J/K/L/M/N/O/P);
/// any other constraint is forwarded to the generic implementation.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     StringRef Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.size() > 1)
    return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Val&: Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    // Result is only set when the value satisfies the letter's range; an
    // unset Result means the operand does not match and nothing is added.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I': // "I" is a signed 16-bit constant.
      if (isInt<16>(x: Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(x: Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(x: Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(x: Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'M': // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'N': // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'O': // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(x: -Value))
        Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(x: Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
18682
18683void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
18684 SmallVectorImpl<SDValue> &Ops,
18685 SelectionDAG &DAG) const {
18686 if (I.getNumOperands() <= 1)
18687 return;
18688 if (!isa<ConstantSDNode>(Val: Ops[1].getNode()))
18689 return;
18690 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18691 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18692 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18693 return;
18694
18695 if (MDNode *MDN = I.getMetadata(KindID: LLVMContext::MD_annotation))
18696 Ops.push_back(Elt: DAG.getMDNode(MD: MDN));
18697}
18698
18699// isLegalAddressingMode - Return true if the addressing mode represented
18700// by AM is legal for this target, for a load/store of the specified type.
18701bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18702 const AddrMode &AM, Type *Ty,
18703 unsigned AS,
18704 Instruction *I) const {
18705 // Vector type r+i form is supported since power9 as DQ form. We don't check
18706 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
18707 // imm form is preferred and the offset can be adjusted to use imm form later
18708 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18709 // max offset to check legal addressing mode, we should be a little aggressive
18710 // to contain other offsets for that LSRUse.
18711 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18712 return false;
18713
18714 // PPC allows a sign-extended 16-bit immediate field.
18715 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18716 return false;
18717
18718 // No global is ever allowed as a base.
18719 if (AM.BaseGV)
18720 return false;
18721
18722 // PPC only support r+r,
18723 switch (AM.Scale) {
18724 case 0: // "r+i" or just "i", depending on HasBaseReg.
18725 break;
18726 case 1:
18727 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18728 return false;
18729 // Otherwise we have r+r or r+i.
18730 break;
18731 case 2:
18732 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18733 return false;
18734 // Allow 2*r as r+r.
18735 break;
18736 default:
18737 // No other scales are supported.
18738 return false;
18739 }
18740
18741 return true;
18742}
18743
/// Lower ISD::RETURNADDR. Depth 0 reads the return address slot of the
/// current frame; deeper requests chase the frame chain via LowerFRAMEADDR
/// and load the LR save slot of the caller's frame.
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  SDLoc dl(Op);
  unsigned Depth = Op.getConstantOperandVal(i: 0);

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  auto PtrVT = getPointerTy(DL: MF.getDataLayout());

  if (Depth > 0) {
    // The link register (return address) is saved in the caller's frame
    // not the callee's stack frame. So we must get the caller's frame
    // address and load the return address at the LR offset from there.
    SDValue FrameAddr =
        DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
                    Ptr: LowerFRAMEADDR(Op, DAG), PtrInfo: MachinePointerInfo());
    SDValue Offset =
        DAG.getConstant(Val: Subtarget.getFrameLowering()->getReturnSaveOffset(), DL: dl,
                        VT: Subtarget.getScalarIntVT());
    return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(),
                       Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FrameAddr, N2: Offset),
                       PtrInfo: MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(), Ptr: RetAddrFI,
                     PtrInfo: MachinePointerInfo());
}
18779
18780SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18781 SelectionDAG &DAG) const {
18782 SDLoc dl(Op);
18783 unsigned Depth = Op.getConstantOperandVal(i: 0);
18784
18785 MachineFunction &MF = DAG.getMachineFunction();
18786 MachineFrameInfo &MFI = MF.getFrameInfo();
18787 MFI.setFrameAddressIsTaken(true);
18788
18789 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
18790 bool isPPC64 = PtrVT == MVT::i64;
18791
18792 // Naked functions never have a frame pointer, and so we use r1. For all
18793 // other functions, this decision must be delayed until during PEI.
18794 unsigned FrameReg;
18795 if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
18796 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18797 else
18798 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18799
18800 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg: FrameReg,
18801 VT: PtrVT);
18802 while (Depth--)
18803 FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
18804 Ptr: FrameAddr, PtrInfo: MachinePointerInfo());
18805 return FrameAddr;
18806}
18807
18808#define GET_REGISTER_MATCHER
18809#include "PPCGenAsmMatcher.inc"
18810
18811Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
18812 const MachineFunction &MF) const {
18813 bool IsPPC64 = Subtarget.isPPC64();
18814
18815 bool Is64Bit = IsPPC64 && VT == LLT::scalar(SizeInBits: 64);
18816 if (!Is64Bit && VT != LLT::scalar(SizeInBits: 32))
18817 report_fatal_error(reason: "Invalid register global variable type");
18818
18819 Register Reg = MatchRegisterName(Name: RegName);
18820 if (!Reg)
18821 return Reg;
18822
18823 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18824 // Need followup investigation as to why.
18825 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18826 report_fatal_error(reason: Twine("Trying to reserve an invalid register \"" +
18827 StringRef(RegName) + "\"."));
18828
18829 // Convert GPR to GP8R register for 64bit.
18830 if (Is64Bit && StringRef(RegName).starts_with_insensitive(Prefix: "r"))
18831 Reg = Reg.id() - PPC::R0 + PPC::X0;
18832
18833 return Reg;
18834}
18835
18836bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18837 // 32-bit SVR4 ABI access everything as got-indirect.
18838 if (Subtarget.is32BitELFABI())
18839 return true;
18840
18841 // AIX accesses everything indirectly through the TOC, which is similar to
18842 // the GOT.
18843 if (Subtarget.isAIXABI())
18844 return true;
18845
18846 CodeModel::Model CModel = getTargetMachine().getCodeModel();
18847 // If it is small or large code model, module locals are accessed
18848 // indirectly by loading their address from .toc/.got.
18849 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18850 return true;
18851
18852 // JumpTable and BlockAddress are accessed as got-indirect.
18853 if (isa<JumpTableSDNode>(Val: GA) || isa<BlockAddressSDNode>(Val: GA))
18854 return true;
18855
18856 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: GA))
18857 return Subtarget.isGVIndirectSymbol(GV: G->getGlobal());
18858
18859 return false;
18860}
18861
/// Return false: folding a constant offset into a global address is never
/// legal here.
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}
18867
/// Describe the memory accessed by PPC target intrinsics so the DAG builder
/// can attach accurate MachineMemOperands. One IntrinsicInfo is appended to
/// Infos per access; intrinsics without a case here append nothing.
void PPCTargetLowering::getTgtMemIntrinsic(
    SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
    MachineFunction &MF, unsigned Intrinsic) const {
  IntrinsicInfo Info;
  switch (Intrinsic) {
  // Quadword atomic read-modify-write / compare-exchange: both load and
  // store the full 16 bytes, volatile so they are never reordered/elided.
  case Intrinsic::ppc_atomicrmw_xchg_i128:
  case Intrinsic::ppc_atomicrmw_add_i128:
  case Intrinsic::ppc_atomicrmw_sub_i128:
  case Intrinsic::ppc_atomicrmw_nand_i128:
  case Intrinsic::ppc_atomicrmw_and_i128:
  case Intrinsic::ppc_atomicrmw_or_i128:
  case Intrinsic::ppc_atomicrmw_xor_i128:
  case Intrinsic::ppc_cmpxchg_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(i: 0);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    Infos.push_back(Elt: Info);
    return;
  // Quadword atomic load: 16-byte volatile load.
  case Intrinsic::ppc_atomic_load_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(i: 0);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
    Infos.push_back(Elt: Info);
    return;
  // Quadword atomic store: 16-byte volatile store; the pointer is the third
  // argument (after the two value halves).
  case Intrinsic::ppc_atomic_store_i128:
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(i: 2);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
    Infos.push_back(Elt: Info);
    return;
  // AltiVec/VSX vector and element loads.
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(i: 0);
    // The access may touch any byte in a window around the pointer, so
    // describe a conservative [-size+1, +size-1] range at byte alignment.
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOLoad;
    Infos.push_back(Elt: Info);
    return;
  }
  // AltiVec/VSX vector and element stores; the pointer is the second
  // argument (after the value being stored).
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(i: 1);
    // Same conservative window as the load cases above.
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOStore;
    Infos.push_back(Elt: Info);
    return;
  }
  // Store-conditional intrinsics (st[dwhb]cx.): volatile stores of the
  // variant's natural width and alignment.
  case Intrinsic::ppc_stdcx:
  case Intrinsic::ppc_stwcx:
  case Intrinsic::ppc_sthcx:
  case Intrinsic::ppc_stbcx: {
    EVT VT;
    auto Alignment = Align(8);
    switch (Intrinsic) {
    case Intrinsic::ppc_stdcx:
      VT = MVT::i64;
      break;
    case Intrinsic::ppc_stwcx:
      VT = MVT::i32;
      Alignment = Align(4);
      break;
    case Intrinsic::ppc_sthcx:
      VT = MVT::i16;
      Alignment = Align(2);
      break;
    case Intrinsic::ppc_stbcx:
      VT = MVT::i8;
      Alignment = Align(1);
      break;
    }
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(i: 0);
    Info.offset = 0;
    Info.align = Alignment;
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
    Infos.push_back(Elt: Info);
    return;
  }
  default:
    break;
  }
}
19026
19027/// It returns EVT::Other if the type should be determined using generic
19028/// target-independent logic.
19029EVT PPCTargetLowering::getOptimalMemOpType(
19030 LLVMContext &Context, const MemOp &Op,
19031 const AttributeList &FuncAttributes) const {
19032 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
19033 // We should use Altivec/VSX loads and stores when available. For unaligned
19034 // addresses, unaligned VSX loads are only fast starting with the P8.
19035 if (Subtarget.hasAltivec() && Op.size() >= 16) {
19036 if (Op.isMemset() && Subtarget.hasVSX()) {
19037 uint64_t TailSize = Op.size() % 16;
19038 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
19039 // element if vector element type matches tail store. For tail size
19040 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
19041 if (TailSize > 2 && TailSize <= 4) {
19042 return MVT::v8i16;
19043 }
19044 return MVT::v4i32;
19045 }
19046 if (Op.isAligned(AlignCheck: Align(16)) || Subtarget.hasP8Vector())
19047 return MVT::v4i32;
19048 }
19049 }
19050
19051 if (Subtarget.isPPC64()) {
19052 return MVT::i64;
19053 }
19054
19055 return MVT::i32;
19056}
19057
19058/// Returns true if it is beneficial to convert a load of a constant
19059/// to just the constant itself.
19060bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
19061 Type *Ty) const {
19062 assert(Ty->isIntegerTy());
19063
19064 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19065 return !(BitSize == 0 || BitSize > 64);
19066}
19067
19068bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
19069 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19070 return false;
19071 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
19072 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
19073 return NumBits1 == 64 && NumBits2 == 32;
19074}
19075
19076bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
19077 if (!VT1.isInteger() || !VT2.isInteger())
19078 return false;
19079 unsigned NumBits1 = VT1.getSizeInBits();
19080 unsigned NumBits2 = VT2.getSizeInBits();
19081 return NumBits1 == 64 && NumBits2 == 32;
19082}
19083
19084bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19085 // Generally speaking, zexts are not free, but they are free when they can be
19086 // folded with other operations.
19087 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
19088 EVT MemVT = LD->getMemoryVT();
19089 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
19090 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
19091 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
19092 LD->getExtensionType() == ISD::ZEXTLOAD))
19093 return true;
19094 }
19095
19096 // FIXME: Add other cases...
19097 // - 32-bit shifts with a zext to i64
19098 // - zext after ctlz, bswap, etc.
19099 // - zext after and by a constant mask
19100
19101 return TargetLowering::isZExtFree(Val, VT2);
19102}
19103
19104bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19105 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19106 "invalid fpext types");
19107 // Extending to float128 is not free.
19108 if (DestVT == MVT::f128)
19109 return false;
19110 return true;
19111}
19112
19113bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19114 return isInt<16>(x: Imm) || isUInt<16>(x: Imm);
19115}
19116
19117bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19118 return isInt<16>(x: Imm) || isUInt<16>(x: Imm);
19119}
19120
19121bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
19122 MachineMemOperand::Flags,
19123 unsigned *Fast) const {
19124 if (DisablePPCUnaligned)
19125 return false;
19126
19127 // PowerPC supports unaligned memory access for simple non-vector types.
19128 // Although accessing unaligned addresses is not as efficient as accessing
19129 // aligned addresses, it is generally more efficient than manual expansion,
19130 // and generally only traps for software emulation when crossing page
19131 // boundaries.
19132
19133 if (!VT.isSimple())
19134 return false;
19135
19136 if (VT.isFloatingPoint() && !VT.isVector() &&
19137 !Subtarget.allowsUnalignedFPAccess())
19138 return false;
19139
19140 if (VT.getSimpleVT().isVector()) {
19141 if (Subtarget.hasVSX()) {
19142 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
19143 VT != MVT::v4f32 && VT != MVT::v4i32)
19144 return false;
19145 } else {
19146 return false;
19147 }
19148 }
19149
19150 if (VT == MVT::ppcf128)
19151 return false;
19152
19153 if (Fast)
19154 *Fast = 1;
19155
19156 return true;
19157}
19158
19159bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
19160 SDValue C) const {
19161 // Check integral scalar types.
19162 if (!VT.isScalarInteger())
19163 return false;
19164 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
19165 if (!ConstNode->getAPIntValue().isSignedIntN(N: 64))
19166 return false;
19167 // This transformation will generate >= 2 operations. But the following
19168 // cases will generate <= 2 instructions during ISEL. So exclude them.
19169 // 1. If the constant multiplier fits 16 bits, it can be handled by one
19170 // HW instruction, ie. MULLI
19171 // 2. If the multiplier after shifted fits 16 bits, an extra shift
19172 // instruction is needed than case 1, ie. MULLI and RLDICR
19173 int64_t Imm = ConstNode->getSExtValue();
19174 unsigned Shift = llvm::countr_zero<uint64_t>(Val: Imm);
19175 Imm >>= Shift;
19176 if (isInt<16>(x: Imm))
19177 return false;
19178 uint64_t UImm = static_cast<uint64_t>(Imm);
19179 if (isPowerOf2_64(Value: UImm + 1) || isPowerOf2_64(Value: UImm - 1) ||
19180 isPowerOf2_64(Value: 1 - UImm) || isPowerOf2_64(Value: -1 - UImm))
19181 return true;
19182 }
19183 return false;
19184}
19185
19186bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19187 EVT VT) const {
19188 return isFMAFasterThanFMulAndFAdd(
19189 F: MF.getFunction(), Ty: VT.getTypeForEVT(Context&: MF.getFunction().getContext()));
19190}
19191
19192bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
19193 Type *Ty) const {
19194 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
19195 return false;
19196 switch (Ty->getScalarType()->getTypeID()) {
19197 case Type::FloatTyID:
19198 case Type::DoubleTyID:
19199 return true;
19200 case Type::FP128TyID:
19201 return Subtarget.hasP9Vector();
19202 default:
19203 return false;
19204 }
19205}
19206
// FIXME: add more patterns which are not profitable to hoist.
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
  // With multiple uses the single-consumer patterns below do not apply, so
  // hoisting is allowed.
  if (!I->hasOneUse())
    return true;

  Instruction *User = I->user_back();
  assert(User && "A single use instruction with no uses.");

  switch (I->getOpcode()) {
  case Instruction::FMul: {
    // Don't break FMA, PowerPC prefers FMA.
    if (User->getOpcode() != Instruction::FSub &&
        User->getOpcode() != Instruction::FAdd)
      return true;

    const TargetOptions &Options = getTargetMachine().Options;
    const Function *F = I->getFunction();
    const DataLayout &DL = F->getDataLayout();
    Type *Ty = User->getOperand(i: 0)->getType();
    // Contraction must be permitted on both the multiply and the add/sub
    // (or forced globally) for the pair to be fused into an FMA.
    bool AllowContract = I->getFastMathFlags().allowContract() &&
                         User->getFastMathFlags().allowContract();

    // Hoisting is unprofitable exactly when an FMA would have been formed
    // from this mul + add/sub pair.
    return !(isFMAFasterThanFMulAndFAdd(F: *F, Ty) &&
             isOperationLegalOrCustom(Op: ISD::FMA, VT: getValueType(DL, Ty)) &&
             (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
  }
  case Instruction::Load: {
    // Don't break "store (load float*)" pattern, this pattern will be combined
    // to "store (load int32)" in later InstCombine pass. See function
    // combineLoadToOperationType. On PowerPC, loading a float point takes more
    // cycles than loading a 32 bit integer.
    LoadInst *LI = cast<LoadInst>(Val: I);
    // For the loads that combineLoadToOperationType does nothing, like
    // ordered load, it should be profitable to hoist them.
    // For swifterror load, it can only be used for pointer to pointer type, so
    // later type check should get rid of this case.
    if (!LI->isUnordered())
      return true;

    if (User->getOpcode() != Instruction::Store)
      return true;

    if (I->getType()->getTypeID() != Type::FloatTyID)
      return true;

    // An unordered float load feeding a store: keep it in place so
    // InstCombine can rewrite it as an integer load/store.
    return false;
  }
  default:
    return true;
  }
  return true;
}
19259
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  // The array is terminated by a 0 sentinel.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}
19272
19273Register PPCTargetLowering::getExceptionPointerRegister(
19274 const Constant *PersonalityFn) const {
19275 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19276}
19277
19278Register PPCTargetLowering::getExceptionSelectorRegister(
19279 const Constant *PersonalityFn) const {
19280 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19281}
19282
19283bool
19284PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
19285 EVT VT , unsigned DefinedValues) const {
19286 if (VT == MVT::v2i64)
19287 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19288
19289 if (Subtarget.hasVSX())
19290 return true;
19291
19292 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
19293}
19294
19295Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
19296 if (DisableILPPref || Subtarget.enableMachineScheduler())
19297 return TargetLowering::getSchedulingPreference(N);
19298
19299 return Sched::ILP;
19300}
19301
// Create a fast isel object.
FastISel *PPCTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
    const LibcallLoweringInfo *LibcallLowering) const {
  // Thin wrapper over the PPC fast-isel factory.
  return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
}
19308
19309// 'Inverted' means the FMA opcode after negating one multiplicand.
19310// For example, (fma -a b c) = (fnmsub a b c)
19311static unsigned invertFMAOpcode(unsigned Opc) {
19312 switch (Opc) {
19313 default:
19314 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19315 case ISD::FMA:
19316 return PPCISD::FNMSUB;
19317 case PPCISD::FNMSUB:
19318 return ISD::FMA;
19319 }
19320}
19321
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  // Give up once the recursive negation search is too deep.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Opc) {
  case PPCISD::FNMSUB:
    // Only fold single-use nodes of a legal type; otherwise fall through to
    // the generic implementation below.
    if (!Op.hasOneUse() || !isTypeLegal(VT))
      break;

    SDValue N0 = Op.getOperand(i: 0);
    SDValue N1 = Op.getOperand(i: 1);
    SDValue N2 = Op.getOperand(i: 2);
    SDLoc Loc(Op);

    // Every rewrite below requires the addend (c) to be negatable.
    NegatibleCost N2Cost = NegatibleCost::Expensive;
    SDValue NegN2 =
        getNegatedExpression(Op: N2, DAG, LegalOps, OptForSize, Cost&: N2Cost, Depth: Depth + 1);

    if (!NegN2)
      return SDValue();

    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change sign of zeroes. For example,
    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
    if (Flags.hasNoSignedZeros()) {
      // Try and choose the cheaper one to negate.
      NegatibleCost N0Cost = NegatibleCost::Expensive;
      SDValue NegN0 = getNegatedExpression(Op: N0, DAG, LegalOps, OptForSize,
                                           Cost&: N0Cost, Depth: Depth + 1);

      NegatibleCost N1Cost = NegatibleCost::Expensive;
      SDValue NegN1 = getNegatedExpression(Op: N1, DAG, LegalOps, OptForSize,
                                           Cost&: N1Cost, Depth: Depth + 1);

      if (NegN0 && N0Cost <= N1Cost) {
        Cost = std::min(a: N0Cost, b: N2Cost);
        return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: NegN0, N2: N1, N3: NegN2, Flags);
      } else if (NegN1) {
        Cost = std::min(a: N1Cost, b: N2Cost);
        return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: N0, N2: NegN1, N3: NegN2, Flags);
      }
    }

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(Op: ISD::FMA, VT)) {
      Cost = N2Cost;
      return DAG.getNode(Opcode: ISD::FMA, DL: Loc, VT, N1: N0, N2: N1, N3: NegN2, Flags);
    }

    break;
  }

  // Fall back to the target-independent negation logic.
  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}
19385
19386// Override to enable LOAD_STACK_GUARD lowering on Linux.
19387bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
19388 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19389 return true;
19390 return TargetLowering::useLoadStackGuardNode(M);
19391}
19392
19393bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
19394 bool ForCodeSize) const {
19395 if (!VT.isSimple() || !Subtarget.hasVSX())
19396 return false;
19397
19398 switch(VT.getSimpleVT().SimpleTy) {
19399 default:
19400 // For FP types that are currently not supported by PPC backend, return
19401 // false. Examples: f16, f80.
19402 return false;
19403 case MVT::f32:
19404 case MVT::f64: {
19405 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19406 // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
19407 return true;
19408 }
19409 bool IsExact;
19410 APSInt IntResult(16, false);
19411 // The rounding mode doesn't really matter because we only care about floats
19412 // that can be converted to integers exactly.
19413 Imm.convertToInteger(Result&: IntResult, RM: APFloat::rmTowardZero, IsExact: &IsExact);
19414 // For exact values in the range [-16, 15] we can materialize the float.
19415 if (IsExact && IntResult <= 15 && IntResult >= -16)
19416 return true;
19417 return Imm.isZero();
19418 }
19419 case MVT::ppcf128:
19420 return Imm.isPosZero();
19421 }
19422}
19423
19424// For vector shift operation op, fold
19425// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
19426static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
19427 SelectionDAG &DAG) {
19428 SDValue N0 = N->getOperand(Num: 0);
19429 SDValue N1 = N->getOperand(Num: 1);
19430 EVT VT = N0.getValueType();
19431 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19432 unsigned Opcode = N->getOpcode();
19433 unsigned TargetOpcode;
19434
19435 switch (Opcode) {
19436 default:
19437 llvm_unreachable("Unexpected shift operation");
19438 case ISD::SHL:
19439 TargetOpcode = PPCISD::SHL;
19440 break;
19441 case ISD::SRL:
19442 TargetOpcode = PPCISD::SRL;
19443 break;
19444 case ISD::SRA:
19445 TargetOpcode = PPCISD::SRA;
19446 break;
19447 }
19448
19449 if (VT.isVector() && TLI.isOperationLegal(Op: Opcode, VT) &&
19450 N1->getOpcode() == ISD::AND)
19451 if (ConstantSDNode *Mask = isConstOrConstSplat(N: N1->getOperand(Num: 1)))
19452 if (Mask->getZExtValue() == OpSizeInBits - 1)
19453 return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc(N), VT, N1: N0, N2: N1->getOperand(Num: 0));
19454
19455 return SDValue();
19456}
19457
SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  EVT VT = N->getValueType(ResNo: 0);
  assert(VT.isVector() && "Vector type expected.");

  unsigned Opc = N->getOpcode();
  assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
         "Unexpected opcode.");

  if (!isOperationLegal(Op: Opc, VT))
    return SDValue();

  // Only word and doubleword element types are handled below.
  EVT EltTy = VT.getScalarType();
  unsigned EltBits = EltTy.getSizeInBits();
  if (EltTy != MVT::i64 && EltTy != MVT::i32)
    return SDValue();

  // Extract the splatted shift amount, either from a VADD_SPLAT node or
  // from a constant-splat BUILD_VECTOR.
  SDValue N1 = N->getOperand(Num: 1);
  uint64_t SplatBits = 0;
  bool AddSplatCase = false;
  unsigned OpcN1 = N1.getOpcode();
  if (OpcN1 == PPCISD::VADD_SPLAT &&
      N1.getConstantOperandVal(i: 1) == VT.getVectorNumElements()) {
    AddSplatCase = true;
    SplatBits = N1.getConstantOperandVal(i: 0);
  }

  if (!AddSplatCase) {
    if (OpcN1 != ISD::BUILD_VECTOR)
      return SDValue();

    // The splat must be a constant whose width matches the element width.
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    APInt APSplatBits, APSplatUndef;
    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Val&: N1);
    bool BVNIsConstantSplat =
        BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
                             HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian());
    if (!BVNIsConstantSplat || SplatBitSize != EltBits)
      return SDValue();
    SplatBits = APSplatBits.getZExtValue();
  }

  SDLoc DL(N);
  SDValue N0 = N->getOperand(Num: 0);
  // PPC vector shifts by word/double look at only the low 5/6 bits of the
  // shift vector, which means the max value is 31/63. A shift vector of all
  // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
  // -16 to 15 range.
  if (SplatBits == (EltBits - 1)) {
    // Opc is asserted above to be one of these three cases.
    unsigned NewOpc;
    switch (Opc) {
    case ISD::SHL:
      NewOpc = PPCISD::SHL;
      break;
    case ISD::SRL:
      NewOpc = PPCISD::SRL;
      break;
    case ISD::SRA:
      NewOpc = PPCISD::SRA;
      break;
    }
    // Replace the exact-width shift amount with an all-ones splat, which the
    // hardware truncates to the same value but is cheaper to materialize.
    SDValue SplatOnes = getCanonicalConstSplat(Val: 255, SplatSize: 1, VT, DAG&: DCI.DAG, dl: DL);
    return DCI.DAG.getNode(Opcode: NewOpc, DL, VT, N1: N0, N2: SplatOnes);
  }

  if (Opc != ISD::SHL || !isOperationLegal(Op: ISD::ADD, VT))
    return SDValue();

  // For 64-bit there is no splat immediate so we want to catch shift by 1 here
  // before the BUILD_VECTOR is replaced by a load.
  if (EltTy != MVT::i64 || SplatBits != 1)
    return SDValue();

  // (shl x, 1) -> (add x, x), which needs no splatted constant at all.
  return DCI.DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT, N1: N0, N2: N0);
}
19534
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  // First try to strip a redundant modulo mask from the shift amount.
  if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
    return Value;

  // Vector shifts have their own dedicated combines.
  if (N->getValueType(ResNo: 0).isVector())
    return combineVectorShift(N, DCI);

  // Scalar case: fold (shl (sext i32 x), c) to EXTSWSLI, which requires a
  // 64-bit ISA 3.0 target and a constant shift amount.
  SDValue N0 = N->getOperand(Num: 0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
  if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
      N0.getOpcode() != ISD::SIGN_EXTEND ||
      N0.getOperand(i: 0).getValueType() != MVT::i32 || CN1 == nullptr ||
      N->getValueType(ResNo: 0) != MVT::i64)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(i: 0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(i: 0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SDLoc DL(N0);
  SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswli, but the shift could
  // have an i64.
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(Val: CN1->getZExtValue(), DL, VT: MVT::i32);

  return DCI.DAG.getNode(Opcode: PPCISD::EXTSWSLI, DL, VT: MVT::i64, N1: N0->getOperand(Num: 0),
                         N2: ShiftBy);
}
19567
19568SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19569 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
19570 return Value;
19571
19572 if (N->getValueType(ResNo: 0).isVector())
19573 return combineVectorShift(N, DCI);
19574
19575 return SDValue();
19576}
19577
19578SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19579 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
19580 return Value;
19581
19582 if (N->getValueType(ResNo: 0).isVector())
19583 return combineVectorShift(N, DCI);
19584
19585 return SDValue();
19586}
19587
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  // This rewrite uses 64-bit carry-propagating nodes; 32-bit targets bail.
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);

  // Match a single-use (zext (setcc i64 Z, C)) where -C fits in 16 bits.
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(i: 0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(i: 0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Val: Cmp.getOperand(i: 1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(x: NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(a&: LHS, b&: RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  // The carry is modeled as i1 when CR bits are in use, i32 otherwise.
  EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
  SDVTList VTs = DAG.getVTList(VT1: MVT::i64, VT2: CarryType);
  SDValue Cmp = RHS.getOperand(i: 0);
  SDValue Z = Cmp.getOperand(i: 0);
  auto *Constant = cast<ConstantSDNode>(Val: Cmp.getOperand(i: 1));
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch(cast<CondCodeSDNode>(Val: Cmp.getOperand(i: 2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
                              N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // (AddOrZ + -1) carries exactly when AddOrZ != 0, i.e. Z != C.
    SDValue Addc =
        DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
                    N1: AddOrZ, N2: DAG.getAllOnesConstant(DL, VT: MVT::i64),
                    N3: DAG.getConstant(Val: 0, DL, VT: CarryType));
    return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
                       N2: DAG.getConstant(Val: 0, DL, VT: MVT::i64),
                       N3: SDValue(Addc.getNode(), 1));
  }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete  Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
                              N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // (0 - AddOrZ) borrows exactly when AddOrZ != 0; invert the borrow to
    // get the Z == C condition.
    SDValue Subc =
        DAG.getNode(Opcode: ISD::USUBO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
                    N1: DAG.getConstant(Val: 0, DL, VT: MVT::i64), N2: AddOrZ,
                    N3: DAG.getConstant(Val: 0, DL, VT: CarryType));
    SDValue Invert = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Subc.getValue(R: 1),
                                 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryType));
    return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
                       N2: DAG.getConstant(Val: 0, DL, VT: MVT::i64), N3: Invert);
  }
  }

  return SDValue();
}
19680
19681// Transform
19682// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19683// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19684// In this case both C1 and C2 must be known constants.
19685// C1+C2 must fit into a 34 bit signed integer.
19686static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
19687 const PPCSubtarget &Subtarget) {
19688 if (!Subtarget.isUsingPCRelativeCalls())
19689 return SDValue();
19690
19691 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19692 // If we find that node try to cast the Global Address and the Constant.
19693 SDValue LHS = N->getOperand(Num: 0);
19694 SDValue RHS = N->getOperand(Num: 1);
19695
19696 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19697 std::swap(a&: LHS, b&: RHS);
19698
19699 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19700 return SDValue();
19701
19702 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19703 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(Val: LHS.getOperand(i: 0));
19704 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(Val&: RHS);
19705
19706 // Check that both casts succeeded.
19707 if (!GSDN || !ConstNode)
19708 return SDValue();
19709
19710 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19711 SDLoc DL(GSDN);
19712
19713 // The signed int offset needs to fit in 34 bits.
19714 if (!isInt<34>(x: NewOffset))
19715 return SDValue();
19716
19717 // The new global address is a copy of the old global address except
19718 // that it has the updated Offset.
19719 SDValue GA =
19720 DAG.getTargetGlobalAddress(GV: GSDN->getGlobal(), DL, VT: GSDN->getValueType(ResNo: 0),
19721 offset: NewOffset, TargetFlags: GSDN->getTargetFlags());
19722 SDValue MatPCRel =
19723 DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: GSDN->getValueType(ResNo: 0), Operand: GA);
19724 return MatPCRel;
19725}
19726
19727// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19728// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19729// Mathematical identity: X + 1 = X - (-1)
19730// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19731// Requirement: VSX feature for efficient xxleqv generation
19732static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
19733 const PPCSubtarget &Subtarget) {
19734
19735 EVT VT = N->getValueType(ResNo: 0);
19736 if (!Subtarget.hasVSX())
19737 return SDValue();
19738
19739 // Handle v2i64, v4i32, v8i16 and v16i8 types
19740 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19741 VT == MVT::v2i64))
19742 return SDValue();
19743
19744 SDValue LHS = N->getOperand(Num: 0);
19745 SDValue RHS = N->getOperand(Num: 1);
19746
19747 // Check if RHS is BUILD_VECTOR
19748 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19749 return SDValue();
19750
19751 // Check if all the elements are 1
19752 unsigned NumOfEles = RHS.getNumOperands();
19753 for (unsigned i = 0; i < NumOfEles; ++i) {
19754 auto *CN = dyn_cast<ConstantSDNode>(Val: RHS.getOperand(i));
19755 if (!CN || CN->getSExtValue() != 1)
19756 return SDValue();
19757 }
19758 SDLoc DL(N);
19759
19760 SDValue MinusOne = DAG.getConstant(Val: APInt::getAllOnes(numBits: 32), DL, VT: MVT::i32);
19761 SmallVector<SDValue, 4> Ops(4, MinusOne);
19762 SDValue AllOnesVec = DAG.getBuildVector(VT: MVT::v4i32, DL, Ops);
19763
19764 // Bitcast to the target vector type
19765 SDValue Bitcast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: AllOnesVec);
19766
19767 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: LHS, N2: Bitcast);
19768}
19769
19770SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19771 if (auto Value = combineADDToADDZE(N, DAG&: DCI.DAG, Subtarget))
19772 return Value;
19773
19774 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DAG&: DCI.DAG, Subtarget))
19775 return Value;
19776
19777 if (auto Value = combineADDToSUB(N, DAG&: DCI.DAG, Subtarget))
19778 return Value;
19779 return SDValue();
19780}
19781
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situtation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(Num: 0);

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(ResNo: 0) != MVT::i64)
    return SDValue();

  // The low i64 half lives in element 1 on big-endian, element 0 otherwise.
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(i: 0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(i: 0).getValueType() == MVT::f128) {
    // View the f128 as v2i64 and pull out the half we want directly,
    // avoiding a store/reload of the value.
    SDValue Bitcast = DCI.DAG.getBitcast(VT: MVT::v2i64, V: Op0.getOperand(i: 0));
    return DCI.DAG.getNode(
        Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Bitcast,
        N2: DCI.DAG.getTargetConstant(Val: EltToExtract, DL: dl, VT: MVT::i32));
  }
  return SDValue();
}
19833
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Only multiplies by a constant (or constant splat) are decomposed.
  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N: N->getOperand(Num: 1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(Op: ISD::MUL, VT: N->getValueType(ResNo: 0)))
    return SDValue();

  // Cost model: decide per CPU whether the shift/add expansion beats mul.
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
      // scalar        4       1      1
      // vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR11:
    case PPC::DIR_PWR_FUTURE:
      //  type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle RATIO of related operations are showed as a table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(ResNo: 0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(Num: 0);
    SDValue Op1 =
        DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
                    N2: DAG.getConstant(Val: (MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1);

    if (!IsNeg)
      return Res;

    // Negate via (0 - Res) for the negative-multiplier form.
    return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(Num: 0);
    SDValue Op1 =
        DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
                    N2: DAG.getConstant(Val: (MulAmtAbs + 1).logBase2(), DL, VT));

    // The operand order of the SUB encodes the sign of the multiplier.
    if (!IsNeg)
      return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op1, N2: Op0);
    else
      return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1);

  } else {
    return SDValue();
  }
}
19919
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
// in combiner since we need to check SD flags and other subtarget features.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);
  SDValue N2 = N->getOperand(Num: 2);
  SDNodeFlags Flags = N->getFlags();
  EVT VT = N->getValueType(ResNo: 0);
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opc = N->getOpcode();
  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
  bool LegalOps = !DCI.isBeforeLegalizeOps();
  SDLoc Loc(N);

  if (!isOperationLegal(Op: ISD::FMA, VT))
    return SDValue();

  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
  // since (fnmsub a b c)=-0 while c-ab=+0.
  if (!Flags.hasNoSignedZeros())
    return SDValue();

  // (fma (fneg a) b c) => (fnmsub a b c)
  // (fnmsub (fneg a) b c) => (fma a b c)
  if (SDValue NegN0 = getCheaperNegatedExpression(Op: N0, DAG, LegalOps, OptForSize: CodeSize))
    return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: NegN0, N2: N1, N3: N2, Flags);

  // (fma a (fneg b) c) => (fnmsub a b c)
  // (fnmsub a (fneg b) c) => (fma a b c)
  if (SDValue NegN1 = getCheaperNegatedExpression(Op: N1, DAG, LegalOps, OptForSize: CodeSize))
    return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: N0, N2: NegN1, N3: N2, Flags);

  return SDValue();
}
19955
19956bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19957 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19958 if (!Subtarget.is64BitELFABI())
19959 return false;
19960
19961 // If not a tail call then no need to proceed.
19962 if (!CI->isTailCall())
19963 return false;
19964
19965 // If sibling calls have been disabled and tail-calls aren't guaranteed
19966 // there is no reason to duplicate.
19967 auto &TM = getTargetMachine();
19968 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19969 return false;
19970
19971 // Can't tail call a function called indirectly, or if it has variadic args.
19972 const Function *Callee = CI->getCalledFunction();
19973 if (!Callee || Callee->isVarArg())
19974 return false;
19975
19976 // Make sure the callee and caller calling conventions are eligible for tco.
19977 const Function *Caller = CI->getParent()->getParent();
19978 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC: Caller->getCallingConv(),
19979 CalleeCC: CI->getCallingConv()))
19980 return false;
19981
19982 // If the function is local then we have a good chance at tail-calling it
19983 return getTargetMachine().shouldAssumeDSOLocal(GV: Callee);
19984}
19985
19986bool PPCTargetLowering::
19987isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19988 const Value *Mask = AndI.getOperand(i: 1);
19989 // If the mask is suitable for andi. or andis. we should sink the and.
19990 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Mask)) {
19991 // Can't handle constants wider than 64-bits.
19992 if (CI->getBitWidth() > 64)
19993 return false;
19994 int64_t ConstVal = CI->getZExtValue();
19995 return isUInt<16>(x: ConstVal) ||
19996 (isUInt<16>(x: ConstVal >> 16) && !(ConstVal & 0xFFFF));
19997 }
19998
19999 // For non-constant masks, we can always use the record-form and.
20000 return true;
20001}
20002
20003/// getAddrModeForFlags - Based on the set of address flags, select the most
20004/// optimal instruction format to match by.
20005PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20006 // This is not a node we should be handling here.
20007 if (Flags == PPC::MOF_None)
20008 return PPC::AM_None;
20009 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20010 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DForm))
20011 if ((Flags & FlagSet) == FlagSet)
20012 return PPC::AM_DForm;
20013 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DSForm))
20014 if ((Flags & FlagSet) == FlagSet)
20015 return PPC::AM_DSForm;
20016 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DQForm))
20017 if ((Flags & FlagSet) == FlagSet)
20018 return PPC::AM_DQForm;
20019 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_PrefixDForm))
20020 if ((Flags & FlagSet) == FlagSet)
20021 return PPC::AM_PrefixDForm;
20022 // If no other forms are selected, return an X-Form as it is the most
20023 // general addressing mode.
20024 return PPC::AM_XForm;
20025}
20026
20027/// Set alignment flags based on whether or not the Frame Index is aligned.
20028/// Utilized when computing flags for address computation when selecting
20029/// load and store instructions.
20030static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
20031 SelectionDAG &DAG) {
20032 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
20033 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: IsAdd ? N.getOperand(i: 0) : N);
20034 if (!FI)
20035 return;
20036 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
20037 unsigned FrameIndexAlign = MFI.getObjectAlign(ObjectIdx: FI->getIndex()).value();
20038 // If this is (add $FI, $S16Imm), the alignment flags are already set
20039 // based on the immediate. We just need to clear the alignment flags
20040 // if the FI alignment is weaker.
20041 if ((FrameIndexAlign % 4) != 0)
20042 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
20043 if ((FrameIndexAlign % 16) != 0)
20044 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
20045 // If the address is a plain FrameIndex, set alignment flags based on
20046 // FI alignment.
20047 if (!IsAdd) {
20048 if ((FrameIndexAlign % 4) == 0)
20049 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20050 if ((FrameIndexAlign % 16) == 0)
20051 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20052 }
20053}
20054
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function distinguishes three address shapes — a constant,
/// an ADD (or an OR provably equivalent to an ADD), and everything else —
/// and computes the address flags accordingly.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
                                              SelectionDAG &DAG) {
  // Set the alignment flags for the node depending on if the node is
  // 4-byte or 16-byte aligned.
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
    if ((Imm & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Imm & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  };

  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
    // All 32-bit constants can be computed as LIS + Disp.
    const APInt &ConstImm = CN->getAPIntValue();
    if (ConstImm.isSignedIntN(N: 32)) { // Flag to handle 32-bit constants.
      FlagSet |= PPC::MOF_AddrIsSImm32;
      SetAlignFlagsForImm(ConstImm.getZExtValue());
      setAlignFlagsForFI(N, FlagSet, DAG);
    }
    if (ConstImm.isSignedIntN(N: 34)) // Flag to handle 34-bit constants.
      FlagSet |= PPC::MOF_RPlusSImm34;
    else // Let constant materialization handle large constants.
      FlagSet |= PPC::MOF_NotAddNorCst;
  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
    // This address can be represented as an addition of:
    // - Register + Imm16 (possibly a multiple of 4/16)
    // - Register + Imm34
    // - Register + PPCISD::Lo
    // - Register + Register
    // In any case, we won't have to match this as Base + Zero.
    SDValue RHS = N.getOperand(i: 1);
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: RHS)) {
      const APInt &ConstImm = CN->getAPIntValue();
      // Note: the 16-bit and 34-bit cases are not exclusive — a 16-bit
      // immediate also sets the 34-bit flag below.
      if (ConstImm.isSignedIntN(N: 16)) {
        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
        SetAlignFlagsForImm(ConstImm.getZExtValue());
        setAlignFlagsForFI(N, FlagSet, DAG);
      }
      if (ConstImm.isSignedIntN(N: 34))
        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
      else
        FlagSet |= PPC::MOF_RPlusR; // Register.
    } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(i: 1))
      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
    else
      FlagSet |= PPC::MOF_RPlusR;
  } else { // The address computation is not a constant or an addition.
    setAlignFlagsForFI(N, FlagSet, DAG);
    FlagSet |= PPC::MOF_NotAddNorCst;
  }
}
20110
20111static bool isPCRelNode(SDValue N) {
20112 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
20113 isValidPCRelNode<ConstantPoolSDNode>(N) ||
20114 isValidPCRelNode<GlobalAddressSDNode>(N) ||
20115 isValidPCRelNode<JumpTableSDNode>(N) ||
20116 isValidPCRelNode<BlockAddressSDNode>(N));
20117}
20118
/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
/// The returned bit set combines subtarget capabilities, the in-memory type
/// of the access, the shape of the address computation and the extension
/// kind of a load; it is later matched against AddrModesMap entries.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
                                           SelectionDAG &DAG) const {
  unsigned FlagSet = PPC::MOF_None;

  // Compute subtarget flags.
  if (!Subtarget.hasP9Vector())
    FlagSet |= PPC::MOF_SubtargetBeforeP9;
  else
    FlagSet |= PPC::MOF_SubtargetP9;

  if (Subtarget.hasPrefixInstrs())
    FlagSet |= PPC::MOF_SubtargetP10;

  if (Subtarget.hasSPE())
    FlagSet |= PPC::MOF_SubtargetSPE;

  // Check if we have a PCRel node and return early.
  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
    return FlagSet;

  // If the node is the paired load/store intrinsics, compute flags for
  // address computation and return early.
  unsigned ParentOp = Parent->getOpcode();
  if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
                               (ParentOp == ISD::INTRINSIC_VOID))) {
    unsigned ID = Parent->getConstantOperandVal(Num: 1);
    if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
      // The address operand is operand 2 for the load intrinsic and
      // operand 3 for the store intrinsic.
      SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
                             ? Parent->getOperand(Num: 2)
                             : Parent->getOperand(Num: 3);
      computeFlagsForAddressComputation(N: IntrinOp, FlagSet, DAG);
      FlagSet |= PPC::MOF_Vector;
      return FlagSet;
    }
  }

  // Mark this as something we don't want to handle here if it is atomic
  // or pre-increment instruction.
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Val: Parent))
    if (LSB->isIndexed())
      return PPC::MOF_None;

  // Compute in-memory type flags. This is based on if there are scalars,
  // floats or vectors.
  const MemSDNode *MN = dyn_cast<MemSDNode>(Val: Parent);
  assert(MN && "Parent should be a MemSDNode!");
  EVT MemVT = MN->getMemoryVT();
  unsigned Size = MemVT.getSizeInBits();
  if (MemVT.isScalarInteger()) {
    assert(Size <= 128 &&
           "Not expecting scalar integers larger than 16 bytes!");
    if (Size < 32)
      FlagSet |= PPC::MOF_SubWordInt;
    else if (Size == 32)
      FlagSet |= PPC::MOF_WordInt;
    else
      FlagSet |= PPC::MOF_DoubleWordInt;
  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
    if (Size == 128)
      FlagSet |= PPC::MOF_Vector;
    else if (Size == 256) {
      assert(Subtarget.pairedVectorMemops() &&
             "256-bit vectors are only available when paired vector memops is "
             "enabled!");
      FlagSet |= PPC::MOF_Vector;
    } else
      llvm_unreachable("Not expecting illegal vectors!");
  } else { // Floating point type: can be scalar, f128 or vector types.
    if (Size == 32 || Size == 64)
      FlagSet |= PPC::MOF_ScalarFloat;
    else if (MemVT == MVT::f128 || MemVT.isVector())
      FlagSet |= PPC::MOF_Vector;
    else
      llvm_unreachable("Not expecting illegal scalar floats!");
  }

  // Compute flags for address computation.
  computeFlagsForAddressComputation(N, FlagSet, DAG);

  // Compute type extension flags.
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Val: Parent)) {
    switch (LN->getExtensionType()) {
    case ISD::SEXTLOAD:
      FlagSet |= PPC::MOF_SExt;
      break;
    case ISD::EXTLOAD:
    case ISD::ZEXTLOAD:
      FlagSet |= PPC::MOF_ZExt;
      break;
    case ISD::NON_EXTLOAD:
      FlagSet |= PPC::MOF_NoExt;
      break;
    }
  } else
    // Stores and other memory nodes carry no extension.
    FlagSet |= PPC::MOF_NoExt;

  // For integers, no extension is the same as zero extension.
  // We set the extension mode to zero extension so we don't have
  // to add separate entries in AddrModesMap for loads and stores.
  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
    FlagSet |= PPC::MOF_ZExt;
    FlagSet &= ~PPC::MOF_NoExt;
  }

  // If we don't have prefixed instructions, 34-bit constants should be
  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
  // (True when RPlusSImm34 is set but neither AddrIsSImm32 nor
  // SubtargetP10 is.)
  bool IsNonP1034BitConst =
      ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
       FlagSet) == PPC::MOF_RPlusSImm34;
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
      IsNonP1034BitConst)
    FlagSet |= PPC::MOF_NotAddNorCst;

  return FlagSet;
}
20236
20237/// SelectForceXFormMode - Given the specified address, force it to be
20238/// represented as an indexed [r+r] operation (an XForm instruction).
20239PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
20240 SDValue &Base,
20241 SelectionDAG &DAG) const {
20242
20243 PPC::AddrMode Mode = PPC::AM_XForm;
20244 int16_t ForceXFormImm = 0;
20245 if (provablyDisjointOr(DAG, N) &&
20246 !isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: ForceXFormImm)) {
20247 Disp = N.getOperand(i: 0);
20248 Base = N.getOperand(i: 1);
20249 return Mode;
20250 }
20251
20252 // If the address is the result of an add, we will utilize the fact that the
20253 // address calculation includes an implicit add. However, we can reduce
20254 // register pressure if we do not materialize a constant just for use as the
20255 // index register. We only get rid of the add if it is not an add of a
20256 // value and a 16-bit signed constant and both have a single use.
20257 if (N.getOpcode() == ISD::ADD &&
20258 (!isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: ForceXFormImm) ||
20259 !N.getOperand(i: 1).hasOneUse() || !N.getOperand(i: 0).hasOneUse())) {
20260 Disp = N.getOperand(i: 0);
20261 Base = N.getOperand(i: 1);
20262 return Mode;
20263 }
20264
20265 // Otherwise, use R0 as the base register.
20266 Disp = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20267 VT: N.getValueType());
20268 Base = N;
20269
20270 return Mode;
20271}
20272
20273bool PPCTargetLowering::splitValueIntoRegisterParts(
20274 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20275 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20276 EVT ValVT = Val.getValueType();
20277 // If we are splitting a scalar integer into f64 parts (i.e. so they
20278 // can be placed into VFRC registers), we need to zero extend and
20279 // bitcast the values. This will ensure the value is placed into a
20280 // VSR using direct moves or stack operations as needed.
20281 if (PartVT == MVT::f64 &&
20282 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
20283 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
20284 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f64, Operand: Val);
20285 Parts[0] = Val;
20286 return true;
20287 }
20288 return false;
20289}
20290
/// Lower \p Op into a plain C call to the external symbol \p LibCallName,
/// forwarding Op's operands as arguments and returning the call result.
SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
                                          SelectionDAG &DAG) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  TargetLowering::CallLoweringInfo CLI(DAG);
  EVT RetVT = Op.getValueType();
  Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext());
  SDValue Callee =
      DAG.getExternalSymbol(Sym: LibCallName, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
  // The result's extension kind also decides how arguments are extended.
  bool SignExtend = TLI.shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: false);
  TargetLowering::ArgListTy Args;
  // Every operand of Op becomes an argument of the library call.
  for (const SDValue &N : Op->op_values()) {
    EVT ArgVT = N.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(Context&: *DAG.getContext());
    TargetLowering::ArgListEntry Entry(N, ArgTy);
    Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(Ty: ArgTy, IsSigned: SignExtend);
    Entry.IsZExt = !Entry.IsSExt;
    Args.push_back(x: Entry);
  }

  SDValue InChain = DAG.getEntryNode();
  SDValue TCChain = InChain;
  const Function &F = DAG.getMachineFunction().getFunction();
  // Emit as a tail call only when the node is in tail-call position and the
  // return type matches the caller's return type (or the caller is void).
  bool isTailCall =
      TLI.isInTailCallPosition(DAG, Node: Op.getNode(), Chain&: TCChain) &&
      (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
  if (isTailCall)
    InChain = TCChain;
  CLI.setDebugLoc(SDLoc(Op))
      .setChain(InChain)
      .setLibCallee(CC: CallingConv::C, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
      .setTailCall(isTailCall)
      .setSExtResult(SignExtend)
      .setZExtResult(!SignExtend)
      .setIsPostTypeLegalization(true);
  return TLI.LowerCallTo(CLI).first;
}
20327
20328SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20329 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20330 SelectionDAG &DAG) const {
20331 if (Op.getValueType() == MVT::f32)
20332 return lowerToLibCall(LibCallName: LibCallFloatName, Op, DAG);
20333
20334 if (Op.getValueType() == MVT::f64)
20335 return lowerToLibCall(LibCallName: LibCallDoubleName, Op, DAG);
20336
20337 return SDValue();
20338}
20339
20340bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20341 SDNodeFlags Flags = Op.getNode()->getFlags();
20342 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20343 Flags.hasNoNaNs() && Flags.hasNoInfs();
20344}
20345
20346bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20347 return Op.getNode()->getFlags().hasApproximateFuncs();
20348}
20349
20350bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20351 return getTargetMachine().Options.PPCGenScalarMASSEntries;
20352}
20353
20354SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20355 const char *LibCallFloatName,
20356 const char *LibCallDoubleNameFinite,
20357 const char *LibCallFloatNameFinite,
20358 SDValue Op,
20359 SelectionDAG &DAG) const {
20360 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20361 return SDValue();
20362
20363 if (!isLowringToMASSFiniteSafe(Op))
20364 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20365 DAG);
20366
20367 return lowerLibCallBasedOnType(LibCallFloatName: LibCallFloatNameFinite,
20368 LibCallDoubleName: LibCallDoubleNameFinite, Op, DAG);
20369}
20370
/// Lower a pow node to the scalar MASS entry points (__xl_pow/__xl_powf,
/// or their _finite variants when the fast-math flags permit).
SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase(LibCallDoubleName: "__xl_pow", LibCallFloatName: "__xl_powf", LibCallDoubleNameFinite: "__xl_pow_finite",
                          LibCallFloatNameFinite: "__xl_powf_finite", Op, DAG);
}
20375
/// Lower a sin node to the scalar MASS entry points (__xl_sin/__xl_sinf,
/// or their _finite variants when the fast-math flags permit).
SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase(LibCallDoubleName: "__xl_sin", LibCallFloatName: "__xl_sinf", LibCallDoubleNameFinite: "__xl_sin_finite",
                          LibCallFloatNameFinite: "__xl_sinf_finite", Op, DAG);
}
20380
/// Lower a cos node to the scalar MASS entry points (__xl_cos/__xl_cosf,
/// or their _finite variants when the fast-math flags permit).
SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase(LibCallDoubleName: "__xl_cos", LibCallFloatName: "__xl_cosf", LibCallDoubleNameFinite: "__xl_cos_finite",
                          LibCallFloatNameFinite: "__xl_cosf_finite", Op, DAG);
}
20385
/// Lower a log node to the scalar MASS entry points (__xl_log/__xl_logf,
/// or their _finite variants when the fast-math flags permit).
SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase(LibCallDoubleName: "__xl_log", LibCallFloatName: "__xl_logf", LibCallDoubleNameFinite: "__xl_log_finite",
                          LibCallFloatNameFinite: "__xl_logf_finite", Op, DAG);
}
20390
/// Lower a log10 node to the scalar MASS entry points
/// (__xl_log10/__xl_log10f, or their _finite variants when permitted).
SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase(LibCallDoubleName: "__xl_log10", LibCallFloatName: "__xl_log10f", LibCallDoubleNameFinite: "__xl_log10_finite",
                          LibCallFloatNameFinite: "__xl_log10f_finite", Op, DAG);
}
20395
/// Lower an exp node to the scalar MASS entry points (__xl_exp/__xl_expf,
/// or their _finite variants when the fast-math flags permit).
SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase(LibCallDoubleName: "__xl_exp", LibCallFloatName: "__xl_expf", LibCallDoubleNameFinite: "__xl_exp_finite",
                          LibCallFloatNameFinite: "__xl_expf_finite", Op, DAG);
}
20400
20401// If we happen to match to an aligned D-Form, check if the Frame Index is
20402// adequately aligned. If it is not, reset the mode to match to X-Form.
20403static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20404 PPC::AddrMode &Mode) {
20405 if (!isa<FrameIndexSDNode>(Val: N))
20406 return;
20407 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20408 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
20409 Mode = PPC::AM_XForm;
20410}
20411
/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
/// \p Align, when present, is the required displacement alignment for the
/// candidate D-Form; an insufficiently aligned immediate falls through to
/// the non-foldable Base + 0 form.
PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
                                                       SDValue N, SDValue &Disp,
                                                       SDValue &Base,
                                                       SelectionDAG &DAG,
                                                       MaybeAlign Align) const {
  SDLoc DL(Parent);

  // Compute the address flags.
  unsigned Flags = computeMOFlags(Parent, N, DAG);

  // Get the optimal address mode based on the Flags.
  PPC::AddrMode Mode = getAddrModeForFlags(Flags);

  // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
  // Select an X-Form load if it is not.
  setXFormForUnalignedFI(N, Flags, Mode);

  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
    assert(Subtarget.isUsingPCRelativeCalls() &&
           "Must be using PC-Relative calls when a valid PC-Relative node is "
           "present!");
    Mode = PPC::AM_PCRel;
  }

  // Set Base and Disp accordingly depending on the address mode.
  switch (Mode) {
  case PPC::AM_DForm:
  case PPC::AM_DSForm:
  case PPC::AM_DQForm: {
    // This is a register plus a 16-bit immediate. The base will be the
    // register and the displacement will be the immediate unless it
    // isn't sufficiently aligned.
    if (Flags & PPC::MOF_RPlusSImm16) {
      SDValue Op0 = N.getOperand(i: 0);
      SDValue Op1 = N.getOperand(i: 1);
      int16_t Imm = Op1->getAsZExtVal();
      if (!Align || isAligned(Lhs: *Align, SizeInBytes: Imm)) {
        Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: N.getValueType());
        Base = Op0;
        // Frame indices need to become target frame indices (and may need
        // the function fixed up) before they can be matched.
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op0)) {
          Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
          fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
        }
        break;
      }
    }
    // This is a register plus the @lo relocation. The base is the register
    // and the displacement is the global address.
    else if (Flags & PPC::MOF_RPlusLo) {
      Disp = N.getOperand(i: 1).getOperand(i: 0); // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(i: 0);
      break;
    }
    // This is a constant address at most 32 bits. The base will be
    // zero or load-immediate-shifted and the displacement will be
    // the low 16 bits of the address.
    else if (Flags & PPC::MOF_AddrIsSImm32) {
      auto *CN = cast<ConstantSDNode>(Val&: N);
      EVT CNType = CN->getValueType(ResNo: 0);
      uint64_t CNImm = CN->getZExtValue();
      // If this address fits entirely in a 16-bit sext immediate field, codegen
      // this as "d, 0".
      int16_t Imm;
      if (isIntS16Immediate(N: CN, Imm) && (!Align || isAligned(Lhs: *Align, SizeInBytes: Imm))) {
        Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: CNType);
        Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               VT: CNType);
        break;
      }
      // Handle 32-bit sext immediate with LIS + Addr mode.
      if ((CNType == MVT::i32 || isInt<32>(x: CNImm)) &&
          (!Align || isAligned(Lhs: *Align, SizeInBytes: CNImm))) {
        int32_t Addr = (int32_t)CNImm;
        // Otherwise, break this down into LIS + Disp. Subtracting the
        // sign-extended low halfword before shifting compensates for the
        // sign extension the displacement will undergo.
        Disp = DAG.getSignedTargetConstant(Val: (int16_t)Addr, DL, VT: MVT::i32);
        Base = DAG.getSignedTargetConstant(Val: (Addr - (int16_t)Addr) >> 16, DL,
                                           VT: MVT::i32);
        uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(Opcode: LIS, dl: DL, VT: CNType, Op1: Base), 0);
        break;
      }
    }
    // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
    Disp = DAG.getTargetConstant(Val: 0, DL, VT: getPointerTy(DL: DAG.getDataLayout()));
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
      Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
      fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
    } else
      Base = N;
    break;
  }
  case PPC::AM_PrefixDForm: {
    int64_t Imm34 = 0;
    unsigned Opcode = N.getOpcode();
    if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
        (isIntS34Immediate(Op: N.getOperand(i: 1), Imm&: Imm34))) {
      // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
      Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
        Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
      else
        Base = N.getOperand(i: 0);
    } else if (isIntS34Immediate(Op: N, Imm&: Imm34)) {
      // The address is a 34-bit signed immediate.
      Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
      Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
    }
    break;
  }
  case PPC::AM_PCRel: {
    // When selecting PC-Relative instructions, "Base" is not utilized as
    // we select the address as [PC+imm].
    Disp = N;
    break;
  }
  case PPC::AM_None:
    break;
  default: { // By default, X-Form is always available to be selected.
    // When a frame index is not aligned, we also match by XForm.
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N);
    Base = FI ? N : N.getOperand(i: 1);
    Disp = FI ? DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                                VT: N.getValueType())
              : N.getOperand(i: 0);
    break;
  }
  }
  return Mode;
}
20549
20550CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
20551 bool Return,
20552 bool IsVarArg) const {
20553 switch (CC) {
20554 case CallingConv::Cold:
20555 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20556 default:
20557 return CC_PPC64_ELF;
20558 }
20559}
20560
/// Quadword (128-bit) atomics are only inlined on 64-bit subtargets that
/// have the quadword atomic instructions.
bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
  return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
}
20564
20565TargetLowering::AtomicExpansionKind
20566PPCTargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
20567 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20568 if (shouldInlineQuadwordAtomics() && Size == 128)
20569 return AtomicExpansionKind::MaskedIntrinsic;
20570
20571 switch (AI->getOperation()) {
20572 case AtomicRMWInst::UIncWrap:
20573 case AtomicRMWInst::UDecWrap:
20574 case AtomicRMWInst::USubCond:
20575 case AtomicRMWInst::USubSat:
20576 return AtomicExpansionKind::CmpXChg;
20577 default:
20578 return TargetLowering::shouldExpandAtomicRMWInIR(RMW: AI);
20579 }
20580
20581 llvm_unreachable("unreachable atomicrmw operation");
20582}
20583
20584TargetLowering::AtomicExpansionKind
20585PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(
20586 const AtomicCmpXchgInst *AI) const {
20587 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
20588 if (shouldInlineQuadwordAtomics() && Size == 128)
20589 return AtomicExpansionKind::MaskedIntrinsic;
20590 return AtomicExpansionKind::LLSC;
20591}
20592
20593static Intrinsic::ID
20594getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
20595 switch (BinOp) {
20596 default:
20597 llvm_unreachable("Unexpected AtomicRMW BinOp");
20598 case AtomicRMWInst::Xchg:
20599 return Intrinsic::ppc_atomicrmw_xchg_i128;
20600 case AtomicRMWInst::Add:
20601 return Intrinsic::ppc_atomicrmw_add_i128;
20602 case AtomicRMWInst::Sub:
20603 return Intrinsic::ppc_atomicrmw_sub_i128;
20604 case AtomicRMWInst::And:
20605 return Intrinsic::ppc_atomicrmw_and_i128;
20606 case AtomicRMWInst::Or:
20607 return Intrinsic::ppc_atomicrmw_or_i128;
20608 case AtomicRMWInst::Xor:
20609 return Intrinsic::ppc_atomicrmw_xor_i128;
20610 case AtomicRMWInst::Nand:
20611 return Intrinsic::ppc_atomicrmw_nand_i128;
20612 }
20613}
20614
/// Expand a 128-bit atomicrmw: split the i128 increment into two i64
/// halves, call the matching ppc_atomicrmw_*_i128 intrinsic, then
/// reassemble the {lo, hi} result into a single i128 value.
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = Incr->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Type *Int64Ty = Type::getInt64Ty(C&: M->getContext());
  // Low half is a plain truncation; high half is the value shifted down
  // by 64 and truncated.
  Value *IncrLo = Builder.CreateTrunc(V: Incr, DestTy: Int64Ty, Name: "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(V: Builder.CreateLShr(LHS: Incr, RHS: 64), DestTy: Int64Ty, Name: "incr_hi");
  // The intrinsic returns the old value as a {lo, hi} pair of i64s.
  Value *LoHi = Builder.CreateIntrinsic(
      ID: getIntrinsicForAtomicRMWBinOp128(BinOp: AI->getOperation()), Types: {},
      Args: {AlignedAddr, IncrLo, IncrHi});
  Value *Lo = Builder.CreateExtractValue(Agg: LoHi, Idxs: 0, Name: "lo");
  Value *Hi = Builder.CreateExtractValue(Agg: LoHi, Idxs: 1, Name: "hi");
  Lo = Builder.CreateZExt(V: Lo, DestTy: ValTy, Name: "lo64");
  Hi = Builder.CreateZExt(V: Hi, DestTy: ValTy, Name: "hi64");
  // val64 = zext(lo) | (zext(hi) << 64)
  return Builder.CreateOr(
      LHS: Lo, RHS: Builder.CreateShl(LHS: Hi, RHS: ConstantInt::get(Ty: ValTy, V: 64)), Name: "val64");
}
20636
/// Expand a 128-bit cmpxchg: split the compare and new values into i64
/// halves, call the ppc_cmpxchg_i128 intrinsic bracketed by the required
/// fences, then reassemble the {lo, hi} result into an i128 value.
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = CmpVal->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *IntCmpXchg =
      Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::ppc_cmpxchg_i128);
  Type *Int64Ty = Type::getInt64Ty(C&: M->getContext());
  // Split both the expected and the replacement value into i64 halves.
  Value *CmpLo = Builder.CreateTrunc(V: CmpVal, DestTy: Int64Ty, Name: "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(V: Builder.CreateLShr(LHS: CmpVal, RHS: 64), DestTy: Int64Ty, Name: "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(V: NewVal, DestTy: Int64Ty, Name: "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(V: Builder.CreateLShr(LHS: NewVal, RHS: 64), DestTy: Int64Ty, Name: "new_hi");
  // Ordering is enforced with explicit leading/trailing fences around the
  // intrinsic call.
  emitLeadingFence(Builder, Inst: CI, Ord);
  Value *LoHi =
      Builder.CreateCall(Callee: IntCmpXchg, Args: {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
  emitTrailingFence(Builder, Inst: CI, Ord);
  Value *Lo = Builder.CreateExtractValue(Agg: LoHi, Idxs: 0, Name: "lo");
  Value *Hi = Builder.CreateExtractValue(Agg: LoHi, Idxs: 1, Name: "hi");
  Lo = Builder.CreateZExt(V: Lo, DestTy: ValTy, Name: "lo64");
  Hi = Builder.CreateZExt(V: Hi, DestTy: ValTy, Name: "hi64");
  // val64 = zext(lo) | (zext(hi) << 64)
  return Builder.CreateOr(
      LHS: Lo, RHS: Builder.CreateShl(LHS: Hi, RHS: ConstantInt::get(Ty: ValTy, V: 64)), Name: "val64");
}
20664
/// Report multiple condition registers whenever the subtarget tracks i1
/// values directly in CR bits.
bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
  return Subtarget.useCRBits();
}
20668