//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisableP10StoreForward(
    "disable-p10-store-forward",
    cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32(
    "disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"),
    cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

static cl::opt<bool>
    DisablePerfectShuffle("ppc-disable-perfect-shuffle",
                          cl::desc("disable vector permute decomposition"),
                          cl::init(true), cl::Hidden);

cl::opt<bool> DisableAutoPairedVecSt(
    "disable-auto-paired-vec-st",
    cl::desc("disable automatically generated 32byte paired vector stores"),
    cl::init(true), cl::Hidden);

static cl::opt<unsigned> PPCMinimumJumpTableEntries(
    "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on PPC"));

static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
    "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
    cl::desc("max depth when checking alias info in GatherAllAliases()"));

static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
    "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
    cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
             "function to use initial-exec"));

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
          "Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

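// Forward declarations of static helpers defined later in this file.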
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";

// A faster local-[exec|dynamic] TLS access sequence (enabled with the
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
// variables; consistent with the IBM XL compiler, we apply a max size of
// slightly under 32KB.
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Initialize map that relates the PPC addressing modes to the computed flags
  // of a load/store instruction. The map is used to determine the optimal
  // addressing mode when selecting loads and stores.
  initializeAddrModeMap();
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
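  // RegVT is the widest native integer register type: i64 when compiling for
  // a 64-bit subtarget, i32 otherwise.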
  const MVT RegVT = Subtarget.getScalarIntVT();

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      // EFPU2 APU only supports f32
      if (!Subtarget.hasEFPU2())
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  setOperationAction(ISD::UADDO, RegVT, Custom);
  setOperationAction(ISD::USUBO, RegVT, Custom);

  // PowerPC uses addo_carry, subo_carry to propagate carry.
  setOperationAction(ISD::UADDO_CARRY, RegVT, Custom);
  setOperationAction(ISD::USUBO_CARRY, RegVT, Custom);

  // On P10, the default lowering generates better code using the
  // setbc instruction.
  if (!Subtarget.hasP10Vector()) {
    setOperationAction(ISD::SSUBO, MVT::i32, Custom);
    if (isPPC64)
      setOperationAction(ISD::SSUBO, MVT::i64, Custom);
  }

  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // Custom lower inline assembly to check for special registers.
  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
  setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f128, MVT::f16, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);

  if (Subtarget.isISA3_0()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
  } else {
    // No extending loads from f16 or HW conversions back and forth.
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1, RegVT);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1, RegVT);

      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);

      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1, RegVT);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1, RegVT);

      setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
      setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
    } else {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

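    // With CR-bit tracking enabled, i1 values are carried in individual
    // condition-register bits.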
    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // When the result of both the remainder and the division is required it is
  // more efficient to compute the remainder from the result of the division
  // rather than use the remainder instruction. The instructions are legalized
  // directly because the DivRemPairsPass performs the transformation at the IR
  // level.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // Handle constrained floating-point operations for scalar types.
  // TODO: Handle SPE specific operation.
  setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);

  setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);

  if (!Subtarget.hasSPE()) {
    setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
  }

  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
  }

  if (Subtarget.hasFSQRT()) {
    setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);

    setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
  }

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // MASS transformation for LLVM intrinsics with replicating fast-math flag,
  // to be consistent with the PPCGenScalarMASSEntries pass.
  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
    setOperationAction(ISD::FSIN, MVT::f64, Custom);
    setOperationAction(ISD::FCOS, MVT::f64, Custom);
    setOperationAction(ISD::FPOW, MVT::f64, Custom);
    setOperationAction(ISD::FLOG, MVT::f64, Custom);
    setOperationAction(ISD::FLOG10, MVT::f64, Custom);
    setOperationAction(ISD::FEXP, MVT::f64, Custom);
    setOperationAction(ISD::FSIN, MVT::f32, Custom);
    setOperationAction(ISD::FCOS, MVT::f32, Custom);
    setOperationAction(ISD::FPOW, MVT::f32, Custom);
    setOperationAction(ISD::FLOG, MVT::f32, Custom);
    setOperationAction(ISD::FLOG10, MVT::f32, Custom);
    setOperationAction(ISD::FEXP, MVT::f32, Custom);
  }

  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
  }

  if (Subtarget.hasSPE())
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
  // instruction xxbrd to speed up scalar BSWAP64.
  if (Subtarget.isISA3_1()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Legal);
    setOperationAction(ISD::BSWAP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
    setOperationAction(ISD::BSWAP, MVT::i64,
                       (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
  }

  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

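  // Only use the hardware population-count instruction when the subtarget
  // reports popcntd as fast; otherwise expand CTPOP.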
  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  if (Subtarget.hasFPU()) {
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);

    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
  }

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);

    // SPE supports signaling compare of f32/f64.
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

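  // With direct moves between GPRs and vector registers (P8 and later, 64-bit
  // only), bitcasts between f32/f64 and i32/i64 can avoid a round trip
  // through memory.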
  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::LRINT, MVT::f64, Legal);
      setOperationAction(ISD::LRINT, MVT::f32, Legal);
      setOperationAction(ISD::LLRINT, MVT::f64, Legal);
      setOperationAction(ISD::LLRINT, MVT::f32, Legal);
      setOperationAction(ISD::LROUND, MVT::f64, Legal);
      setOperationAction(ISD::LROUND, MVT::f32, Legal);
      setOperationAction(ISD::LLROUND, MVT::f64, Legal);
      setOperationAction(ISD::LLROUND, MVT::f32, Legal);
    }
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented, and please don't build your
  // own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    setOperationAction(ISD::VAARG, MVT::i1, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i8, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i16, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i32, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VAARG, MVT::i64, Custom);
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || isPPC64) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    }
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    } else {
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    }
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  // PowerPC has better expansions for funnel shifts than the generic
  // TargetLowering::expandFunnelShift.
  if (Subtarget.has64BitSupport()) {
    setOperationAction(ISD::FSHL, MVT::i64, Custom);
    setOperationAction(ISD::FSHR, MVT::i64, Custom);
  }
  setOperationAction(ISD::FSHL, MVT::i32, Custom);
  setOperationAction(ISD::FSHR, MVT::i32, Custom);

  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
  }

  if (Subtarget.hasAltivec()) {
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
      setOperationAction(ISD::SADDSAT, VT, Legal);
      setOperationAction(ISD::SSUBSAT, VT, Legal);
      setOperationAction(ISD::UADDSAT, VT, Legal);
      setOperationAction(ISD::USUBSAT, VT, Legal);
    }
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
        setOperationAction(ISD::SMAX, VT, Legal);
        setOperationAction(ISD::SMIN, VT, Legal);
        setOperationAction(ISD::UMAX, VT, Legal);
        setOperationAction(ISD::UMIN, VT, Legal);
      } else {
        setOperationAction(ISD::SMAX, VT, Expand);
        setOperationAction(ISD::SMIN, VT, Expand);
        setOperationAction(ISD::UMAX, VT, Expand);
        setOperationAction(ISD::UMIN, VT, Expand);
      }

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FLDEXP, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
    setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
        setOperationAction(ISD::ROTL, VT, Legal);
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

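    // The 128-bit Altivec vector types all live in the VRRC register class.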
950 addRegisterClass(VT: MVT::v4f32, RC: &PPC::VRRCRegClass);
951 addRegisterClass(VT: MVT::v4i32, RC: &PPC::VRRCRegClass);
952 addRegisterClass(VT: MVT::v8i16, RC: &PPC::VRRCRegClass);
953 addRegisterClass(VT: MVT::v16i8, RC: &PPC::VRRCRegClass);
954
955 setOperationAction(Op: ISD::MUL, VT: MVT::v4f32, Action: Legal);
956 setOperationAction(Op: ISD::FMA, VT: MVT::v4f32, Action: Legal);
957
958 if (Subtarget.hasVSX()) {
959 setOperationAction(Op: ISD::FDIV, VT: MVT::v4f32, Action: Legal);
960 setOperationAction(Op: ISD::FSQRT, VT: MVT::v4f32, Action: Legal);
961 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2f64, Action: Custom);
962 }
963
964 if (Subtarget.hasP8Altivec())
965 setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Legal);
966 else
967 setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Custom);
968
969 if (Subtarget.isISA3_1()) {
970 setOperationAction(Op: ISD::MUL, VT: MVT::v2i64, Action: Legal);
971 setOperationAction(Op: ISD::MULHS, VT: MVT::v2i64, Action: Legal);
972 setOperationAction(Op: ISD::MULHU, VT: MVT::v2i64, Action: Legal);
973 setOperationAction(Op: ISD::MULHS, VT: MVT::v4i32, Action: Legal);
974 setOperationAction(Op: ISD::MULHU, VT: MVT::v4i32, Action: Legal);
975 setOperationAction(Op: ISD::UDIV, VT: MVT::v2i64, Action: Legal);
976 setOperationAction(Op: ISD::SDIV, VT: MVT::v2i64, Action: Legal);
977 setOperationAction(Op: ISD::UDIV, VT: MVT::v4i32, Action: Legal);
978 setOperationAction(Op: ISD::SDIV, VT: MVT::v4i32, Action: Legal);
979 setOperationAction(Op: ISD::UREM, VT: MVT::v2i64, Action: Legal);
980 setOperationAction(Op: ISD::SREM, VT: MVT::v2i64, Action: Legal);
981 setOperationAction(Op: ISD::UREM, VT: MVT::v4i32, Action: Legal);
982 setOperationAction(Op: ISD::SREM, VT: MVT::v4i32, Action: Legal);
983 setOperationAction(Op: ISD::UREM, VT: MVT::v1i128, Action: Legal);
984 setOperationAction(Op: ISD::SREM, VT: MVT::v1i128, Action: Legal);
985 setOperationAction(Op: ISD::UDIV, VT: MVT::v1i128, Action: Legal);
986 setOperationAction(Op: ISD::SDIV, VT: MVT::v1i128, Action: Legal);
987 setOperationAction(Op: ISD::ROTL, VT: MVT::v1i128, Action: Legal);
988 }
989
990 setOperationAction(Op: ISD::MUL, VT: MVT::v8i16, Action: Legal);
991 setOperationAction(Op: ISD::MUL, VT: MVT::v16i8, Action: Custom);
992
993 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Custom);
994 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Custom);
995 // LE is P8+/64-bit so direct moves are supported and these operations
996 // are legal. The custom transformation requires 64-bit since we need a
997 // pair of stores that will cover a 128-bit load for P10.
998 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
999 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Custom);
1000 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Custom);
1001 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Custom);
1002 }
1003
1004 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v16i8, Action: Custom);
1005 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v8i16, Action: Custom);
1006 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4i32, Action: Custom);
1007 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4f32, Action: Custom);
1008
1009 // Altivec does not contain unordered floating-point compare instructions
1010 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v4f32, Action: Expand);
1011 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v4f32, Action: Expand);
1012 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v4f32, Action: Expand);
1013 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v4f32, Action: Expand);
1014
1015 if (Subtarget.hasVSX()) {
1016 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2f64, Action: Legal);
1017 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1018 if (Subtarget.hasP8Vector()) {
1019 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Legal);
1020 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4f32, Action: Legal);
1021 }
1022 if (Subtarget.hasDirectMove() && isPPC64) {
1023 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Legal);
1024 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Legal);
1025 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Legal);
1026 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Legal);
1027 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1028 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1029 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1030 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1031 }
1032 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1033
1034 // The nearbyint variants are not allowed to raise the inexact exception
1035 // so we can only code-gen them with unsafe math.
1036 if (TM.Options.UnsafeFPMath) {
1037 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f64, Action: Legal);
1038 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f32, Action: Legal);
1039 }
1040
1041 setOperationAction(Op: ISD::FFLOOR, VT: MVT::v2f64, Action: Legal);
1042 setOperationAction(Op: ISD::FCEIL, VT: MVT::v2f64, Action: Legal);
1043 setOperationAction(Op: ISD::FTRUNC, VT: MVT::v2f64, Action: Legal);
1044 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::v2f64, Action: Legal);
1045 setOperationAction(Op: ISD::FRINT, VT: MVT::v2f64, Action: Legal);
1046 setOperationAction(Op: ISD::FROUND, VT: MVT::v2f64, Action: Legal);
1047 setOperationAction(Op: ISD::FROUND, VT: MVT::f64, Action: Legal);
1048 setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
1049
1050 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::v4f32, Action: Legal);
1051 setOperationAction(Op: ISD::FRINT, VT: MVT::v4f32, Action: Legal);
1052 setOperationAction(Op: ISD::FROUND, VT: MVT::v4f32, Action: Legal);
1053 setOperationAction(Op: ISD::FROUND, VT: MVT::f32, Action: Legal);
1054 setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);
1055
1056 setOperationAction(Op: ISD::MUL, VT: MVT::v2f64, Action: Legal);
1057 setOperationAction(Op: ISD::FMA, VT: MVT::v2f64, Action: Legal);
1058
1059 setOperationAction(Op: ISD::FDIV, VT: MVT::v2f64, Action: Legal);
1060 setOperationAction(Op: ISD::FSQRT, VT: MVT::v2f64, Action: Legal);
1061
1062 // Share the Altivec comparison restrictions.
1063 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v2f64, Action: Expand);
1064 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v2f64, Action: Expand);
1065 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v2f64, Action: Expand);
1066 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v2f64, Action: Expand);
1067
1068 setOperationAction(Op: ISD::LOAD, VT: MVT::v2f64, Action: Legal);
1069 setOperationAction(Op: ISD::STORE, VT: MVT::v2f64, Action: Legal);
1070
1071 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2f64, Action: Custom);
1072
1073 if (Subtarget.hasP8Vector())
1074 addRegisterClass(VT: MVT::f32, RC: &PPC::VSSRCRegClass);
1075
1076 addRegisterClass(VT: MVT::f64, RC: &PPC::VSFRCRegClass);
1077
1078 addRegisterClass(VT: MVT::v4i32, RC: &PPC::VSRCRegClass);
1079 addRegisterClass(VT: MVT::v4f32, RC: &PPC::VSRCRegClass);
1080 addRegisterClass(VT: MVT::v2f64, RC: &PPC::VSRCRegClass);
1081
1082 if (Subtarget.hasP8Altivec()) {
1083 setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Legal);
1084 setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Legal);
1085 setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Legal);
1086
1087 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1088 // SRL, but not for SRA because of the instructions available:
1089 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
1090 // worth doing.
1091 setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Expand);
1092 setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Expand);
1093 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1094
1095 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Legal);
1096 }
1097 else {
1098 setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Expand);
1099 setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Expand);
1100 setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Expand);
1101
1102 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Custom);
1103
1104 // VSX v2i64 only supports non-arithmetic operations.
1105 setOperationAction(Op: ISD::ADD, VT: MVT::v2i64, Action: Expand);
1106 setOperationAction(Op: ISD::SUB, VT: MVT::v2i64, Action: Expand);
1107 }
1108
1109 if (Subtarget.isISA3_1())
1110 setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Legal);
1111 else
1112 setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Expand);
1113
1114 setOperationAction(Op: ISD::LOAD, VT: MVT::v2i64, Action: Promote);
1115 AddPromotedToType (Opc: ISD::LOAD, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1116 setOperationAction(Op: ISD::STORE, VT: MVT::v2i64, Action: Promote);
1117 AddPromotedToType (Opc: ISD::STORE, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1118
1119 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2i64, Action: Custom);
1120
1121 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1122 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1123 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1124 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1125 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1126 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1127 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1128 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1129
1130 // Custom handling for partial vectors of integers converted to
1131 // floating point. We already have optimal handling for v2i32 through
1132 // the DAG combine, so those aren't necessary.
1133 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1134 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1135 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1136 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1137 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1138 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1139 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1140 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1141 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1142 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1143 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1144 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1145 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1146 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1147 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1148 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1149
1150 setOperationAction(Op: ISD::FNEG, VT: MVT::v4f32, Action: Legal);
1151 setOperationAction(Op: ISD::FNEG, VT: MVT::v2f64, Action: Legal);
1152 setOperationAction(Op: ISD::FABS, VT: MVT::v4f32, Action: Legal);
1153 setOperationAction(Op: ISD::FABS, VT: MVT::v2f64, Action: Legal);
1154 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v4f32, Action: Legal);
1155 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v2f64, Action: Legal);
1156
1157 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2i64, Action: Custom);
1158 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2f64, Action: Custom);
1159
1160 // Handle constrained floating-point operations on vectors.
1161 // The predicate here is `hasVSX` because Altivec instructions do not
1162 // raise floating-point exceptions, while VSX vector instructions do.
1163 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v4f32, Action: Legal);
1164 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v4f32, Action: Legal);
1165 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v4f32, Action: Legal);
1166 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v4f32, Action: Legal);
1167 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v4f32, Action: Legal);
1168 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v4f32, Action: Legal);
1169 setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v4f32, Action: Legal);
1170 setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v4f32, Action: Legal);
1171 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v4f32, Action: Legal);
1172 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v4f32, Action: Legal);
1173 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v4f32, Action: Legal);
1174 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v4f32, Action: Legal);
1175 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v4f32, Action: Legal);
1176
1177 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v2f64, Action: Legal);
1178 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v2f64, Action: Legal);
1179 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v2f64, Action: Legal);
1180 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v2f64, Action: Legal);
1181 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v2f64, Action: Legal);
1182 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v2f64, Action: Legal);
1183 setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v2f64, Action: Legal);
1184 setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v2f64, Action: Legal);
1185 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v2f64, Action: Legal);
1186 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v2f64, Action: Legal);
1187 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v2f64, Action: Legal);
1188 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v2f64, Action: Legal);
1189 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v2f64, Action: Legal);
1190
1191 addRegisterClass(VT: MVT::v2i64, RC: &PPC::VSRCRegClass);
1192 addRegisterClass(VT: MVT::f128, RC: &PPC::VRRCRegClass);
1193
1194 for (MVT FPT : MVT::fp_valuetypes())
1195 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: FPT, Action: Expand);
1196
1197 // Expand the SELECT to SELECT_CC
1198 setOperationAction(Op: ISD::SELECT, VT: MVT::f128, Action: Expand);
1199
1200 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f64, Action: Expand);
1201 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f32, Action: Expand);
1202
1203 // No implementation for these ops for PowerPC.
1204 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f128, Action: Expand);
1205 setOperationAction(Op: ISD::FSIN, VT: MVT::f128, Action: Expand);
1206 setOperationAction(Op: ISD::FCOS, VT: MVT::f128, Action: Expand);
1207 setOperationAction(Op: ISD::FPOW, VT: MVT::f128, Action: Expand);
1208 setOperationAction(Op: ISD::FPOWI, VT: MVT::f128, Action: Expand);
1209 setOperationAction(Op: ISD::FREM, VT: MVT::f128, Action: Expand);
1210 }
1211
1212 if (Subtarget.hasP8Altivec()) {
1213 addRegisterClass(VT: MVT::v2i64, RC: &PPC::VRRCRegClass);
1214 addRegisterClass(VT: MVT::v1i128, RC: &PPC::VRRCRegClass);
1215 }
1216
1217 if (Subtarget.hasP9Vector()) {
1218 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Custom);
1219 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4f32, Action: Custom);
1220
1221 // Test data class instructions store results in CR bits.
1222 if (Subtarget.useCRBits()) {
1223 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Custom);
1224 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Custom);
1225 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f128, Action: Custom);
1226 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::ppcf128, Action: Custom);
1227 }
1228
1229 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1230 // SRL, but not for SRA because of the instructions available:
1231 // VS{RL} and VS{RL}O.
1232 setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Legal);
1233 setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Legal);
1234 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1235
1236 setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: Legal);
1237 setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: Legal);
1238 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Legal);
1239 setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Legal);
1240 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Legal);
1241
1242 setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Legal);
1243 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f128, Action: Expand);
1244 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f128, Action: Expand);
1245 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f128, Action: Expand);
1246 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f128, Action: Expand);
1247 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f128, Action: Expand);
1248 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f128, Action: Expand);
1249
1250 setOperationAction(Op: ISD::FTRUNC, VT: MVT::f128, Action: Legal);
1251 setOperationAction(Op: ISD::FRINT, VT: MVT::f128, Action: Legal);
1252 setOperationAction(Op: ISD::FFLOOR, VT: MVT::f128, Action: Legal);
1253 setOperationAction(Op: ISD::FCEIL, VT: MVT::f128, Action: Legal);
1254 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f128, Action: Legal);
1255 setOperationAction(Op: ISD::FROUND, VT: MVT::f128, Action: Legal);
1256
1257 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f64, Action: Legal);
1258 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f32, Action: Legal);
1259 setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
1260
1261 // Handle constrained floating-point operations of fp128
1262 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f128, Action: Legal);
1263 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f128, Action: Legal);
1264 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f128, Action: Legal);
1265 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f128, Action: Legal);
1266 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f128, Action: Legal);
1267 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f128, Action: Legal);
1268 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Legal);
1269 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f64, Action: Legal);
1270 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f32, Action: Legal);
1271 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f128, Action: Legal);
1272 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f128, Action: Legal);
1273 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f128, Action: Legal);
1274 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f128, Action: Legal);
1275 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f128, Action: Legal);
1276 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f128, Action: Legal);
1277 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v2f32, Action: Custom);
1278 setOperationAction(Op: ISD::BSWAP, VT: MVT::v8i16, Action: Legal);
1279 setOperationAction(Op: ISD::BSWAP, VT: MVT::v4i32, Action: Legal);
1280 setOperationAction(Op: ISD::BSWAP, VT: MVT::v2i64, Action: Legal);
1281 setOperationAction(Op: ISD::BSWAP, VT: MVT::v1i128, Action: Legal);
1282 } else if (Subtarget.hasVSX()) {
1283 setOperationAction(Op: ISD::LOAD, VT: MVT::f128, Action: Promote);
1284 setOperationAction(Op: ISD::STORE, VT: MVT::f128, Action: Promote);
1285
1286 AddPromotedToType(Opc: ISD::LOAD, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1287 AddPromotedToType(Opc: ISD::STORE, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1288
1289 // Set FADD/FSUB to LibCall so that the legalizer does not expand the
1290 // fp_to_uint and int_to_fp operations.
1291 setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: LibCall);
1292 setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: LibCall);
1293
1294 setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Expand);
1295 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Expand);
1296 setOperationAction(Op: ISD::FNEG, VT: MVT::f128, Action: Expand);
1297 setOperationAction(Op: ISD::FABS, VT: MVT::f128, Action: Expand);
1298 setOperationAction(Op: ISD::FSQRT, VT: MVT::f128, Action: Expand);
1299 setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Expand);
1300 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f128, Action: Expand);
1301
1302 // Expand the fp_extend if the target type is fp128.
1303 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Expand);
1304 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Expand);
1305
1306 // Expand the fp_round if the source type is fp128.
1307 for (MVT VT : {MVT::f32, MVT::f64}) {
1308 setOperationAction(Op: ISD::FP_ROUND, VT, Action: Custom);
1309 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT, Action: Custom);
1310 }
1311
1312 setOperationAction(Op: ISD::SETCC, VT: MVT::f128, Action: Custom);
1313 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f128, Action: Custom);
1314 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f128, Action: Custom);
1315 setOperationAction(Op: ISD::BR_CC, VT: MVT::f128, Action: Expand);
1316
1317 // Lower the following f128 select_cc pattern:
1318 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1319 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1320
1321 // We need to handle f128 SELECT_CC with integer result type.
1322 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Custom);
1323 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: isPPC64 ? Custom : Expand);
1324 }
1325
1326 if (Subtarget.hasP9Altivec()) {
1327 if (Subtarget.isISA3_1()) {
1328 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1329 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1330 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1331 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1332 } else {
1333 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Custom);
1334 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Custom);
1335 }
1336 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i8, Action: Legal);
1337 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i16, Action: Legal);
1338 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i32, Action: Legal);
1339 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i8, Action: Legal);
1340 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i16, Action: Legal);
1341 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i32, Action: Legal);
1342 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i64, Action: Legal);
1343
1344 setOperationAction(Op: ISD::ABDU, VT: MVT::v16i8, Action: Legal);
1345 setOperationAction(Op: ISD::ABDU, VT: MVT::v8i16, Action: Legal);
1346 setOperationAction(Op: ISD::ABDU, VT: MVT::v4i32, Action: Legal);
1347 setOperationAction(Op: ISD::ABDS, VT: MVT::v4i32, Action: Legal);
1348 }
1349
1350 if (Subtarget.hasP10Vector()) {
1351 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1352 }
1353 }
1354
1355 if (Subtarget.pairedVectorMemops()) {
1356 addRegisterClass(VT: MVT::v256i1, RC: &PPC::VSRpRCRegClass);
1357 setOperationAction(Op: ISD::LOAD, VT: MVT::v256i1, Action: Custom);
1358 setOperationAction(Op: ISD::STORE, VT: MVT::v256i1, Action: Custom);
1359 }
1360 if (Subtarget.hasMMA()) {
1361 if (Subtarget.isISAFuture()) {
1362 addRegisterClass(VT: MVT::v512i1, RC: &PPC::WACCRCRegClass);
1363 addRegisterClass(VT: MVT::v1024i1, RC: &PPC::DMRRCRegClass);
1364 addRegisterClass(VT: MVT::v2048i1, RC: &PPC::DMRpRCRegClass);
1365 setOperationAction(Op: ISD::LOAD, VT: MVT::v1024i1, Action: Custom);
1366 setOperationAction(Op: ISD::STORE, VT: MVT::v1024i1, Action: Custom);
1367 setOperationAction(Op: ISD::LOAD, VT: MVT::v2048i1, Action: Custom);
1368 setOperationAction(Op: ISD::STORE, VT: MVT::v2048i1, Action: Custom);
1369 } else {
1370 addRegisterClass(VT: MVT::v512i1, RC: &PPC::UACCRCRegClass);
1371 }
1372 setOperationAction(Op: ISD::LOAD, VT: MVT::v512i1, Action: Custom);
1373 setOperationAction(Op: ISD::STORE, VT: MVT::v512i1, Action: Custom);
1374 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v512i1, Action: Custom);
1375 }
1376
1377 if (Subtarget.has64BitSupport())
1378 setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Legal);
1379
1380 if (Subtarget.isISA3_1())
1381 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Legal);
1382
1383 setOperationAction(Op: ISD::READCYCLECOUNTER, VT: MVT::i64, Action: isPPC64 ? Legal : Custom);
1384
1385 if (!isPPC64) {
1386 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i64, Action: Expand);
1387 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i64, Action: Expand);
1388 }
1389
1390 if (shouldInlineQuadwordAtomics()) {
1391 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i128, Action: Custom);
1392 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i128, Action: Custom);
1393 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i128, Action: Custom);
1394 }
1395
1396 setBooleanContents(ZeroOrOneBooleanContent);
1397
1398 if (Subtarget.hasAltivec()) {
1399 // Altivec instructions set fields to all zeros or all ones.
1400 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1401 }
1402
1403 if (shouldInlineQuadwordAtomics())
1404 setMaxAtomicSizeInBitsSupported(128);
1405 else if (isPPC64)
1406 setMaxAtomicSizeInBitsSupported(64);
1407 else
1408 setMaxAtomicSizeInBitsSupported(32);
1409
1410 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1411
1412 // We have target-specific dag combine patterns for the following nodes:
1413 setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
1414 ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
1415 if (Subtarget.hasFPCVT())
1416 setTargetDAGCombine(ISD::UINT_TO_FP);
1417 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1418 if (Subtarget.useCRBits())
1419 setTargetDAGCombine(ISD::BRCOND);
1420 setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1421 ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1422
1423 setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1424
1425 setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1426
1427 if (Subtarget.useCRBits()) {
1428 setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1429 }
1430
1431 // With 32 condition bits, we don't need to sink (and duplicate) compares
1432 // aggressively in CodeGenPrep.
1433 if (Subtarget.useCRBits()) {
1434 setHasMultipleConditionRegisters();
1435 setJumpIsExpensive();
1436 }
1437
1438 // TODO: The default number of entries is set to 64. This stops most jump
1439 // table generation on PPC. But it is good for current PPC hardware because
1440 // the indirect branch via mtctr into the jump table may lead to poor branch
1441 // prediction. Re-evaluate this value on future hardware that can do better with mtctr.
1442 setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
1443
1444 setMinFunctionAlignment(Align(4));
1445 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1446
1447 auto CPUDirective = Subtarget.getCPUDirective();
1448 switch (CPUDirective) {
1449 default: break;
1450 case PPC::DIR_970:
1451 case PPC::DIR_A2:
1452 case PPC::DIR_E500:
1453 case PPC::DIR_E500mc:
1454 case PPC::DIR_E5500:
1455 case PPC::DIR_PWR4:
1456 case PPC::DIR_PWR5:
1457 case PPC::DIR_PWR5X:
1458 case PPC::DIR_PWR6:
1459 case PPC::DIR_PWR6X:
1460 case PPC::DIR_PWR7:
1461 case PPC::DIR_PWR8:
1462 case PPC::DIR_PWR9:
1463 case PPC::DIR_PWR10:
1464 case PPC::DIR_PWR11:
1465 case PPC::DIR_PWR_FUTURE:
1466 setPrefLoopAlignment(Align(16));
1467 setPrefFunctionAlignment(Align(16));
1468 break;
1469 }
1470
1471 if (Subtarget.enableMachineScheduler())
1472 setSchedulingPreference(Sched::Source);
1473 else
1474 setSchedulingPreference(Sched::Hybrid);
1475
1476 computeRegisterProperties(TRI: STI.getRegisterInfo());
1477
1478 // The Freescale cores do better with aggressive inlining of memcpy and
1479 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1480 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1481 MaxStoresPerMemset = 32;
1482 MaxStoresPerMemsetOptSize = 16;
1483 MaxStoresPerMemcpy = 32;
1484 MaxStoresPerMemcpyOptSize = 8;
1485 MaxStoresPerMemmove = 32;
1486 MaxStoresPerMemmoveOptSize = 8;
1487 } else if (CPUDirective == PPC::DIR_A2) {
1488 // The A2 also benefits from (very) aggressive inlining of memcpy and
1489 // friends. The overhead of the function call, even when warm, can be
1490 // over one hundred cycles.
1491 MaxStoresPerMemset = 128;
1492 MaxStoresPerMemcpy = 128;
1493 MaxStoresPerMemmove = 128;
1494 MaxLoadsPerMemcmp = 128;
1495 } else {
1496 MaxLoadsPerMemcmp = 8;
1497 MaxLoadsPerMemcmpOptSize = 4;
1498 }
1499
1500 // Enable generation of STXVP instructions by default for mcpu=future.
1501 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1502 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1503 DisableAutoPairedVecSt = false;
1504
1505 IsStrictFPEnabled = true;
1506
1507 // Let the subtarget (CPU) decide if a predictable select is more expensive
1508 // than the corresponding branch. This information is used in CGP to decide
1509 // when to convert selects into branches.
1510 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1511
1512 GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
1513}
1514
1515// *********************************** NOTE ************************************
1516// For selecting load and store instructions, the addressing modes are defined
1517// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1518 // patterns to match the load and store instructions.
1519//
1520// The TD definitions for the addressing modes correspond to their respective
1521// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1522// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1523// address mode flags of a particular node. Afterwards, the computed address
1524// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1525// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1526// accordingly, based on the preferred addressing mode.
1527//
1528// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1529// MemOpFlags contains all the possible flags that can be used to compute the
1530// optimal addressing mode for load and store instructions.
1531// AddrMode contains all the possible load and store addressing modes available
1532// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1533//
1534// When adding new load and store instructions, it is possible that new address
1535// flags may need to be added into MemOpFlags, and a new addressing mode will
1536 // need to be added to AddrMode. An entry for the new addressing mode (consisting
1537// of the minimal and main distinguishing address flags for the new load/store
1538// instructions) will need to be added into initializeAddrModeMap() below.
1539 // Finally, when adding new addressing modes, getAddrModeForFlags() will need
1540 // to be updated so that the optimal addressing mode is selected for them.
1541// *****************************************************************************
1542/// Initialize the map that relates the different addressing modes of the load
1543/// and store instructions to a set of flags. This ensures the load/store
1544/// instruction is correctly matched during instruction selection.
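/// For example (illustrative, derived from the table below): a zero-extending
/// word load such as LWZ whose address is a register plus a signed 16-bit
/// immediate carries the flags MOF_ZExt | MOF_RPlusSImm16 | MOF_WordInt and
/// is therefore matched with the D-Form addressing mode.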
1545void PPCTargetLowering::initializeAddrModeMap() {
1546 AddrModesMap[PPC::AM_DForm] = {
1547 // LWZ, STW
1548 PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
1549 PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
1550 PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1551 PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1552 // LBZ, LHZ, STB, STH
1553 PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1554 PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1555 PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1556 PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1557 // LHA
1558 PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1559 PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1560 PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1561 PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1562 // LFS, LFD, STFS, STFD
1563 PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1564 PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1565 PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1566 PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1567 };
1568 AddrModesMap[PPC::AM_DSForm] = {
1569 // LWA
1570 PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
1571 PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1572 PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1573 // LD, STD
1574 PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
1575 PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
1576 PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
1577 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1578 PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1579 PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1580 PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1581 };
1582 AddrModesMap[PPC::AM_DQForm] = {
1583 // LXV, STXV
1584 PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1585 PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1586 PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1587 };
1588 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1589 PPC::MOF_SubtargetP10};
1590 // TODO: Add mapping for quadword load/store.
1591}
1592
1593/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1594/// the desired ByVal argument alignment.
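/// For illustration (assuming Altivec is available, so MaxMaxAlign is 16): a
/// struct containing a 128-bit vector member is given a 16-byte alignment by
/// the logic below.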
1595static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1596 if (MaxAlign == MaxMaxAlign)
1597 return;
1598 if (VectorType *VTy = dyn_cast<VectorType>(Val: Ty)) {
1599 if (MaxMaxAlign >= 32 &&
1600 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1601 MaxAlign = Align(32);
1602 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1603 MaxAlign < 16)
1604 MaxAlign = Align(16);
1605 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Val: Ty)) {
1606 Align EltAlign;
1607 getMaxByValAlign(Ty: ATy->getElementType(), MaxAlign&: EltAlign, MaxMaxAlign);
1608 if (EltAlign > MaxAlign)
1609 MaxAlign = EltAlign;
1610 } else if (StructType *STy = dyn_cast<StructType>(Val: Ty)) {
1611 for (auto *EltTy : STy->elements()) {
1612 Align EltAlign;
1613 getMaxByValAlign(Ty: EltTy, MaxAlign&: EltAlign, MaxMaxAlign);
1614 if (EltAlign > MaxAlign)
1615 MaxAlign = EltAlign;
1616 if (MaxAlign == MaxMaxAlign)
1617 break;
1618 }
1619 }
1620}
1621
1622/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1623/// function arguments in the caller parameter area.
1624Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1625 const DataLayout &DL) const {
1626 // 16-byte and wider vectors are passed on a 16-byte boundary.
1627 // Everything else is passed on an 8-byte boundary on PPC64 and 4-byte on PPC32.
1628 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1629 if (Subtarget.hasAltivec())
1630 getMaxByValAlign(Ty, MaxAlign&: Alignment, MaxMaxAlign: Align(16));
1631 return Alignment;
1632}
1633
1634bool PPCTargetLowering::useSoftFloat() const {
1635 return Subtarget.useSoftFloat();
1636}
1637
1638bool PPCTargetLowering::hasSPE() const {
1639 return Subtarget.hasSPE();
1640}
1641
1642bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1643 return VT.isScalarInteger();
1644}
1645
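// Decide whether a constant-splat vector element being stored should be
// extracted directly from the vector, and report which element to extract.
// Derived from the checks below: this applies only to 32- and 64-bit integer
// elements on PPC64 with VSX, and the chosen Index depends on endianness.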
1646bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1647 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1648 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1649 return false;
1650
1651 if (auto *VTy = dyn_cast<VectorType>(Val: VectorTy)) {
1652 if (VTy->getScalarType()->isIntegerTy()) {
1653 // ElemSizeInBits of 8/16 fits in the immediate field, so it is not handled here.
1654 if (ElemSizeInBits == 32) {
1655 Index = Subtarget.isLittleEndian() ? 2 : 1;
1656 return true;
1657 }
1658 if (ElemSizeInBits == 64) {
1659 Index = Subtarget.isLittleEndian() ? 1 : 0;
1660 return true;
1661 }
1662 }
1663 }
1664 return false;
1665}
1666
1667const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1668 switch ((PPCISD::NodeType)Opcode) {
1669 case PPCISD::FIRST_NUMBER: break;
1670 case PPCISD::FSEL: return "PPCISD::FSEL";
1671 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1672 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1673 case PPCISD::FCFID: return "PPCISD::FCFID";
1674 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1675 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1676 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1677 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1678 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1679 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1680 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1681 case PPCISD::FRE: return "PPCISD::FRE";
1682 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1683 case PPCISD::FTSQRT:
1684 return "PPCISD::FTSQRT";
1685 case PPCISD::FSQRT:
1686 return "PPCISD::FSQRT";
1687 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1688 case PPCISD::VPERM: return "PPCISD::VPERM";
1689 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1690 case PPCISD::XXSPLTI_SP_TO_DP:
1691 return "PPCISD::XXSPLTI_SP_TO_DP";
1692 case PPCISD::XXSPLTI32DX:
1693 return "PPCISD::XXSPLTI32DX";
1694 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1695 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1696 case PPCISD::XXPERM:
1697 return "PPCISD::XXPERM";
1698 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1699 case PPCISD::CMPB: return "PPCISD::CMPB";
1700 case PPCISD::Hi: return "PPCISD::Hi";
1701 case PPCISD::Lo: return "PPCISD::Lo";
1702 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1703 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1704 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1705 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1706 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1707 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1708 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1709 case PPCISD::SRL: return "PPCISD::SRL";
1710 case PPCISD::SRA: return "PPCISD::SRA";
1711 case PPCISD::SHL: return "PPCISD::SHL";
1712 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1713 case PPCISD::CALL: return "PPCISD::CALL";
1714 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1715 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1716 case PPCISD::CALL_RM:
1717 return "PPCISD::CALL_RM";
1718 case PPCISD::CALL_NOP_RM:
1719 return "PPCISD::CALL_NOP_RM";
1720 case PPCISD::CALL_NOTOC_RM:
1721 return "PPCISD::CALL_NOTOC_RM";
1722 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1723 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1724 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1725 case PPCISD::BCTRL_RM:
1726 return "PPCISD::BCTRL_RM";
1727 case PPCISD::BCTRL_LOAD_TOC_RM:
1728 return "PPCISD::BCTRL_LOAD_TOC_RM";
1729 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1730 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1731 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1732 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1733 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1734 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1735 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1736 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1737 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1738 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1739 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1740 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1741 case PPCISD::ANDI_rec_1_EQ_BIT:
1742 return "PPCISD::ANDI_rec_1_EQ_BIT";
1743 case PPCISD::ANDI_rec_1_GT_BIT:
1744 return "PPCISD::ANDI_rec_1_GT_BIT";
1745 case PPCISD::VCMP: return "PPCISD::VCMP";
1746 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1747 case PPCISD::LBRX: return "PPCISD::LBRX";
1748 case PPCISD::STBRX: return "PPCISD::STBRX";
1749 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1750 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1751 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1752 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1753 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1754 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1755 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1756 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1757 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1758 case PPCISD::ST_VSR_SCAL_INT:
1759 return "PPCISD::ST_VSR_SCAL_INT";
1760 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1761 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1762 case PPCISD::BDZ: return "PPCISD::BDZ";
1763 case PPCISD::MFFS: return "PPCISD::MFFS";
1764 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1765 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1766 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1767 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1768 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1769 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1770 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1771 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1772 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1773 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1774 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1775 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1776 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1777 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1778 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1779 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1780 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1781 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1782 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1783 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1784 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1785 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1786 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1787 case PPCISD::PADDI_DTPREL:
1788 return "PPCISD::PADDI_DTPREL";
1789 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1790 case PPCISD::SC: return "PPCISD::SC";
1791 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1792 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1793 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1794 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1795 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1796 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1797 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1798 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1799 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1800 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1801 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1802 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1803 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1804 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1805 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1806 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1807 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1808 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1809 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1810 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1811 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1812 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1813 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1814 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1815 case PPCISD::STRICT_FADDRTZ:
1816 return "PPCISD::STRICT_FADDRTZ";
1817 case PPCISD::STRICT_FCTIDZ:
1818 return "PPCISD::STRICT_FCTIDZ";
1819 case PPCISD::STRICT_FCTIWZ:
1820 return "PPCISD::STRICT_FCTIWZ";
1821 case PPCISD::STRICT_FCTIDUZ:
1822 return "PPCISD::STRICT_FCTIDUZ";
1823 case PPCISD::STRICT_FCTIWUZ:
1824 return "PPCISD::STRICT_FCTIWUZ";
1825 case PPCISD::STRICT_FCFID:
1826 return "PPCISD::STRICT_FCFID";
1827 case PPCISD::STRICT_FCFIDU:
1828 return "PPCISD::STRICT_FCFIDU";
1829 case PPCISD::STRICT_FCFIDS:
1830 return "PPCISD::STRICT_FCFIDS";
1831 case PPCISD::STRICT_FCFIDUS:
1832 return "PPCISD::STRICT_FCFIDUS";
1833 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1834 case PPCISD::STORE_COND:
1835 return "PPCISD::STORE_COND";
1836 case PPCISD::SETBC:
1837 return "PPCISD::SETBC";
1838 case PPCISD::SETBCR:
1839 return "PPCISD::SETBCR";
1840 case PPCISD::ADDC:
1841 return "PPCISD::ADDC";
1842 case PPCISD::ADDE:
1843 return "PPCISD::ADDE";
1844 case PPCISD::SUBC:
1845 return "PPCISD::SUBC";
1846 case PPCISD::SUBE:
1847 return "PPCISD::SUBE";
1848 }
1849 return nullptr;
1850}
1851
1852EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1853 EVT VT) const {
1854 if (!VT.isVector())
1855 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1856
1857 return VT.changeVectorElementTypeToInteger();
1858}
1859
1860bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1861 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1862 return true;
1863}
1864
1865//===----------------------------------------------------------------------===//
1866// Node matching predicates, for use by the tblgen matching code.
1867//===----------------------------------------------------------------------===//
1868
1869/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1870static bool isFloatingPointZero(SDValue Op) {
1871 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val&: Op))
1872 return CFP->getValueAPF().isZero();
1873 else if (ISD::isEXTLoad(N: Op.getNode()) || ISD::isNON_EXTLoad(N: Op.getNode())) {
1874 // Maybe this has already been legalized into the constant pool?
1875 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val: Op.getOperand(i: 1)))
1876 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: CP->getConstVal()))
1877 return CFP->getValueAPF().isZero();
1878 }
1879 return false;
1880}
1881
1882/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1883/// true if Op is undef or if it matches the specified value.
1884static bool isConstantOrUndef(int Op, int Val) {
1885 return Op < 0 || Op == Val;
1886}
1887
1888/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1889/// VPKUHUM instruction.
1890/// The ShuffleKind distinguishes between big-endian operations with
1891/// two different inputs (0), either-endian operations with two identical
1892/// inputs (1), and little-endian operations with two different inputs (2).
1893/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
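/// For illustration (derived from the checks below): on a big-endian target
/// with two different inputs (ShuffleKind 0), the expected mask is
/// <1,3,5,...,31>, i.e. the low-order byte of each halfword; on a
/// little-endian target with swapped inputs (ShuffleKind 2) it is
/// <0,2,4,...,30>.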
1894bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1895 SelectionDAG &DAG) {
1896 bool IsLE = DAG.getDataLayout().isLittleEndian();
1897 if (ShuffleKind == 0) {
1898 if (IsLE)
1899 return false;
1900 for (unsigned i = 0; i != 16; ++i)
1901 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2+1))
1902 return false;
1903 } else if (ShuffleKind == 2) {
1904 if (!IsLE)
1905 return false;
1906 for (unsigned i = 0; i != 16; ++i)
1907 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2))
1908 return false;
1909 } else if (ShuffleKind == 1) {
1910 unsigned j = IsLE ? 0 : 1;
1911 for (unsigned i = 0; i != 8; ++i)
1912 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2+j) ||
1913 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j))
1914 return false;
1915 }
1916 return true;
1917}
1918
1919/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1920/// VPKUWUM instruction.
1921/// The ShuffleKind distinguishes between big-endian operations with
1922/// two different inputs (0), either-endian operations with two identical
1923/// inputs (1), and little-endian operations with two different inputs (2).
1924/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
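/// For illustration (derived from the checks below): on a big-endian target
/// with two different inputs (ShuffleKind 0), the expected mask is
/// <2,3, 6,7, 10,11, 14,15, 18,19, 22,23, 26,27, 30,31>, i.e. the low-order
/// halfword of each word.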
1925bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1926 SelectionDAG &DAG) {
1927 bool IsLE = DAG.getDataLayout().isLittleEndian();
1928 if (ShuffleKind == 0) {
1929 if (IsLE)
1930 return false;
1931 for (unsigned i = 0; i != 16; i += 2)
1932 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+2) ||
1933 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+3))
1934 return false;
1935 } else if (ShuffleKind == 2) {
1936 if (!IsLE)
1937 return false;
1938 for (unsigned i = 0; i != 16; i += 2)
1939 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2) ||
1940 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+1))
1941 return false;
1942 } else if (ShuffleKind == 1) {
1943 unsigned j = IsLE ? 0 : 2;
1944 for (unsigned i = 0; i != 8; i += 2)
1945 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+j) ||
1946 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+j+1) ||
1947 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j) ||
1948 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+9), Val: i*2+j+1))
1949 return false;
1950 }
1951 return true;
1952}
1953
1954/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1955/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1956/// current subtarget.
1957///
1958/// The ShuffleKind distinguishes between big-endian operations with
1959/// two different inputs (0), either-endian operations with two identical
1960/// inputs (1), and little-endian operations with two different inputs (2).
1961/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
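/// For illustration (derived from the checks below): on a big-endian target
/// with two different inputs (ShuffleKind 0), the expected mask is
/// <4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31>, i.e. the low-order word
/// of each doubleword.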
1962bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1963 SelectionDAG &DAG) {
1964 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1965 if (!Subtarget.hasP8Vector())
1966 return false;
1967
1968 bool IsLE = DAG.getDataLayout().isLittleEndian();
1969 if (ShuffleKind == 0) {
1970 if (IsLE)
1971 return false;
1972 for (unsigned i = 0; i != 16; i += 4)
1973 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+4) ||
1974 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+5) ||
1975 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+2), Val: i*2+6) ||
1976 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+3), Val: i*2+7))
1977 return false;
1978 } else if (ShuffleKind == 2) {
1979 if (!IsLE)
1980 return false;
1981 for (unsigned i = 0; i != 16; i += 4)
1982 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2) ||
1983 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+1) ||
1984 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+2), Val: i*2+2) ||
1985 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+3), Val: i*2+3))
1986 return false;
1987 } else if (ShuffleKind == 1) {
1988 unsigned j = IsLE ? 0 : 4;
1989 for (unsigned i = 0; i != 8; i += 4)
1990 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+j) ||
1991 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+j+1) ||
1992 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+2), Val: i*2+j+2) ||
1993 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+3), Val: i*2+j+3) ||
1994 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j) ||
1995 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+9), Val: i*2+j+1) ||
1996 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+10), Val: i*2+j+2) ||
1997 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+11), Val: i*2+j+3))
1998 return false;
1999 }
2000 return true;
2001}
2002
2003/// isVMerge - Common function, used to match vmrg* shuffles.
2004///
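/// For illustration (derived from the checks below): with UnitSize = 1,
/// LHSStart = 8 and RHSStart = 24 (the big-endian VMRGLB pattern used by
/// isVMRGLShuffleMask), the expected mask is <8,24, 9,25, 10,26, ..., 15,31>.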
2005static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2006 unsigned LHSStart, unsigned RHSStart) {
2007 if (N->getValueType(ResNo: 0) != MVT::v16i8)
2008 return false;
2009 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2010 "Unsupported merge size!");
2011
2012 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2013 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2014 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i*UnitSize*2+j),
2015 Val: LHSStart+j+i*UnitSize) ||
2016 !isConstantOrUndef(Op: N->getMaskElt(Idx: i*UnitSize*2+UnitSize+j),
2017 Val: RHSStart+j+i*UnitSize))
2018 return false;
2019 }
2020 return true;
2021}
2022
2023/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2024/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2025/// The ShuffleKind distinguishes between big-endian merges with two
2026/// different inputs (0), either-endian merges with two identical inputs (1),
2027/// and little-endian merges with two different inputs (2). For the latter,
2028/// the input operands are swapped (see PPCInstrAltivec.td).
2029bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2030 unsigned ShuffleKind, SelectionDAG &DAG) {
2031 if (DAG.getDataLayout().isLittleEndian()) {
2032 if (ShuffleKind == 1) // unary
2033 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 0);
2034 else if (ShuffleKind == 2) // swapped
2035 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 16);
2036 else
2037 return false;
2038 } else {
2039 if (ShuffleKind == 1) // unary
2040 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 8);
2041 else if (ShuffleKind == 0) // normal
2042 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 24);
2043 else
2044 return false;
2045 }
2046}
2047
2048/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2049/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2050/// The ShuffleKind distinguishes between big-endian merges with two
2051/// different inputs (0), either-endian merges with two identical inputs (1),
2052/// and little-endian merges with two different inputs (2). For the latter,
2053/// the input operands are swapped (see PPCInstrAltivec.td).
2054bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2055 unsigned ShuffleKind, SelectionDAG &DAG) {
2056 if (DAG.getDataLayout().isLittleEndian()) {
2057 if (ShuffleKind == 1) // unary
2058 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 8);
2059 else if (ShuffleKind == 2) // swapped
2060 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 24);
2061 else
2062 return false;
2063 } else {
2064 if (ShuffleKind == 1) // unary
2065 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 0);
2066 else if (ShuffleKind == 0) // normal
2067 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 16);
2068 else
2069 return false;
2070 }
2071}
2072
2073/**
2074 * Common function used to match vmrgew and vmrgow shuffles
2075 *
2076 * The indexOffset determines whether to look for even or odd words in
2077 * the shuffle mask. This is based on the endianness of the target
2078 * machine.
2079 * - Little Endian:
2080 * - Use offset of 0 to check for odd elements
2081 * - Use offset of 4 to check for even elements
2082 * - Big Endian:
2083 * - Use offset of 0 to check for even elements
2084 * - Use offset of 4 to check for odd elements
2085 * A detailed description of the vector element ordering for little endian and
2086 * big endian can be found at
2087 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2088 * Targeting your applications - what little endian and big endian IBM XL C/C++
2089 * compiler differences mean to you
2090 *
2091 * The mask to the shuffle vector instruction specifies the indices of the
2092 * elements from the two input vectors to place in the result. The elements are
2093 * numbered in array-access order, starting with the first vector. These vectors
2094 * are always of type v16i8, so each vector contains 16 byte-sized elements.
2095 * More info on the shuffle vector can be found in the
2096 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2097 * Language Reference.
2098 *
2099 * The RHSStartValue indicates whether the same input vectors are used (unary)
2100 * or two different input vectors are used, based on the following:
2101 * - If the instruction uses the same vector for both inputs, the range of the
2102 * indices will be 0 to 15. In this case, the RHSStart value passed should
2103 * be 0.
2104 * - If the instruction has two different vectors then the range of the
2105 * indices will be 0 to 31. In this case, the RHSStart value passed should
2106 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2107 * to 31 specify elements in the second vector).
2108 *
2109 * \param[in] N The shuffle vector SD Node to analyze
2110 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2111 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2112 * vector to the shuffle_vector instruction
2113 * \return true iff this shuffle vector represents an even or odd word merge
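 *
 * For illustration (derived from the checks below): with IndexOffset = 0 and
 * RHSStartValue = 16 (a big-endian even-word merge of two different inputs),
 * the expected byte mask is <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>.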
2114 */
2115static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2116 unsigned RHSStartValue) {
2117 if (N->getValueType(ResNo: 0) != MVT::v16i8)
2118 return false;
2119
2120 for (unsigned i = 0; i < 2; ++i)
2121 for (unsigned j = 0; j < 4; ++j)
2122 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i*4+j),
2123 Val: i*RHSStartValue+j+IndexOffset) ||
2124 !isConstantOrUndef(Op: N->getMaskElt(Idx: i*4+j+8),
2125 Val: i*RHSStartValue+j+IndexOffset+8))
2126 return false;
2127 return true;
2128}
2129
2130/**
2131 * Determine if the specified shuffle mask is suitable for the vmrgew or
2132 * vmrgow instructions.
2133 *
2134 * \param[in] N The shuffle vector SD Node to analyze
2135 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2136 * \param[in] ShuffleKind Identify the type of merge:
2137 * - 0 = big-endian merge with two different inputs;
2138 * - 1 = either-endian merge with two identical inputs;
2139 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2140 * little-endian merges).
2141 * \param[in] DAG The current SelectionDAG
2142 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow instruction
2143 */
2144bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2145 unsigned ShuffleKind, SelectionDAG &DAG) {
2146 if (DAG.getDataLayout().isLittleEndian()) {
2147 unsigned indexOffset = CheckEven ? 4 : 0;
2148 if (ShuffleKind == 1) // Unary
2149 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 0);
2150 else if (ShuffleKind == 2) // swapped
2151 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 16);
2152 else
2153 return false;
2154 }
2155 else {
2156 unsigned indexOffset = CheckEven ? 0 : 4;
2157 if (ShuffleKind == 1) // Unary
2158 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 0);
2159 else if (ShuffleKind == 0) // Normal
2160 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 16);
2161 else
2162 return false;
2163 }
2164 return false;
2165}
2166
2167/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2168/// amount, otherwise return -1.
2169/// The ShuffleKind distinguishes between big-endian operations with two
2170/// different inputs (0), either-endian operations with two identical inputs
2171/// (1), and little-endian operations with two different inputs (2). For the
2172/// latter, the input operands are swapped (see PPCInstrAltivec.td).
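/// For illustration (derived from the checks below): the mask <3,4,5,...,18>
/// is a shift by 3 bytes; with ShuffleKind 0 on a big-endian target this
/// returns 3, while with ShuffleKind 2 on a little-endian target it returns
/// 16 - 3 = 13.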
2173int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2174 SelectionDAG &DAG) {
2175 if (N->getValueType(ResNo: 0) != MVT::v16i8)
2176 return -1;
2177
2178 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);
2179
2180 // Find the first non-undef value in the shuffle mask.
2181 unsigned i;
2182 for (i = 0; i != 16 && SVOp->getMaskElt(Idx: i) < 0; ++i)
2183 /*search*/;
2184
2185 if (i == 16) return -1; // all undef.
2186
2187 // Otherwise, check to see if the rest of the elements are consecutively
2188 // numbered from this value.
2189 unsigned ShiftAmt = SVOp->getMaskElt(Idx: i);
2190 if (ShiftAmt < i) return -1;
2191
2192 ShiftAmt -= i;
2193 bool isLE = DAG.getDataLayout().isLittleEndian();
2194
2195 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2196 // Check the rest of the elements to see if they are consecutive.
2197 for (++i; i != 16; ++i)
2198 if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: ShiftAmt+i))
2199 return -1;
2200 } else if (ShuffleKind == 1) {
2201 // Check the rest of the elements to see if they are consecutive.
2202 for (++i; i != 16; ++i)
2203 if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: (ShiftAmt+i) & 15))
2204 return -1;
2205 } else
2206 return -1;
2207
2208 if (isLE)
2209 ShiftAmt = 16 - ShiftAmt;
2210
2211 return ShiftAmt;
2212}
2213
2214/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2215/// specifies a splat of a single element that is suitable for input to
2216/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
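/// For illustration (derived from the checks below): with EltSize = 4, a splat
/// of word 2 of the first input corresponds to the byte mask
/// <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11>.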
2217bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2218 EVT VT = N->getValueType(ResNo: 0);
2219 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2220 return EltSize == 8 && N->getMaskElt(Idx: 0) == N->getMaskElt(Idx: 1);
2221
2222 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2223 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2224
2225 // The consecutive indices need to specify an element, not part of two
2226 // different elements. So abandon ship early if this isn't the case.
2227 if (N->getMaskElt(Idx: 0) % EltSize != 0)
2228 return false;
2229
2230 // This is a splat operation if each element of the permute is the same, and
2231 // if the value doesn't reference the second vector.
2232 unsigned ElementBase = N->getMaskElt(Idx: 0);
2233
2234 // FIXME: Handle UNDEF elements too!
2235 if (ElementBase >= 16)
2236 return false;
2237
2238 // Check that the indices are consecutive, in the case of a multi-byte element
2239 // splatted with a v16i8 mask.
2240 for (unsigned i = 1; i != EltSize; ++i)
2241 if (N->getMaskElt(Idx: i) < 0 || N->getMaskElt(Idx: i) != (int)(i+ElementBase))
2242 return false;
2243
2244 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2245 // An UNDEF element is a sequence of UNDEF bytes.
2246 if (N->getMaskElt(Idx: i) < 0) {
2247 for (unsigned j = 1; j != EltSize; ++j)
2248 if (N->getMaskElt(Idx: i + j) >= 0)
2249 return false;
2250 } else
2251 for (unsigned j = 0; j != EltSize; ++j)
2252 if (N->getMaskElt(Idx: i + j) != N->getMaskElt(Idx: j))
2253 return false;
2254 }
2255 return true;
2256}
2257
2258/// Check that the mask is shuffling N byte elements. Within each N byte
2259/// element of the mask, the indices could be either in increasing or
2260/// decreasing order as long as they are consecutive.
2261/// \param[in] N the shuffle vector SD Node to analyze
2262/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2263/// Word/DoubleWord/QuadWord).
2264 /// \param[in] StepLen the expected delta between consecutive indices within
2265 /// each N-byte element: 1 if the mask is increasing, -1 if decreasing.
2266/// \return true iff the mask is shuffling N byte elements.
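/// For illustration (derived from the checks below): with Width = 4 and
/// StepLen = 1, the mask <4,5,6,7, 0,1,2,3, 12,13,14,15, 28,29,30,31> is
/// accepted, since each 4-byte group starts at a multiple of 4 and its indices
/// are consecutive and increasing.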
2267static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2268 int StepLen) {
2269 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2270 "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2272
2273 unsigned NumOfElem = 16 / Width;
2274 unsigned MaskVal[16]; // Width is never greater than 16
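  // For each Width-byte element, the leading byte index must fall on an
  // element boundary (StepLen == 1) or on the last byte of an element
  // (StepLen == -1), and the remaining indices must follow in that direction.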
2275 for (unsigned i = 0; i < NumOfElem; ++i) {
2276 MaskVal[0] = N->getMaskElt(Idx: i * Width);
2277 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2278 return false;
2279 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2280 return false;
2281 }
2282
2283 for (unsigned int j = 1; j < Width; ++j) {
2284 MaskVal[j] = N->getMaskElt(Idx: i * Width + j);
2285 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2286 return false;
2287 }
2288 }
2289 }
2290
2291 return true;
2292}
2293
2294bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2295 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2296 if (!isNByteElemShuffleMask(N, Width: 4, StepLen: 1))
2297 return false;
2298
2299 // Now we look at mask elements 0,4,8,12
2300 unsigned M0 = N->getMaskElt(Idx: 0) / 4;
2301 unsigned M1 = N->getMaskElt(Idx: 4) / 4;
2302 unsigned M2 = N->getMaskElt(Idx: 8) / 4;
2303 unsigned M3 = N->getMaskElt(Idx: 12) / 4;
2304 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2305 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
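  // These tables map the word index of the element being inserted to the
  // XXSLDWI rotation (in words) that moves it into the source word position
  // read by XXINSERTW, for little-endian and big-endian numbering respectively.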
2306
2307 // Below, let H and L be arbitrary elements of the shuffle mask
2308 // where H is in the range [4,7] and L is in the range [0,3].
2309 // H, 1, 2, 3 or L, 5, 6, 7
2310 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2311 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2312 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2313 InsertAtByte = IsLE ? 12 : 0;
2314 Swap = M0 < 4;
2315 return true;
2316 }
2317 // 0, H, 2, 3 or 4, L, 6, 7
2318 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2319 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2320 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2321 InsertAtByte = IsLE ? 8 : 4;
2322 Swap = M1 < 4;
2323 return true;
2324 }
2325 // 0, 1, H, 3 or 4, 5, L, 7
2326 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2327 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2328 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2329 InsertAtByte = IsLE ? 4 : 8;
2330 Swap = M2 < 4;
2331 return true;
2332 }
2333 // 0, 1, 2, H or 4, 5, 6, L
2334 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2335 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2336 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2337 InsertAtByte = IsLE ? 0 : 12;
2338 Swap = M3 < 4;
2339 return true;
2340 }
2341
2342 // If both vector operands for the shuffle are the same vector, the mask will
2343 // contain only elements from the first one and the second one will be undef.
2344 if (N->getOperand(Num: 1).isUndef()) {
2345 ShiftElts = 0;
2346 Swap = true;
2347 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2348 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2349 InsertAtByte = IsLE ? 12 : 0;
2350 return true;
2351 }
2352 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2353 InsertAtByte = IsLE ? 8 : 4;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2357 InsertAtByte = IsLE ? 4 : 8;
2358 return true;
2359 }
2360 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2361 InsertAtByte = IsLE ? 0 : 12;
2362 return true;
2363 }
2364 }
2365
2366 return false;
2367}
2368
2369bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2370 bool &Swap, bool IsLE) {
2371 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2372 // Ensure each byte index of the word is consecutive.
2373 if (!isNByteElemShuffleMask(N, Width: 4, StepLen: 1))
2374 return false;
2375
2376 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2377 unsigned M0 = N->getMaskElt(Idx: 0) / 4;
2378 unsigned M1 = N->getMaskElt(Idx: 4) / 4;
2379 unsigned M2 = N->getMaskElt(Idx: 8) / 4;
2380 unsigned M3 = N->getMaskElt(Idx: 12) / 4;
2381
2382 // If both vector operands for the shuffle are the same vector, the mask will
2383 // contain only elements from the first one and the second one will be undef.
2384 if (N->getOperand(Num: 1).isUndef()) {
2385 assert(M0 < 4 && "Indexing into an undef vector?");
2386 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2387 return false;
2388
2389 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2390 Swap = false;
2391 return true;
2392 }
2393
2394 // Ensure each word index of the ShuffleVector Mask is consecutive.
2395 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2396 return false;
2397
2398 if (IsLE) {
2399 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2400 // Input vectors don't need to be swapped if the leading element
2401 // of the result is one of the 3 left elements of the second vector
2402 // (or if there is no shift to be done at all).
2403 Swap = false;
2404 ShiftElts = (8 - M0) % 8;
2405 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2406 // Input vectors need to be swapped if the leading element
2407 // of the result is one of the 3 left elements of the first vector
2408 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2409 Swap = true;
2410 ShiftElts = (4 - M0) % 4;
2411 }
2412
2413 return true;
2414 } else { // BE
2415 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2416 // Input vectors don't need to be swapped if the leading element
2417 // of the result is one of the 4 elements of the first vector.
2418 Swap = false;
2419 ShiftElts = M0;
2420 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2421 // Input vectors need to be swapped if the leading element
2422 // of the result is one of the 4 elements of the right vector.
2423 Swap = true;
2424 ShiftElts = M0 - 4;
2425 }
2426
2427 return true;
2428 }
2429}
2430
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2432 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2433
2434 if (!isNByteElemShuffleMask(N, Width, StepLen: -1))
2435 return false;
2436
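  // Requiring the leading index of each element to be its own last byte,
  // together with the decreasing-order check above, means every Width-byte
  // element has its bytes exactly reversed, matching the XXBR[HWDQ] family.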
2437 for (int i = 0; i < 16; i += Width)
2438 if (N->getMaskElt(Idx: i) != i + Width - 1)
2439 return false;
2440
2441 return true;
2442}
2443
2444bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2445 return isXXBRShuffleMaskHelper(N, Width: 2);
2446}
2447
2448bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, Width: 4);
2450}
2451
2452bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, Width: 8);
2454}
2455
2456bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2457 return isXXBRShuffleMaskHelper(N, Width: 16);
2458}
2459
2460/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2461/// if the inputs to the instruction should be swapped and set \p DM to the
2462/// value for the immediate.
2463/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2464/// AND element 0 of the result comes from the first input (LE) or second input
2465/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is an XXPERMDI shuffle
2467/// mask.
2468bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2469 bool &Swap, bool IsLE) {
2470 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2471
2472 // Ensure each byte index of the double word is consecutive.
2473 if (!isNByteElemShuffleMask(N, Width: 8, StepLen: 1))
2474 return false;
2475
2476 unsigned M0 = N->getMaskElt(Idx: 0) / 8;
2477 unsigned M1 = N->getMaskElt(Idx: 8) / 8;
2478 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2479
2480 // If both vector operands for the shuffle are the same vector, the mask will
2481 // contain only elements from the first one and the second one will be undef.
2482 if (N->getOperand(Num: 1).isUndef()) {
2483 if ((M0 | M1) < 2) {
2484 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2485 Swap = false;
2486 return true;
2487 } else
2488 return false;
2489 }
2490
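  // DM is the 2-bit XXPERMDI immediate; each bit selects which doubleword of
  // an input forms the corresponding doubleword of the result. Little-endian
  // numbering is reversed relative to the register layout, so the bits are
  // complemented compared to the big-endian encoding.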
2491 if (IsLE) {
2492 if (M0 > 1 && M1 < 2) {
2493 Swap = false;
2494 } else if (M0 < 2 && M1 > 1) {
2495 M0 = (M0 + 2) % 4;
2496 M1 = (M1 + 2) % 4;
2497 Swap = true;
2498 } else
2499 return false;
2500
2501 // Note: if control flow comes here that means Swap is already set above
2502 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2503 return true;
2504 } else { // BE
2505 if (M0 < 2 && M1 > 1) {
2506 Swap = false;
2507 } else if (M0 > 1 && M1 < 2) {
2508 M0 = (M0 + 2) % 4;
2509 M1 = (M1 + 2) % 4;
2510 Swap = true;
2511 } else
2512 return false;
2513
2514 // Note: if control flow comes here that means Swap is already set above
2515 DM = (M0 << 1) + (M1 & 1);
2516 return true;
2517 }
2518}
2519
2520
2521/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2522/// appropriate for PPC mnemonics (which have a big endian bias - namely
2523/// elements are counted from the left of the vector register).
2524unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2525 SelectionDAG &DAG) {
2526 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);
2527 assert(isSplatShuffleMask(SVOp, EltSize));
2528 EVT VT = SVOp->getValueType(ResNo: 0);
2529
2530 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2531 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(Idx: 0)
2532 : SVOp->getMaskElt(Idx: 0);
2533
2534 if (DAG.getDataLayout().isLittleEndian())
2535 return (16 / EltSize) - 1 - (SVOp->getMaskElt(Idx: 0) / EltSize);
2536 else
2537 return SVOp->getMaskElt(Idx: 0) / EltSize;
2538}
2539
2540/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2541/// by using a vspltis[bhw] instruction of the specified element size, return
2542/// the constant being splatted. The ByteSize field indicates the number of
2543/// bytes of each element [124] -> [bhw].
2544SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2545 SDValue OpVal;
2546
2547 // If ByteSize of the splat is bigger than the element size of the
2548 // build_vector, then we have a case where we are checking for a splat where
2549 // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2551 unsigned EltSize = 16/N->getNumOperands();
2552 if (EltSize < ByteSize) {
2553 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2554 SDValue UniquedVals[4];
2555 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2556
    // See if all of the elements in the buildvector agree across each chunk.
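    // UniquedVals[k] records the single constant (or stays null for undef)
    // seen at offset k of every Multiple-entry chunk of the build_vector.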
2558 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2559 if (N->getOperand(Num: i).isUndef()) continue;
2560 // If the element isn't a constant, bail fully out.
2561 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: i))) return SDValue();
2562
2563 if (!UniquedVals[i&(Multiple-1)].getNode())
2564 UniquedVals[i&(Multiple-1)] = N->getOperand(Num: i);
2565 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(Num: i))
2566 return SDValue(); // no match.
2567 }
2568
2569 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2570 // either constant or undef values that are identical for each chunk. See
2571 // if these chunks can form into a larger vspltis*.
2572
2573 // Check to see if all of the leading entries are either 0 or -1. If
2574 // neither, then this won't fit into the immediate field.
2575 bool LeadingZero = true;
2576 bool LeadingOnes = true;
2577 for (unsigned i = 0; i != Multiple-1; ++i) {
2578 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2579
2580 LeadingZero &= isNullConstant(V: UniquedVals[i]);
2581 LeadingOnes &= isAllOnesConstant(V: UniquedVals[i]);
2582 }
2583 // Finally, check the least significant entry.
2584 if (LeadingZero) {
2585 if (!UniquedVals[Multiple-1].getNode())
2586 return DAG.getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32); // 0,0,0,undef
2587 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2588 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2589 return DAG.getTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
2590 }
2591 if (LeadingOnes) {
2592 if (!UniquedVals[Multiple-1].getNode())
2593 return DAG.getTargetConstant(Val: ~0U, DL: SDLoc(N), VT: MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2595 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2596 return DAG.getTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
2597 }
2598
2599 return SDValue();
2600 }
2601
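  // From here on EltSize >= ByteSize: the build_vector must splat a single
  // constant whose bit pattern repeats every ByteSize*8 bits and fits in the
  // 5-bit signed immediate of vspltis[bhw].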
2602 // Check to see if this buildvec has a single non-undef value in its elements.
2603 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2604 if (N->getOperand(Num: i).isUndef()) continue;
2605 if (!OpVal.getNode())
2606 OpVal = N->getOperand(Num: i);
2607 else if (OpVal != N->getOperand(Num: i))
2608 return SDValue();
2609 }
2610
2611 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2612
2613 unsigned ValSizeInBytes = EltSize;
2614 uint64_t Value = 0;
2615 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: OpVal)) {
2616 Value = CN->getZExtValue();
2617 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: OpVal)) {
2618 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2619 Value = llvm::bit_cast<uint32_t>(from: CN->getValueAPF().convertToFloat());
2620 }
2621
2622 // If the splat value is larger than the element value, then we can never do
2623 // this splat. The only case that we could fit the replicated bits into our
2624 // immediate field for would be zero, and we prefer to use vxor for it.
2625 if (ValSizeInBytes < ByteSize) return SDValue();
2626
2627 // If the element value is larger than the splat value, check if it consists
2628 // of a repeated bit pattern of size ByteSize.
2629 if (!APInt(ValSizeInBytes * 8, Value).isSplat(SplatSizeInBits: ByteSize * 8))
2630 return SDValue();
2631
2632 // Properly sign extend the value.
2633 int MaskVal = SignExtend32(X: Value, B: ByteSize * 8);
2634
2635 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2636 if (MaskVal == 0) return SDValue();
2637
2638 // Finally, if this value fits in a 5 bit sext field, return it
2639 if (SignExtend32<5>(X: MaskVal) == MaskVal)
2640 return DAG.getSignedTargetConstant(Val: MaskVal, DL: SDLoc(N), VT: MVT::i32);
2641 return SDValue();
2642}
2643
2644//===----------------------------------------------------------------------===//
2645// Addressing Mode Selection
2646//===----------------------------------------------------------------------===//
2647
2648/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2649/// or 64-bit immediate, and if the value can be accurately represented as a
2650/// sign extension from a 16-bit value. If so, this returns true and the
2651/// immediate.
2652bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2653 if (!isa<ConstantSDNode>(Val: N))
2654 return false;
2655
2656 Imm = (int16_t)N->getAsZExtVal();
2657 if (N->getValueType(ResNo: 0) == MVT::i32)
2658 return Imm == (int32_t)N->getAsZExtVal();
2659 else
2660 return Imm == (int64_t)N->getAsZExtVal();
2661}
2662bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2663 return isIntS16Immediate(N: Op.getNode(), Imm);
2664}
2665
2666/// Used when computing address flags for selecting loads and stores.
2667/// If we have an OR, check if the LHS and RHS are provably disjoint.
2668/// An OR of two provably disjoint values is equivalent to an ADD.
2669/// Most PPC load/store instructions compute the effective address as a sum,
2670/// so doing this conversion is useful.
2671static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2672 if (N.getOpcode() != ISD::OR)
2673 return false;
2674 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2675 if (!LHSKnown.Zero.getBoolValue())
2676 return false;
2677 KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1));
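  // The OR behaves exactly like an ADD when every bit position is known to be
  // zero in at least one operand, since no carries can be generated.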
2678 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2679}
2680
2681/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2682/// be represented as an indexed [r+r] operation.
2683bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2684 SDValue &Index,
2685 SelectionDAG &DAG) const {
2686 for (SDNode *U : N->users()) {
2687 if (MemSDNode *Memop = dyn_cast<MemSDNode>(Val: U)) {
2688 if (Memop->getMemoryVT() == MVT::f64) {
2689 Base = N.getOperand(i: 0);
2690 Index = N.getOperand(i: 1);
2691 return true;
2692 }
2693 }
2694 }
2695 return false;
2696}
2697
/// isIntS34Immediate - This method tests whether the value of the given node
/// can be accurately represented as a sign extension from a 34-bit value. If
/// so, this returns true and the immediate.
2701bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2702 if (!isa<ConstantSDNode>(Val: N))
2703 return false;
2704
2705 Imm = (int64_t)cast<ConstantSDNode>(Val: N)->getSExtValue();
2706 return isInt<34>(x: Imm);
2707}
2708bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2709 return isIntS34Immediate(N: Op.getNode(), Imm);
2710}
2711
/// SelectAddressRegReg - Given the specified address, check to see if it
2713/// can be represented as an indexed [r+r] operation. Returns false if it
2714/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2715/// non-zero and N can be represented by a base register plus a signed 16-bit
2716/// displacement, make a more precise judgement by checking (displacement % \p
2717/// EncodingAlignment).
2718bool PPCTargetLowering::SelectAddressRegReg(
2719 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2720 MaybeAlign EncodingAlignment) const {
2721 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2722 // a [pc+imm].
2723 if (SelectAddressPCRel(N, Base))
2724 return false;
2725
2726 int16_t Imm = 0;
2727 if (N.getOpcode() == ISD::ADD) {
    // SPE f64 loads/stores can only handle 8-bit offsets, not the usual 16-bit
    // displacement, so check whether any user needs the [r+r] form.
2730 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2731 return true;
2732 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) &&
2733 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
2734 return false; // r+i
2735 if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo)
2736 return false; // r+i
2737
2738 Base = N.getOperand(i: 0);
2739 Index = N.getOperand(i: 1);
2740 return true;
2741 } else if (N.getOpcode() == ISD::OR) {
2742 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) &&
2743 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
2744 return false; // r+i can fold it if we can.
2745
2746 // If this is an or of disjoint bitfields, we can codegen this as an add
2747 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2748 // disjoint.
2749 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2750
2751 if (LHSKnown.Zero.getBoolValue()) {
2752 KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1));
2753 // If all of the bits are known zero on the LHS or RHS, the add won't
2754 // carry.
2755 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2756 Base = N.getOperand(i: 0);
2757 Index = N.getOperand(i: 1);
2758 return true;
2759 }
2760 }
2761 }
2762
2763 return false;
2764}
2765
2766// If we happen to be doing an i64 load or store into a stack slot that has
2767// less than a 4-byte alignment, then the frame-index elimination may need to
2768// use an indexed load or store instruction (because the offset may not be a
2769// multiple of 4). The extra register needed to hold the offset comes from the
2770// register scavenger, and it is possible that the scavenger will need to use
2771// an emergency spill slot. As a result, we need to make sure that a spill slot
2772// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2773// stack slot.
2774static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2775 // FIXME: This does not handle the LWA case.
2776 if (VT != MVT::i64)
2777 return;
2778
2779 // NOTE: We'll exclude negative FIs here, which come from argument
2780 // lowering, because there are no known test cases triggering this problem
2781 // using packed structures (or similar). We can remove this exclusion if
2782 // we find such a test case. The reason why this is so test-case driven is
2783 // because this entire 'fixup' is only to prevent crashes (from the
2784 // register scavenger) on not-really-valid inputs. For example, if we have:
2785 // %a = alloca i1
2786 // %b = bitcast i1* %a to i64*
  //   store i64 0, i64* %b
2788 // then the store should really be marked as 'align 1', but is not. If it
2789 // were marked as 'align 1' then the indexed form would have been
2790 // instruction-selected initially, and the problem this 'fixup' is preventing
2791 // won't happen regardless.
2792 if (FrameIdx < 0)
2793 return;
2794
2795 MachineFunction &MF = DAG.getMachineFunction();
2796 MachineFrameInfo &MFI = MF.getFrameInfo();
2797
2798 if (MFI.getObjectAlign(ObjectIdx: FrameIdx) >= Align(4))
2799 return;
2800
2801 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2802 FuncInfo->setHasNonRISpills();
2803}
2804
2805/// Returns true if the address N can be represented by a base register plus
2806/// a signed 16-bit displacement [r+imm], and if it is not better
2807/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2808/// displacements that are multiples of that value.
2809bool PPCTargetLowering::SelectAddressRegImm(
2810 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2811 MaybeAlign EncodingAlignment) const {
2812 // FIXME dl should come from parent load or store, not from address
2813 SDLoc dl(N);
2814
2815 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2816 // a [pc+imm].
2817 if (SelectAddressPCRel(N, Base))
2818 return false;
2819
2820 // If this can be more profitably realized as r+r, fail.
2821 if (SelectAddressRegReg(N, Base&: Disp, Index&: Base, DAG, EncodingAlignment))
2822 return false;
2823
2824 if (N.getOpcode() == ISD::ADD) {
2825 int16_t imm = 0;
2826 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) &&
2827 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
2828 Disp = DAG.getSignedTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
2829 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) {
2830 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2831 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
2832 } else {
2833 Base = N.getOperand(i: 0);
2834 }
2835 return true; // [r+i]
2836 } else if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo) {
2837 // Match LOAD (ADD (X, Lo(G))).
2838 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2839 "Cannot handle constant offsets yet!");
2840 Disp = N.getOperand(i: 1).getOperand(i: 0); // The global address.
2841 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2842 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2843 Disp.getOpcode() == ISD::TargetConstantPool ||
2844 Disp.getOpcode() == ISD::TargetJumpTable);
2845 Base = N.getOperand(i: 0);
2846 return true; // [&g+r]
2847 }
2848 } else if (N.getOpcode() == ISD::OR) {
2849 int16_t imm = 0;
2850 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) &&
2851 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
2852 // If this is an or of disjoint bitfields, we can codegen this as an add
2853 // (for better address arithmetic) if the LHS and RHS of the OR are
2854 // provably disjoint.
2855 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2856
2857 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2858 // If all of the bits are known zero on the LHS or RHS, the add won't
2859 // carry.
2860 if (FrameIndexSDNode *FI =
2861 dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) {
2862 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2863 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
2864 } else {
2865 Base = N.getOperand(i: 0);
2866 }
2867 Disp = DAG.getTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
2868 return true;
2869 }
2870 }
2871 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
2872 // Loading from a constant address.
2873
2874 // If this address fits entirely in a 16-bit sext immediate field, codegen
2875 // this as "d, 0"
2876 int16_t Imm;
2877 if (isIntS16Immediate(N: CN, Imm) &&
2878 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm))) {
2879 Disp = DAG.getTargetConstant(Val: Imm, DL: dl, VT: CN->getValueType(ResNo: 0));
2880 Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2881 VT: CN->getValueType(ResNo: 0));
2882 return true;
2883 }
2884
2885 // Handle 32-bit sext immediates with LIS + addr mode.
2886 if ((CN->getValueType(ResNo: 0) == MVT::i32 ||
2887 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2888 (!EncodingAlignment ||
2889 isAligned(Lhs: *EncodingAlignment, SizeInBytes: CN->getZExtValue()))) {
2890 int Addr = (int)CN->getZExtValue();
2891
2892 // Otherwise, break this down into an LIS + disp.
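      // The 16-bit displacement is sign-extended when added to the base, so
      // compute the LIS value as (Addr - (signed short)Addr) >> 16 to
      // compensate when the low 16 bits are negative.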
2893 Disp = DAG.getTargetConstant(Val: (short)Addr, DL: dl, VT: MVT::i32);
2894
2895 Base = DAG.getTargetConstant(Val: (Addr - (signed short)Addr) >> 16, DL: dl,
2896 VT: MVT::i32);
2897 unsigned Opc = CN->getValueType(ResNo: 0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2898 Base = SDValue(DAG.getMachineNode(Opcode: Opc, dl, VT: CN->getValueType(ResNo: 0), Op1: Base), 0);
2899 return true;
2900 }
2901 }
2902
2903 Disp = DAG.getTargetConstant(Val: 0, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()));
2904 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
2905 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2906 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
2907 } else
2908 Base = N;
2909 return true; // [r+0]
2910}
2911
2912/// Similar to the 16-bit case but for instructions that take a 34-bit
2913/// displacement field (prefixed loads/stores).
2914bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2915 SDValue &Base,
2916 SelectionDAG &DAG) const {
2917 // Only on 64-bit targets.
2918 if (N.getValueType() != MVT::i64)
2919 return false;
2920
2921 SDLoc dl(N);
2922 int64_t Imm = 0;
2923
2924 if (N.getOpcode() == ISD::ADD) {
2925 if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm))
2926 return false;
2927 Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2928 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
2929 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2930 else
2931 Base = N.getOperand(i: 0);
2932 return true;
2933 }
2934
2935 if (N.getOpcode() == ISD::OR) {
2936 if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm))
2937 return false;
2938 // If this is an or of disjoint bitfields, we can codegen this as an add
2939 // (for better address arithmetic) if the LHS and RHS of the OR are
2940 // provably disjoint.
2941 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2942 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2943 return false;
2944 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
2945 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2946 else
2947 Base = N.getOperand(i: 0);
2948 Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2949 return true;
2950 }
2951
2952 if (isIntS34Immediate(Op: N, Imm)) { // If the address is a 34-bit const.
2953 Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2954 Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
2955 return true;
2956 }
2957
2958 return false;
2959}
2960
/// SelectAddressRegRegOnly - Given the specified address, force it to be
2962/// represented as an indexed [r+r] operation.
2963bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2964 SDValue &Index,
2965 SelectionDAG &DAG) const {
2966 // Check to see if we can easily represent this as an [r+r] address. This
2967 // will fail if it thinks that the address is more profitably represented as
2968 // reg+imm, e.g. where imm = 0.
2969 if (SelectAddressRegReg(N, Base, Index, DAG))
2970 return true;
2971
2972 // If the address is the result of an add, we will utilize the fact that the
2973 // address calculation includes an implicit add. However, we can reduce
2974 // register pressure if we do not materialize a constant just for use as the
2975 // index register. We only get rid of the add if it is not an add of a
2976 // value and a 16-bit signed constant and both have a single use.
2977 int16_t imm = 0;
2978 if (N.getOpcode() == ISD::ADD &&
2979 (!isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) ||
2980 !N.getOperand(i: 1).hasOneUse() || !N.getOperand(i: 0).hasOneUse())) {
2981 Base = N.getOperand(i: 0);
2982 Index = N.getOperand(i: 1);
2983 return true;
2984 }
2985
2986 // Otherwise, do it the hard way, using R0 as the base register.
2987 Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2988 VT: N.getValueType());
2989 Index = N;
2990 return true;
2991}
2992
2993template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2994 Ty *PCRelCand = dyn_cast<Ty>(N);
2995 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(TF: PCRelCand->getTargetFlags()));
2996}
2997
2998/// Returns true if this address is a PC Relative address.
2999/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
3000/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
3001bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3002 // This is a materialize PC Relative node. Always select this as PC Relative.
3003 Base = N;
3004 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3005 return true;
3006 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3007 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3008 isValidPCRelNode<JumpTableSDNode>(N) ||
3009 isValidPCRelNode<BlockAddressSDNode>(N))
3010 return true;
3011 return false;
3012}
3013
/// Returns true if we should use a direct load into a vector register (using
/// an instruction such as lxsd or lfd) instead of a load into a GPR plus a
/// direct-move sequence.
3016static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3017
3018 // If there are any other uses other than scalar to vector, then we should
3019 // keep it as a scalar load -> direct move pattern to prevent multiple
3020 // loads.
3021 LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N);
3022 if (!LD)
3023 return false;
3024
3025 EVT MemVT = LD->getMemoryVT();
3026 if (!MemVT.isSimple())
3027 return false;
3028 switch(MemVT.getSimpleVT().SimpleTy) {
3029 case MVT::i64:
3030 break;
3031 case MVT::i32:
3032 if (!ST.hasP8Vector())
3033 return false;
3034 break;
3035 case MVT::i16:
3036 case MVT::i8:
3037 if (!ST.hasP9Vector())
3038 return false;
3039 break;
3040 default:
3041 return false;
3042 }
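  // Only element types that can be loaded directly into a vector register
  // qualify: i64 always, i32 requires Power8 vector support, and i8/i16
  // require Power9 vector support.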
3043
3044 SDValue LoadedVal(N, 0);
3045 if (!LoadedVal.hasOneUse())
3046 return false;
3047
3048 for (SDUse &Use : LD->uses())
3049 if (Use.getResNo() == 0 &&
3050 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3051 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3052 return false;
3053
3054 return true;
3055}
3056
/// getPreIndexedAddressParts - Returns true if the node's address can be
/// legally represented as a pre-indexed load/store address, and if so returns
/// the base pointer, offset pointer, and addressing mode by reference.
3060bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3061 SDValue &Offset,
3062 ISD::MemIndexedMode &AM,
3063 SelectionDAG &DAG) const {
3064 if (DisablePPCPreinc) return false;
3065
3066 bool isLoad = true;
3067 SDValue Ptr;
3068 EVT VT;
3069 Align Alignment;
3070 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
3071 Ptr = LD->getBasePtr();
3072 VT = LD->getMemoryVT();
3073 Alignment = LD->getAlign();
3074 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
3075 Ptr = ST->getBasePtr();
3076 VT = ST->getMemoryVT();
3077 Alignment = ST->getAlign();
3078 isLoad = false;
3079 } else
3080 return false;
3081
  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions, because we can fold these into a more efficient instruction
  // instead (such as LXSD).
3085 if (isLoad && usePartialVectorLoads(N, ST: Subtarget)) {
3086 return false;
3087 }
3088
3089 // PowerPC doesn't have preinc load/store instructions for vectors
3090 if (VT.isVector())
3091 return false;
3092
3093 if (SelectAddressRegReg(N: Ptr, Base, Index&: Offset, DAG)) {
3094 // Common code will reject creating a pre-inc form if the base pointer
3095 // is a frame index, or if N is a store and the base pointer is either
3096 // the same as or a predecessor of the value being stored. Check for
3097 // those situations here, and try with swapped Base/Offset instead.
3098 bool Swap = false;
3099
3100 if (isa<FrameIndexSDNode>(Val: Base) || isa<RegisterSDNode>(Val: Base))
3101 Swap = true;
3102 else if (!isLoad) {
3103 SDValue Val = cast<StoreSDNode>(Val: N)->getValue();
3104 if (Val == Base || Base.getNode()->isPredecessorOf(N: Val.getNode()))
3105 Swap = true;
3106 }
3107
3108 if (Swap)
3109 std::swap(a&: Base, b&: Offset);
3110
3111 AM = ISD::PRE_INC;
3112 return true;
3113 }
3114
3115 // LDU/STU can only handle immediates that are a multiple of 4.
3116 if (VT != MVT::i64) {
3117 if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: std::nullopt))
3118 return false;
3119 } else {
3120 // LDU/STU need an address with at least 4-byte alignment.
3121 if (Alignment < Align(4))
3122 return false;
3123
3124 if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: Align(4)))
3125 return false;
3126 }
3127
3128 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
3129 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3130 // sext i32 to i64 when addr mode is r+i.
3131 if (LD->getValueType(ResNo: 0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3132 LD->getExtensionType() == ISD::SEXTLOAD &&
3133 isa<ConstantSDNode>(Val: Offset))
3134 return false;
3135 }
3136
3137 AM = ISD::PRE_INC;
3138 return true;
3139}
3140
3141//===----------------------------------------------------------------------===//
3142// LowerOperation implementation
3143//===----------------------------------------------------------------------===//
3144
/// Set HiOpFlags and LoOpFlags to the target MO flags used for label
/// references, selecting the PIC variants when generating position-independent
/// code.
3147static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3148 unsigned &HiOpFlags, unsigned &LoOpFlags,
3149 const GlobalValue *GV = nullptr) {
3150 HiOpFlags = PPCII::MO_HA;
3151 LoOpFlags = PPCII::MO_LO;
3152
3153 // Don't use the pic base if not in PIC relocation model.
3154 if (IsPIC) {
3155 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3156 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3157 }
3158}
3159
3160static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3161 SelectionDAG &DAG) {
3162 SDLoc DL(HiPart);
3163 EVT PtrVT = HiPart.getValueType();
3164 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: PtrVT);
3165
3166 SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL, VT: PtrVT, N1: HiPart, N2: Zero);
3167 SDValue Lo = DAG.getNode(Opcode: PPCISD::Lo, DL, VT: PtrVT, N1: LoPart, N2: Zero);
3168
3169 // With PIC, the first instruction is actually "GR+hi(&G)".
3170 if (isPIC)
3171 Hi = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
3172 N1: DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL, VT: PtrVT), N2: Hi);
3173
3174 // Generate non-pic code that has direct accesses to the constant pool.
3175 // The address of the global is just (hi(&g)+lo(&g)).
3176 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Hi, N2: Lo);
3177}
3178
3179static void setUsesTOCBasePtr(MachineFunction &MF) {
3180 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3181 FuncInfo->setUsesTOCBasePtr();
3182}
3183
3184static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3185 setUsesTOCBasePtr(DAG.getMachineFunction());
3186}
3187
3188SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3189 SDValue GA) const {
3190 EVT VT = Subtarget.getScalarIntVT();
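  // The TOC base is in X2 on 64-bit targets and in R2 on 32-bit AIX; 32-bit
  // ELF PIC instead materializes the GOT base with GlobalBaseReg.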
3191 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(Reg: PPC::X2, VT)
3192 : Subtarget.isAIXABI()
3193 ? DAG.getRegister(Reg: PPC::R2, VT)
3194 : DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT);
3195 SDValue Ops[] = { GA, Reg };
3196 return DAG.getMemIntrinsicNode(
3197 Opcode: PPCISD::TOC_ENTRY, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops, MemVT: VT,
3198 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()), Alignment: std::nullopt,
3199 Flags: MachineMemOperand::MOLoad);
3200}
3201
3202SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3203 SelectionDAG &DAG) const {
3204 EVT PtrVT = Op.getValueType();
3205 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Val&: Op);
3206 const Constant *C = CP->getConstVal();
3207
3208 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3209 // The actual address of the GlobalValue is stored in the TOC.
3210 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3211 if (Subtarget.isUsingPCRelativeCalls()) {
3212 SDLoc DL(CP);
3213 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3214 SDValue ConstPool = DAG.getTargetConstantPool(
3215 C, VT: Ty, Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: PPCII::MO_PCREL_FLAG);
3216 return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: ConstPool);
3217 }
3218 setUsesTOCBasePtr(DAG);
3219 SDValue GA = DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0);
3220 return getTOCEntry(DAG, dl: SDLoc(CP), GA);
3221 }
3222
3223 unsigned MOHiFlag, MOLoFlag;
3224 bool IsPIC = isPositionIndependent();
3225 getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3226
3227 if (IsPIC && Subtarget.isSVR4ABI()) {
3228 SDValue GA =
3229 DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: PPCII::MO_PIC_FLAG);
3230 return getTOCEntry(DAG, dl: SDLoc(CP), GA);
3231 }
3232
3233 SDValue CPIHi =
3234 DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOHiFlag);
3235 SDValue CPILo =
3236 DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOLoFlag);
3237 return LowerLabelRef(HiPart: CPIHi, LoPart: CPILo, isPIC: IsPIC, DAG);
3238}
3239
3240// For 64-bit PowerPC, prefer the more compact relative encodings.
3241// This trades 32 bits per jump table entry for one or two instructions
// at the jump site.
3243unsigned PPCTargetLowering::getJumpTableEncoding() const {
3244 if (isJumpTableRelative())
3245 return MachineJumpTableInfo::EK_LabelDifference32;
3246
3247 return TargetLowering::getJumpTableEncoding();
3248}
3249
3250bool PPCTargetLowering::isJumpTableRelative() const {
3251 if (UseAbsoluteJumpTables)
3252 return false;
3253 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3254 return true;
3255 return TargetLowering::isJumpTableRelative();
3256}
3257
3258SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3259 SelectionDAG &DAG) const {
3260 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3261 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3262
3263 switch (getTargetMachine().getCodeModel()) {
3264 case CodeModel::Small:
3265 case CodeModel::Medium:
3266 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3267 default:
3268 return DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: SDLoc(),
3269 VT: getPointerTy(DL: DAG.getDataLayout()));
3270 }
3271}
3272
3273const MCExpr *
3274PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3275 unsigned JTI,
3276 MCContext &Ctx) const {
3277 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3278 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3279
3280 switch (getTargetMachine().getCodeModel()) {
3281 case CodeModel::Small:
3282 case CodeModel::Medium:
3283 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3284 default:
3285 return MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx);
3286 }
3287}
3288
3289SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3290 EVT PtrVT = Op.getValueType();
3291 JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op);
3292
3293 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3294 if (Subtarget.isUsingPCRelativeCalls()) {
3295 SDLoc DL(JT);
3296 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3297 SDValue GA =
3298 DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Ty, TargetFlags: PPCII::MO_PCREL_FLAG);
3299 SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3300 return MatAddr;
3301 }
3302
3303 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3304 // The actual address of the GlobalValue is stored in the TOC.
3305 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3306 setUsesTOCBasePtr(DAG);
3307 SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT);
3308 return getTOCEntry(DAG, dl: SDLoc(JT), GA);
3309 }
3310
3311 unsigned MOHiFlag, MOLoFlag;
3312 bool IsPIC = isPositionIndependent();
3313 getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3314
3315 if (IsPIC && Subtarget.isSVR4ABI()) {
3316 SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT,
3317 TargetFlags: PPCII::MO_PIC_FLAG);
3318 return getTOCEntry(DAG, dl: SDLoc(GA), GA);
3319 }
3320
3321 SDValue JTIHi = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOHiFlag);
3322 SDValue JTILo = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOLoFlag);
3323 return LowerLabelRef(HiPart: JTIHi, LoPart: JTILo, isPIC: IsPIC, DAG);
3324}
3325
3326SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3327 SelectionDAG &DAG) const {
3328 EVT PtrVT = Op.getValueType();
3329 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Val&: Op);
3330 const BlockAddress *BA = BASDN->getBlockAddress();
3331
3332 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3333 if (Subtarget.isUsingPCRelativeCalls()) {
3334 SDLoc DL(BASDN);
3335 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3336 SDValue GA = DAG.getTargetBlockAddress(BA, VT: Ty, Offset: BASDN->getOffset(),
3337 TargetFlags: PPCII::MO_PCREL_FLAG);
3338 SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3339 return MatAddr;
3340 }
3341
3342 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3343 // The actual BlockAddress is stored in the TOC.
3344 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3345 setUsesTOCBasePtr(DAG);
3346 SDValue GA = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset());
3347 return getTOCEntry(DAG, dl: SDLoc(BASDN), GA);
3348 }
3349
3350 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3351 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3352 return getTOCEntry(
3353 DAG, dl: SDLoc(BASDN),
3354 GA: DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset()));
3355
3356 unsigned MOHiFlag, MOLoFlag;
3357 bool IsPIC = isPositionIndependent();
3358 getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3359 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOHiFlag);
3360 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOLoFlag);
3361 return LowerLabelRef(HiPart: TgtBAHi, LoPart: TgtBALo, isPIC: IsPIC, DAG);
3362}
3363
3364SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3365 SelectionDAG &DAG) const {
3366 if (Subtarget.isAIXABI())
3367 return LowerGlobalTLSAddressAIX(Op, DAG);
3368
3369 return LowerGlobalTLSAddressLinux(Op, DAG);
3370}
3371
3372/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3373/// and then apply the update.
3374static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3375 SelectionDAG &DAG,
3376 const TargetMachine &TM) {
3377 // Initialize TLS model opt setting lazily:
3378 // (1) Use initial-exec for single TLS var references within current function.
3379 // (2) Use local-dynamic for multiple TLS var references within current
3380 // function.
3381 PPCFunctionInfo *FuncInfo =
3382 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3383 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3384 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3385 // Iterate over all instructions within current function, collect all TLS
3386 // global variables (global variables taken as the first parameter to
3387 // Intrinsic::threadlocal_address).
3388 const Function &Func = DAG.getMachineFunction().getFunction();
3389 for (const BasicBlock &BB : Func)
3390 for (const Instruction &I : BB)
3391 if (I.getOpcode() == Instruction::Call)
3392 if (const CallInst *CI = dyn_cast<const CallInst>(Val: &I))
3393 if (Function *CF = CI->getCalledFunction())
3394 if (CF->isDeclaration() &&
3395 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3396 if (const GlobalValue *GV =
3397 dyn_cast<GlobalValue>(Val: I.getOperand(i: 0))) {
3398 TLSModel::Model GVModel = TM.getTLSModel(GV);
3399 if (GVModel == TLSModel::LocalDynamic)
3400 TLSGV.insert(Ptr: GV);
3401 }
3402
3403 unsigned TLSGVCnt = TLSGV.size();
3404 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3405 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3406 FuncInfo->setAIXFuncUseTLSIEForLD();
3407 FuncInfo->setAIXFuncTLSModelOptInitDone();
3408 }
3409
3410 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3411 LLVM_DEBUG(
3412 dbgs() << DAG.getMachineFunction().getName()
3413 << " function is using the TLS-IE model for TLS-LD access.\n");
3414 Model = TLSModel::InitialExec;
3415 }
3416}
3417
3418SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3419 SelectionDAG &DAG) const {
3420 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
3421
3422 if (DAG.getTarget().useEmulatedTLS())
3423 report_fatal_error(reason: "Emulated TLS is not yet supported on AIX");
3424
3425 SDLoc dl(GA);
3426 const GlobalValue *GV = GA->getGlobal();
3427 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3428 bool Is64Bit = Subtarget.isPPC64();
3429 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3430
3431 // Apply update to the TLS model.
3432 if (Subtarget.hasAIXShLibTLSModelOpt())
3433 updateForAIXShLibTLSModelOpt(Model, DAG, TM: getTargetMachine());
3434
3435 // TLS variables are accessed through TOC entries.
3436 // To support this, set the DAG to use the TOC base pointer.
3437 setUsesTOCBasePtr(DAG);
3438
3439 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3440
3441 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3442 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3443 bool HasAIXSmallTLSGlobalAttr = false;
3444 SDValue VariableOffsetTGA =
3445 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TPREL_FLAG);
3446 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3447 SDValue TLSReg;
3448
3449 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Val: GV))
3450 if (GVar->hasAttribute(Kind: "aix-small-tls"))
3451 HasAIXSmallTLSGlobalAttr = true;
3452
3453 if (Is64Bit) {
3454 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3455 // involves a load of the variable offset (from the TOC), followed by an
3456 // add of the loaded variable offset to R13 (the thread pointer).
3457 // This code sequence looks like:
3458 // ld reg1,var[TC](2)
3459 // add reg2, reg1, r13 // r13 contains the thread pointer
3460 TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
3461
3462 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3463 // global variable attribute, produce a faster access sequence for
3464 // local-exec TLS variables where the offset from the TLS base is encoded
3465 // as an immediate operand.
3466 //
3467 // We only utilize the faster local-exec access sequence when the TLS
3468 // variable has a size within the policy limit. We treat types that are
3469 // not sized or are empty as being over the policy size limit.
3470 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3471 IsTLSLocalExecModel) {
3472 Type *GVType = GV->getValueType();
3473 if (GVType->isSized() && !GVType->isEmptyTy() &&
3474 GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3475 AIXSmallTlsPolicySizeLimit)
3476 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA, N2: TLSReg);
3477 }
3478 } else {
3479 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3480 // involves loading the variable offset from the TOC, generating a call to
3481 // .__get_tpointer to get the thread pointer (which will be in R3), and
3482 // adding the two together:
3483 // lwz reg1,var[TC](2)
3484 // bla .__get_tpointer
3485 // add reg2, reg1, r3
3486 TLSReg = DAG.getNode(Opcode: PPCISD::GET_TPOINTER, DL: dl, VT: PtrVT);
3487
3488 // We do not implement the 32-bit version of the faster access sequence
3489 // for local-exec that is controlled by the -maix-small-local-exec-tls
3490 // option, or the "aix-small-tls" global variable attribute.
3491 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3492 report_fatal_error(reason: "The small-local-exec TLS access sequence is "
3493 "currently only supported on AIX (64-bit mode).");
3494 }
3495 return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: VariableOffset);
3496 }
3497
3498 if (Model == TLSModel::LocalDynamic) {
3499 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3500
3501 // We do not implement the 32-bit version of the faster access sequence
3502 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3503 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3504 report_fatal_error(reason: "The small-local-dynamic TLS access sequence is "
3505 "currently only supported on AIX (64-bit mode).");
3506
3507 // For local-dynamic on AIX, we need to generate one TOC entry for each
3508 // variable offset, and a single module-handle TOC entry for the entire
3509 // file.
3510
3511 SDValue VariableOffsetTGA =
3512 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLD_FLAG);
3513 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3514
3515 Module *M = DAG.getMachineFunction().getFunction().getParent();
3516 GlobalVariable *TLSGV =
3517 dyn_cast_or_null<GlobalVariable>(Val: M->getOrInsertGlobal(
3518 Name: StringRef("_$TLSML"), Ty: PointerType::getUnqual(C&: *DAG.getContext())));
    assert(TLSGV && "Not able to create GV for _$TLSML.");
    TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3521 SDValue ModuleHandleTGA =
3522 DAG.getTargetGlobalAddress(GV: TLSGV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLDM_FLAG);
3523 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, GA: ModuleHandleTGA);
3524 SDValue ModuleHandle =
3525 DAG.getNode(Opcode: PPCISD::TLSLD_AIX, DL: dl, VT: PtrVT, Operand: ModuleHandleTOC);
3526
3527 // With the -maix-small-local-dynamic-tls option, produce a faster access
3528 // sequence for local-dynamic TLS variables where the offset from the
3529 // module-handle is encoded as an immediate operand.
3530 //
3531 // We only utilize the faster local-dynamic access sequence when the TLS
3532 // variable has a size within the policy limit. We treat types that are
3533 // not sized or are empty as being over the policy size limit.
3534 if (HasAIXSmallLocalDynamicTLS) {
3535 Type *GVType = GV->getValueType();
3536 if (GVType->isSized() && !GVType->isEmptyTy() &&
3537 GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3538 AIXSmallTlsPolicySizeLimit)
3539 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA,
3540 N2: ModuleHandle);
3541 }
3542
3543 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: ModuleHandle, N2: VariableOffset);
3544 }
3545
3546 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3547 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3548 // need to generate two TOC entries, one for the variable offset, one for the
3549 // region handle. The global address for the TOC entry of the region handle is
3550 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3551 // entry of the variable offset is created with MO_TLSGD_FLAG.
3552 SDValue VariableOffsetTGA =
3553 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGD_FLAG);
3554 SDValue RegionHandleTGA =
3555 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGDM_FLAG);
3556 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3557 SDValue RegionHandle = getTOCEntry(DAG, dl, GA: RegionHandleTGA);
3558 return DAG.getNode(Opcode: PPCISD::TLSGD_AIX, DL: dl, VT: PtrVT, N1: VariableOffset,
3559 N2: RegionHandle);
3560}
3561
3562SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3563 SelectionDAG &DAG) const {
3564 // FIXME: TLS addresses currently use medium model code sequences,
3565 // which is the most useful form. Eventually support for small and
3566 // large models could be added if users need it, at the cost of
3567 // additional complexity.
3568 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
3569 if (DAG.getTarget().useEmulatedTLS())
3570 return LowerToTLSEmulatedModel(GA, DAG);
3571
3572 SDLoc dl(GA);
3573 const GlobalValue *GV = GA->getGlobal();
3574 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3575 bool is64bit = Subtarget.isPPC64();
3576 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3577 PICLevel::Level picLevel = M->getPICLevel();
3578
3579 const TargetMachine &TM = getTargetMachine();
3580 TLSModel::Model Model = TM.getTLSModel(GV);
3581
3582 if (Model == TLSModel::LocalExec) {
3583 if (Subtarget.isUsingPCRelativeCalls()) {
3584 SDValue TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
3585 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3586 TargetFlags: PPCII::MO_TPREL_PCREL_FLAG);
3587 SDValue MatAddr =
3588 DAG.getNode(Opcode: PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3589 return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: MatAddr);
3590 }
3591
3592 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3593 TargetFlags: PPCII::MO_TPREL_HA);
3594 SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3595 TargetFlags: PPCII::MO_TPREL_LO);
3596 SDValue TLSReg = is64bit ? DAG.getRegister(Reg: PPC::X13, VT: MVT::i64)
3597 : DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);
3598
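    // This yields the classic local-exec sequence, for example on 64-bit:
    //   addis reg, r13, sym@tprel@ha
    //   addi  reg, reg, sym@tprel@l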
3599 SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL: dl, VT: PtrVT, N1: TGAHi, N2: TLSReg);
3600 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: TGALo, N2: Hi);
3601 }
3602
3603 if (Model == TLSModel::InitialExec) {
3604 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3605 SDValue TGA = DAG.getTargetGlobalAddress(
3606 GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3607 SDValue TGATLS = DAG.getTargetGlobalAddress(
3608 GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3609 SDValue TPOffset;
3610 if (IsPCRel) {
3611 SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3612 TPOffset = DAG.getLoad(VT: MVT::i64, dl, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
3613 PtrInfo: MachinePointerInfo());
3614 } else {
3615 SDValue GOTPtr;
3616 if (is64bit) {
3617 setUsesTOCBasePtr(DAG);
3618 SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
3619 GOTPtr =
3620 DAG.getNode(Opcode: PPCISD::ADDIS_GOT_TPREL_HA, DL: dl, VT: PtrVT, N1: GOTReg, N2: TGA);
3621 } else {
3622 if (!TM.isPositionIndependent())
3623 GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_GOT, DL: dl, VT: PtrVT);
3624 else if (picLevel == PICLevel::SmallPIC)
3625 GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
3626 else
3627 GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
3628 }
3629 TPOffset = DAG.getNode(Opcode: PPCISD::LD_GOT_TPREL_L, DL: dl, VT: PtrVT, N1: TGA, N2: GOTPtr);
3630 }
3631 return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TPOffset, N2: TGATLS);
3632 }
3633
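  // For reference, the non-PC-relative 64-bit general-dynamic access built
  // below materializes the GOT entry address and calls __tls_get_addr (the
  // call is folded into the ADDI_TLSGD_L_ADDR node), roughly:
  //   addis r3, r2, x@got@tlsgd@ha
  //   addi  r3, r3, x@got@tlsgd@l
  //   bl    __tls_get_addr(x@tlsgd)
  //   nop
  // Illustrative sketch only; the exact emitted assembly may differ.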
3634 if (Model == TLSModel::GeneralDynamic) {
3635 if (Subtarget.isUsingPCRelativeCalls()) {
3636 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3637 TargetFlags: PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3638 return DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3639 }
3640
3641 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: 0);
3642 SDValue GOTPtr;
3643 if (is64bit) {
3644 setUsesTOCBasePtr(DAG);
3645 SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
3646 GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSGD_HA, DL: dl, VT: PtrVT,
3647 N1: GOTReg, N2: TGA);
3648 } else {
3649 if (picLevel == PICLevel::SmallPIC)
3650 GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
3651 else
3652 GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
3653 }
3654 return DAG.getNode(Opcode: PPCISD::ADDI_TLSGD_L_ADDR, DL: dl, VT: PtrVT,
3655 N1: GOTPtr, N2: TGA, N3: TGA);
3656 }
3657
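  // For reference, the non-PC-relative 64-bit local-dynamic access built
  // below makes one __tls_get_addr call for the module and then applies a
  // DTP-relative offset for the variable, roughly:
  //   addis r3, r2, x@got@tlsld@ha
  //   addi  r3, r3, x@got@tlsld@l
  //   bl    __tls_get_addr(x@tlsld)
  //   nop
  //   addis rX, r3, x@dtprel@ha
  //   addi  rY, rX, x@dtprel@l
  // Illustrative sketch only; the exact emitted assembly may differ.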
3658 if (Model == TLSModel::LocalDynamic) {
3659 if (Subtarget.isUsingPCRelativeCalls()) {
3660 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3661 TargetFlags: PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3662 SDValue MatPCRel =
3663 DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3664 return DAG.getNode(Opcode: PPCISD::PADDI_DTPREL, DL: dl, VT: PtrVT, N1: MatPCRel, N2: TGA);
3665 }
3666
3667 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: 0);
3668 SDValue GOTPtr;
3669 if (is64bit) {
3670 setUsesTOCBasePtr(DAG);
3671 SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
3672 GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSLD_HA, DL: dl, VT: PtrVT,
3673 N1: GOTReg, N2: TGA);
3674 } else {
3675 if (picLevel == PICLevel::SmallPIC)
3676 GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
3677 else
3678 GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
3679 }
3680 SDValue TLSAddr = DAG.getNode(Opcode: PPCISD::ADDI_TLSLD_L_ADDR, DL: dl,
3681 VT: PtrVT, N1: GOTPtr, N2: TGA, N3: TGA);
3682 SDValue DtvOffsetHi = DAG.getNode(Opcode: PPCISD::ADDIS_DTPREL_HA, DL: dl,
3683 VT: PtrVT, N1: TLSAddr, N2: TGA);
3684 return DAG.getNode(Opcode: PPCISD::ADDI_DTPREL_L, DL: dl, VT: PtrVT, N1: DtvOffsetHi, N2: TGA);
3685 }
3686
3687 llvm_unreachable("Unknown TLS model!");
3688}
3689
3690SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3691 SelectionDAG &DAG) const {
3692 EVT PtrVT = Op.getValueType();
3693 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Val&: Op);
3694 SDLoc DL(GSDN);
3695 const GlobalValue *GV = GSDN->getGlobal();
3696
3697 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3698 // The actual address of the GlobalValue is stored in the TOC.
3699 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3700 if (Subtarget.isUsingPCRelativeCalls()) {
3701 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3702 if (isAccessedAsGotIndirect(N: Op)) {
3703 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
3704 TargetFlags: PPCII::MO_GOT_PCREL_FLAG);
3705 SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3706 SDValue Load = DAG.getLoad(VT: MVT::i64, dl: DL, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
3707 PtrInfo: MachinePointerInfo());
3708 return Load;
3709 } else {
3710 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
3711 TargetFlags: PPCII::MO_PCREL_FLAG);
3712 return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3713 }
3714 }
3715 setUsesTOCBasePtr(DAG);
3716 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset());
3717 return getTOCEntry(DAG, dl: DL, GA);
3718 }
3719
3720 unsigned MOHiFlag, MOLoFlag;
3721 bool IsPIC = isPositionIndependent();
3722 getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag, GV);
3723
3724 if (IsPIC && Subtarget.isSVR4ABI()) {
3725 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT,
3726 offset: GSDN->getOffset(),
3727 TargetFlags: PPCII::MO_PIC_FLAG);
3728 return getTOCEntry(DAG, dl: DL, GA);
3729 }
3730
3731 SDValue GAHi =
3732 DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOHiFlag);
3733 SDValue GALo =
3734 DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOLoFlag);
3735
3736 return LowerLabelRef(HiPart: GAHi, LoPart: GALo, isPIC: IsPIC, DAG);
3737}
3738
3739SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3740 bool IsStrict = Op->isStrictFPOpcode();
3741 ISD::CondCode CC =
3742 cast<CondCodeSDNode>(Val: Op.getOperand(i: IsStrict ? 3 : 2))->get();
3743 SDValue LHS = Op.getOperand(i: IsStrict ? 1 : 0);
3744 SDValue RHS = Op.getOperand(i: IsStrict ? 2 : 1);
3745 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
3746 EVT LHSVT = LHS.getValueType();
3747 SDLoc dl(Op);
3748
3749 // Soften the setcc with a libcall if it is fp128.
3750 if (LHSVT == MVT::f128) {
3751 assert(!Subtarget.hasP9Vector() &&
3752 "SETCC for f128 is already legal under Power9!");
3753 softenSetCCOperands(DAG, VT: LHSVT, NewLHS&: LHS, NewRHS&: RHS, CCCode&: CC, DL: dl, OldLHS: LHS, OldRHS: RHS, Chain,
3754 IsSignaling: Op->getOpcode() == ISD::STRICT_FSETCCS);
3755 if (RHS.getNode())
3756 LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS,
3757 N3: DAG.getCondCode(Cond: CC));
3758 if (IsStrict)
3759 return DAG.getMergeValues(Ops: {LHS, Chain}, dl);
3760 return LHS;
3761 }
3762
3763 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3764
3765 if (Op.getValueType() == MVT::v2i64) {
3766 // When the operands themselves are v2i64 values, we need to do something
3767 // special because VSX has no underlying comparison operations for these.
3768 if (LHS.getValueType() == MVT::v2i64) {
3769 // Equality can be handled by casting to the legal type for Altivec
3770 // comparisons; everything else needs to be expanded.
3771 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3772 return SDValue();
3773 SDValue SetCC32 = DAG.getSetCC(
3774 DL: dl, VT: MVT::v4i32, LHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: LHS),
3775 RHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: RHS), Cond: CC);
3776 int ShuffV[] = {1, 0, 3, 2};
3777 SDValue Shuff =
3778 DAG.getVectorShuffle(VT: MVT::v4i32, dl, N1: SetCC32, N2: SetCC32, Mask: ShuffV);
3779 return DAG.getBitcast(VT: MVT::v2i64,
3780 V: DAG.getNode(Opcode: CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3781 DL: dl, VT: MVT::v4i32, N1: Shuff, N2: SetCC32));
3782 }
3783
3784 // We handle most of these in the usual way.
3785 return Op;
3786 }
3787
3788 // If we're comparing for equality to zero, expose the fact that this is
3789 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3790 // fold the new nodes.
3791 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3792 return V;
3793
3794 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
3795 // Leave comparisons against 0 and -1 alone for now, since they're usually
3796 // optimized. FIXME: revisit this when we can custom lower all setcc
3797 // optimizations.
3798 if (C->isAllOnes() || C->isZero())
3799 return SDValue();
3800 }
3801
3802 // If we have an integer seteq/setne, turn it into a compare against zero
3803 // by xor'ing the rhs with the lhs, which is faster than setting a
3804 // condition register, reading it back out, and masking the correct bit. The
3805 // normal approach here uses sub to do this instead of xor. Using xor exposes
3806 // the result to other bit-twiddling opportunities.
3807 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3808 EVT VT = Op.getValueType();
3809 SDValue Sub = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: LHSVT, N1: LHS, N2: RHS);
3810 return DAG.getSetCC(DL: dl, VT, LHS: Sub, RHS: DAG.getConstant(Val: 0, DL: dl, VT: LHSVT), Cond: CC);
3811 }
3812 return SDValue();
3813}
3814
3815SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3816 SDNode *Node = Op.getNode();
3817 EVT VT = Node->getValueType(ResNo: 0);
3818 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3819 SDValue InChain = Node->getOperand(Num: 0);
3820 SDValue VAListPtr = Node->getOperand(Num: 1);
3821 const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: 2))->getValue();
3822 SDLoc dl(Node);
3823
3824 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3825
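  // The lowering below implements, roughly, the following C-style sketch of
  // va_arg for the 32-bit SVR4 va_list (illustrative only):
  //   idx = is_int ? gpr : fpr;
  //   if (i64) idx = (idx + 1) & ~1u;            // i64 needs an even GPR pair
  //   addr = (idx < 8) ? reg_save_area + idx * (is_int ? 4 : 8)
  //                          + (is_fp ? 32 : 0)  // FPRs follow the 8 GPRs
  //                    : overflow_area;
  //   *index_byte = idx + (i64 ? 2 : 1);         // bump gpr/fpr index
  //   if (idx >= 8) overflow_area += is_int ? 4 : 8;
  //   return *(T *)addr;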
3826 // gpr_index
3827 SDValue GprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
3828 Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV), MemVT: MVT::i8);
3829 InChain = GprIndex.getValue(R: 1);
3830
3831 if (VT == MVT::i64) {
3832 // Check if GprIndex is even
3833 SDValue GprAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: GprIndex,
3834 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
3835 SDValue CC64 = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: GprAnd,
3836 RHS: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32), Cond: ISD::SETNE);
3837 SDValue GprIndexPlusOne = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: GprIndex,
3838 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
3839 // Align GprIndex to be even if it isn't
3840 GprIndex = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC64, N2: GprIndexPlusOne,
3841 N3: GprIndex);
3842 }
3843
3844 // fpr index is 1 byte after gpr
3845 SDValue FprPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
3846 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
3847
3848 // fpr
3849 SDValue FprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
3850 Ptr: FprPtr, PtrInfo: MachinePointerInfo(SV), MemVT: MVT::i8);
3851 InChain = FprIndex.getValue(R: 1);
3852
3853 SDValue RegSaveAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
3854 N2: DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32));
3855
3856 SDValue OverflowAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
3857 N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32));
3858
3859 // areas
3860 SDValue OverflowArea =
3861 DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: OverflowAreaPtr, PtrInfo: MachinePointerInfo());
3862 InChain = OverflowArea.getValue(R: 1);
3863
3864 SDValue RegSaveArea =
3865 DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: RegSaveAreaPtr, PtrInfo: MachinePointerInfo());
3866 InChain = RegSaveArea.getValue(R: 1);
3867
3868 // select overflow_area if index >= 8
3869 SDValue CC = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: VT.isInteger() ? GprIndex : FprIndex,
3870 RHS: DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32), Cond: ISD::SETLT);
3871
3872 // adjustment constant gpr_index * 4/8
3873 SDValue RegConstant = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32,
3874 N1: VT.isInteger() ? GprIndex : FprIndex,
3875 N2: DAG.getConstant(Val: VT.isInteger() ? 4 : 8, DL: dl,
3876 VT: MVT::i32));
3877
3878 // OurReg = RegSaveArea + RegConstant
3879 SDValue OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: RegSaveArea,
3880 N2: RegConstant);
3881
3882 // Floating types are 32 bytes into RegSaveArea
3883 if (VT.isFloatingPoint())
3884 OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OurReg,
3885 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
3886
3887 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3888 SDValue IndexPlus1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32,
3889 N1: VT.isInteger() ? GprIndex : FprIndex,
3890 N2: DAG.getConstant(Val: VT == MVT::i64 ? 2 : 1, DL: dl,
3891 VT: MVT::i32));
3892
3893 InChain = DAG.getTruncStore(Chain: InChain, dl, Val: IndexPlus1,
3894 Ptr: VT.isInteger() ? VAListPtr : FprPtr,
3895 PtrInfo: MachinePointerInfo(SV), SVT: MVT::i8);
3896
3897 // determine if we should load from reg_save_area or overflow_area
3898 SDValue Result = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: PtrVT, N1: CC, N2: OurReg, N3: OverflowArea);
3899
3900 // increase overflow_area by 4/8 if gpr/fpr index >= 8
3901 SDValue OverflowAreaPlusN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OverflowArea,
3902 N2: DAG.getConstant(Val: VT.isInteger() ? 4 : 8,
3903 DL: dl, VT: MVT::i32));
3904
3905 OverflowArea = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC, N2: OverflowArea,
3906 N3: OverflowAreaPlusN);
3907
3908 InChain = DAG.getTruncStore(Chain: InChain, dl, Val: OverflowArea, Ptr: OverflowAreaPtr,
3909 PtrInfo: MachinePointerInfo(), SVT: MVT::i32);
3910
3911 return DAG.getLoad(VT, dl, Chain: InChain, Ptr: Result, PtrInfo: MachinePointerInfo());
3912}
3913
3914SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3915 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3916
3917 // We have to copy the entire va_list struct:
3918 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3919 return DAG.getMemcpy(Chain: Op.getOperand(i: 0), dl: Op, Dst: Op.getOperand(i: 1), Src: Op.getOperand(i: 2),
3920 Size: DAG.getConstant(Val: 12, DL: SDLoc(Op), VT: MVT::i32), Alignment: Align(8),
3921 isVol: false, AlwaysInline: true, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
3922 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
3923}
3924
3925SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3926 SelectionDAG &DAG) const {
3927 if (Subtarget.isAIXABI())
3928 report_fatal_error(reason: "ADJUST_TRAMPOLINE operation is not supported on AIX.");
3929
3930 return Op.getOperand(i: 0);
3931}
3932
3933SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3934 MachineFunction &MF = DAG.getMachineFunction();
3935 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3936
3937 assert((Op.getOpcode() == ISD::INLINEASM ||
3938 Op.getOpcode() == ISD::INLINEASM_BR) &&
3939 "Expecting Inline ASM node.");
3940
3941 // If an LR store is already known to be required, then there is no point
3942 // in checking this ASM as well.
3943 if (MFI.isLRStoreRequired())
3944 return Op;
3945
3946 // Inline ASM nodes have an optional last operand that is an incoming glue
3947 // value of type MVT::Glue. We want to ignore this last operand if it is present.
3948 unsigned NumOps = Op.getNumOperands();
3949 if (Op.getOperand(i: NumOps - 1).getValueType() == MVT::Glue)
3950 --NumOps;
3951
3952 // Check all operands that may contain the LR.
3953 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3954 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3955 unsigned NumVals = Flags.getNumOperandRegisters();
3956 ++i; // Skip the ID value.
3957
3958 switch (Flags.getKind()) {
3959 default:
3960 llvm_unreachable("Bad flags!");
3961 case InlineAsm::Kind::RegUse:
3962 case InlineAsm::Kind::Imm:
3963 case InlineAsm::Kind::Mem:
3964 i += NumVals;
3965 break;
3966 case InlineAsm::Kind::Clobber:
3967 case InlineAsm::Kind::RegDef:
3968 case InlineAsm::Kind::RegDefEarlyClobber: {
3969 for (; NumVals; --NumVals, ++i) {
3970 Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i))->getReg();
3971 if (Reg != PPC::LR && Reg != PPC::LR8)
3972 continue;
3973 MFI.setLRStoreRequired();
3974 return Op;
3975 }
3976 break;
3977 }
3978 }
3979 }
3980
3981 return Op;
3982}
3983
3984SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3985 SelectionDAG &DAG) const {
3986 if (Subtarget.isAIXABI())
3987 report_fatal_error(reason: "INIT_TRAMPOLINE operation is not supported on AIX.");
3988
3989 SDValue Chain = Op.getOperand(i: 0);
3990 SDValue Trmp = Op.getOperand(i: 1); // trampoline
3991 SDValue FPtr = Op.getOperand(i: 2); // nested function
3992 SDValue Nest = Op.getOperand(i: 3); // 'nest' parameter value
3993 SDLoc dl(Op);
3994
3995 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3996 bool isPPC64 = (PtrVT == MVT::i64);
3997 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(C&: *DAG.getContext());
3998
3999 TargetLowering::ArgListTy Args;
4000 TargetLowering::ArgListEntry Entry;
4001
4002 Entry.Ty = IntPtrTy;
4003 Entry.Node = Trmp; Args.push_back(x: Entry);
4004
4005 // TrampSize == (isPPC64 ? 48 : 40);
4006 Entry.Node =
4007 DAG.getConstant(Val: isPPC64 ? 48 : 40, DL: dl, VT: Subtarget.getScalarIntVT());
4008 Args.push_back(x: Entry);
4009
4010 Entry.Node = FPtr; Args.push_back(x: Entry);
4011 Entry.Node = Nest; Args.push_back(x: Entry);
4012
4013 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4014 TargetLowering::CallLoweringInfo CLI(DAG);
4015 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4016 CC: CallingConv::C, ResultType: Type::getVoidTy(C&: *DAG.getContext()),
4017 Target: DAG.getExternalSymbol(Sym: "__trampoline_setup", VT: PtrVT), ArgsList: std::move(Args));
4018
4019 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4020 return CallResult.second;
4021}
4022
4023SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4024 MachineFunction &MF = DAG.getMachineFunction();
4025 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4026 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
4027
4028 SDLoc dl(Op);
4029
4030 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4031 // vastart just stores the address of the VarArgsFrameIndex slot into the
4032 // memory location argument.
4033 SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
4034 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
4035 return DAG.getStore(Chain: Op.getOperand(i: 0), dl, Val: FR, Ptr: Op.getOperand(i: 1),
4036 PtrInfo: MachinePointerInfo(SV));
4037 }
4038
4039 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4040 // We suppose the given va_list is already allocated.
4041 //
4042 // typedef struct {
4043 // char gpr; /* index into the array of 8 GPRs
4044 // * stored in the register save area
4045 // * gpr=0 corresponds to r3,
4046 // * gpr=1 to r4, etc.
4047 // */
4048 // char fpr; /* index into the array of 8 FPRs
4049 // * stored in the register save area
4050 // * fpr=0 corresponds to f1,
4051 // * fpr=1 to f2, etc.
4052 // */
4053 // char *overflow_arg_area;
4054 // /* location on stack that holds
4055 // * the next overflow argument
4056 // */
4057 // char *reg_save_area;
4058 // /* where r3:r10 and f1:f8 (if saved)
4059 // * are stored
4060 // */
4061 // } va_list[1];
4062
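  // The stores below fill in that struct at the following byte offsets
  // (PtrVT is 32 bits here, so FrameOffset == 4 and StackOffset == 3):
  //   0: gpr count, 1: fpr count, 4: overflow_arg_area, 8: reg_save_area.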
4063 SDValue ArgGPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumGPR(), DL: dl, VT: MVT::i32);
4064 SDValue ArgFPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumFPR(), DL: dl, VT: MVT::i32);
4065 SDValue StackOffsetFI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsStackOffset(),
4066 VT: PtrVT);
4067 SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
4068 VT: PtrVT);
4069
4070 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4071 SDValue ConstFrameOffset = DAG.getConstant(Val: FrameOffset, DL: dl, VT: PtrVT);
4072
4073 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4074 SDValue ConstStackOffset = DAG.getConstant(Val: StackOffset, DL: dl, VT: PtrVT);
4075
4076 uint64_t FPROffset = 1;
4077 SDValue ConstFPROffset = DAG.getConstant(Val: FPROffset, DL: dl, VT: PtrVT);
4078
4079 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
4080
4081 // Store first byte : number of int regs
4082 SDValue firstStore =
4083 DAG.getTruncStore(Chain: Op.getOperand(i: 0), dl, Val: ArgGPR, Ptr: Op.getOperand(i: 1),
4084 PtrInfo: MachinePointerInfo(SV), SVT: MVT::i8);
4085 uint64_t nextOffset = FPROffset;
4086 SDValue nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Op.getOperand(i: 1),
4087 N2: ConstFPROffset);
4088
4089 // Store second byte : number of float regs
4090 SDValue secondStore =
4091 DAG.getTruncStore(Chain: firstStore, dl, Val: ArgFPR, Ptr: nextPtr,
4092 PtrInfo: MachinePointerInfo(SV, nextOffset), SVT: MVT::i8);
4093 nextOffset += StackOffset;
4094 nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstStackOffset);
4095
4096 // Store second word : arguments given on stack
4097 SDValue thirdStore = DAG.getStore(Chain: secondStore, dl, Val: StackOffsetFI, Ptr: nextPtr,
4098 PtrInfo: MachinePointerInfo(SV, nextOffset));
4099 nextOffset += FrameOffset;
4100 nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstFrameOffset);
4101
4102 // Store third word : arguments given in registers
4103 return DAG.getStore(Chain: thirdStore, dl, Val: FR, Ptr: nextPtr,
4104 PtrInfo: MachinePointerInfo(SV, nextOffset));
4105}
4106
4107/// FPR - The set of FP registers that should be allocated for arguments
4108/// on Darwin and AIX.
4109static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4110 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4111 PPC::F11, PPC::F12, PPC::F13};
4112
4113/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4114/// the stack.
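/// For example, on 64-bit ELF (PtrByteSize == 8) a 12-byte byval argument
/// reserves 16 bytes, while a 4-byte f32 member of a homogeneous float array
/// (isInConsecutiveRegs) stays at 4 bytes; rounding for such arrays happens
/// only after the last element.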
4115static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4116 unsigned PtrByteSize) {
4117 unsigned ArgSize = ArgVT.getStoreSize();
4118 if (Flags.isByVal())
4119 ArgSize = Flags.getByValSize();
4120
4121 // Round up to multiples of the pointer size, except for array members,
4122 // which are always packed.
4123 if (!Flags.isInConsecutiveRegs())
4124 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4125
4126 return ArgSize;
4127}
4128
4129/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4130/// on the stack.
4131static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4132 ISD::ArgFlagsTy Flags,
4133 unsigned PtrByteSize) {
4134 Align Alignment(PtrByteSize);
4135
4136 // Altivec parameters are padded to a 16 byte boundary.
4137 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4138 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4139 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4140 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4141 Alignment = Align(16);
4142
4143 // ByVal parameters are aligned as requested.
4144 if (Flags.isByVal()) {
4145 auto BVAlign = Flags.getNonZeroByValAlign();
4146 if (BVAlign > PtrByteSize) {
4147 if (BVAlign.value() % PtrByteSize != 0)
4148 llvm_unreachable(
4149 "ByVal alignment is not a multiple of the pointer size");
4150
4151 Alignment = BVAlign;
4152 }
4153 }
4154
4155 // Array members are always packed to their original alignment.
4156 if (Flags.isInConsecutiveRegs()) {
4157 // If the array member was split into multiple registers, the first
4158 // needs to be aligned to the size of the full type. (Except for
4159 // ppcf128, which is only aligned as its f64 components.)
4160 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4161 Alignment = Align(OrigVT.getStoreSize());
4162 else
4163 Alignment = Align(ArgVT.getStoreSize());
4164 }
4165
4166 return Alignment;
4167}
4168
4169/// CalculateStackSlotUsed - Return whether this argument will use its
4170/// stack slot (instead of being passed in registers). ArgOffset,
4171/// AvailableFPRs, and AvailableVRs must hold the current argument
4172/// position, and will be updated to account for this argument.
4173static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4174 unsigned PtrByteSize, unsigned LinkageSize,
4175 unsigned ParamAreaSize, unsigned &ArgOffset,
4176 unsigned &AvailableFPRs,
4177 unsigned &AvailableVRs) {
4178 bool UseMemory = false;
4179
4180 // Respect alignment of argument on the stack.
4181 Align Alignment =
4182 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4183 ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
4184 // If there's no space left in the argument save area, we must
4185 // use memory (this check also catches zero-sized arguments).
4186 if (ArgOffset >= LinkageSize + ParamAreaSize)
4187 UseMemory = true;
4188
4189 // Allocate argument on the stack.
4190 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4191 if (Flags.isInConsecutiveRegsLast())
4192 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4193 // If we overran the argument save area, we must use memory
4194 // (this check catches arguments passed partially in memory)
4195 if (ArgOffset > LinkageSize + ParamAreaSize)
4196 UseMemory = true;
4197
4198 // However, if the argument is actually passed in an FPR or a VR,
4199 // we don't use memory after all.
4200 if (!Flags.isByVal()) {
4201 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4202 if (AvailableFPRs > 0) {
4203 --AvailableFPRs;
4204 return false;
4205 }
4206 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4207 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4208 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4209 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4210 if (AvailableVRs > 0) {
4211 --AvailableVRs;
4212 return false;
4213 }
4214 }
4215
4216 return UseMemory;
4217}
4218
4219/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4220/// ensure minimum alignment required for target.
4221static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4222 unsigned NumBytes) {
4223 return alignTo(Size: NumBytes, A: Lowering->getStackAlign());
4224}
4225
4226SDValue PPCTargetLowering::LowerFormalArguments(
4227 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4228 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4229 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4230 if (Subtarget.isAIXABI())
4231 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4232 InVals);
4233 if (Subtarget.is64BitELFABI())
4234 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4235 InVals);
4236 assert(Subtarget.is32BitELFABI());
4237 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4238 InVals);
4239}
4240
4241SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4242 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4243 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4244 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4245
4246 // 32-bit SVR4 ABI Stack Frame Layout:
4247 // +-----------------------------------+
4248 // +--> | Back chain |
4249 // | +-----------------------------------+
4250 // | | Floating-point register save area |
4251 // | +-----------------------------------+
4252 // | | General register save area |
4253 // | +-----------------------------------+
4254 // | | CR save word |
4255 // | +-----------------------------------+
4256 // | | VRSAVE save word |
4257 // | +-----------------------------------+
4258 // | | Alignment padding |
4259 // | +-----------------------------------+
4260 // | | Vector register save area |
4261 // | +-----------------------------------+
4262 // | | Local variable space |
4263 // | +-----------------------------------+
4264 // | | Parameter list area |
4265 // | +-----------------------------------+
4266 // | | LR save word |
4267 // | +-----------------------------------+
4268 // SP--> +--- | Back chain |
4269 // +-----------------------------------+
4270 //
4271 // Specifications:
4272 // System V Application Binary Interface PowerPC Processor Supplement
4273 // AltiVec Technology Programming Interface Manual
4274
4275 MachineFunction &MF = DAG.getMachineFunction();
4276 MachineFrameInfo &MFI = MF.getFrameInfo();
4277 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4278
4279 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
4280 // Potential tail calls could cause overwriting of argument stack slots.
4281 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4282 (CallConv == CallingConv::Fast));
4283 const Align PtrAlign(4);
4284
4285 // Assign locations to all of the incoming arguments.
4286 SmallVector<CCValAssign, 16> ArgLocs;
4287 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4288 *DAG.getContext());
4289
4290 // Reserve space for the linkage area on the stack.
4291 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4292 CCInfo.AllocateStack(Size: LinkageSize, Alignment: PtrAlign);
4293 if (useSoftFloat())
4294 CCInfo.PreAnalyzeFormalArguments(Ins);
4295
4296 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4);
4297 CCInfo.clearWasPPCF128();
4298
4299 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4300 CCValAssign &VA = ArgLocs[i];
4301
4302 // Arguments stored in registers.
4303 if (VA.isRegLoc()) {
4304 const TargetRegisterClass *RC;
4305 EVT ValVT = VA.getValVT();
4306
4307 switch (ValVT.getSimpleVT().SimpleTy) {
4308 default:
4309 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4310 case MVT::i1:
4311 case MVT::i32:
4312 RC = &PPC::GPRCRegClass;
4313 break;
4314 case MVT::f32:
4315 if (Subtarget.hasP8Vector())
4316 RC = &PPC::VSSRCRegClass;
4317 else if (Subtarget.hasSPE())
4318 RC = &PPC::GPRCRegClass;
4319 else
4320 RC = &PPC::F4RCRegClass;
4321 break;
4322 case MVT::f64:
4323 if (Subtarget.hasVSX())
4324 RC = &PPC::VSFRCRegClass;
4325 else if (Subtarget.hasSPE())
4326 // SPE passes doubles in GPR pairs.
4327 RC = &PPC::GPRCRegClass;
4328 else
4329 RC = &PPC::F8RCRegClass;
4330 break;
4331 case MVT::v16i8:
4332 case MVT::v8i16:
4333 case MVT::v4i32:
4334 RC = &PPC::VRRCRegClass;
4335 break;
4336 case MVT::v4f32:
4337 RC = &PPC::VRRCRegClass;
4338 break;
4339 case MVT::v2f64:
4340 case MVT::v2i64:
4341 RC = &PPC::VRRCRegClass;
4342 break;
4343 }
4344
4345 SDValue ArgValue;
4346 // Transform the arguments stored in physical registers into
4347 // virtual ones.
4348 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4349 assert(i + 1 < e && "No second half of double precision argument");
4350 Register RegLo = MF.addLiveIn(PReg: VA.getLocReg(), RC);
4351 Register RegHi = MF.addLiveIn(PReg: ArgLocs[++i].getLocReg(), RC);
4352 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, Reg: RegLo, VT: MVT::i32);
4353 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, Reg: RegHi, VT: MVT::i32);
4354 if (!Subtarget.isLittleEndian())
4355 std::swap (a&: ArgValueLo, b&: ArgValueHi);
4356 ArgValue = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: ArgValueLo,
4357 N2: ArgValueHi);
4358 } else {
4359 Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
4360 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4361 VT: ValVT == MVT::i1 ? MVT::i32 : ValVT);
4362 if (ValVT == MVT::i1)
4363 ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: ArgValue);
4364 }
4365
4366 InVals.push_back(Elt: ArgValue);
4367 } else {
4368 // Argument stored in memory.
4369 assert(VA.isMemLoc());
4370
4371 // Get the extended size of the argument type on the stack
4372 unsigned ArgSize = VA.getLocVT().getStoreSize();
4373 // Get the actual size of the argument type
4374 unsigned ObjSize = VA.getValVT().getStoreSize();
4375 unsigned ArgOffset = VA.getLocMemOffset();
4376 // Stack objects in PPC32 are right justified.
4377 ArgOffset += ArgSize - ObjSize;
4378 int FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: isImmutable);
4379
4380 // Create load nodes to retrieve arguments from the stack.
4381 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4382 InVals.push_back(
4383 Elt: DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo()));
4384 }
4385 }
4386
4387 // Assign locations to all of the incoming aggregate by value arguments.
4388 // Aggregates passed by value are stored in the local variable space of the
4389 // caller's stack frame, right above the parameter list area.
4390 SmallVector<CCValAssign, 16> ByValArgLocs;
4391 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4392 ByValArgLocs, *DAG.getContext());
4393
4394 // Reserve stack space for the allocations in CCInfo.
4395 CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);
4396
4397 CCByValInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4_ByVal);
4398
4399 // Area that is at least reserved in the caller of this function.
4400 unsigned MinReservedArea = CCByValInfo.getStackSize();
4401 MinReservedArea = std::max(a: MinReservedArea, b: LinkageSize);
4402
4403 // Set the size that is at least reserved in the caller of this function. Tail
4404 // call optimized function's reserved stack space needs to be aligned so that
4405 // taking the difference between two stack areas will result in an aligned
4406 // stack.
4407 MinReservedArea =
4408 EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
4409 FuncInfo->setMinReservedArea(MinReservedArea);
4410
4411 SmallVector<SDValue, 8> MemOps;
4412
4413 // If the function takes a variable number of arguments, make a frame index for
4414 // the start of the first vararg value... for expansion of llvm.va_start.
4415 if (isVarArg) {
4416 static const MCPhysReg GPArgRegs[] = {
4417 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4418 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4419 };
4420 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4421
4422 static const MCPhysReg FPArgRegs[] = {
4423 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4424 PPC::F8
4425 };
4426 unsigned NumFPArgRegs = std::size(FPArgRegs);
4427
4428 if (useSoftFloat() || hasSPE())
4429 NumFPArgRegs = 0;
4430
4431 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(Regs: GPArgRegs));
4432 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(Regs: FPArgRegs));
4433
4434 // Make room to save NumGPArgRegs GPRs and NumFPArgRegs FPRs.
4435 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4436 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4437
4438 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4439 Size: PtrVT.getSizeInBits() / 8, SPOffset: CCInfo.getStackSize(), IsImmutable: true));
4440
4441 FuncInfo->setVarArgsFrameIndex(
4442 MFI.CreateStackObject(Size: Depth, Alignment: Align(8), isSpillSlot: false));
4443 SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
4444
4445 // The fixed integer arguments of a variadic function are stored to the
4446 // VarArgsFrameIndex on the stack so that they may be loaded by
4447 // dereferencing the result of va_next.
4448 for (MCPhysReg GPArgReg : GPArgRegs) {
4449 // Get an existing live-in vreg, or add a new one.
4450 Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: GPArgReg);
4451 if (!VReg)
4452 VReg = MF.addLiveIn(PReg: GPArgReg, RC: &PPC::GPRCRegClass);
4453
4454 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4455 SDValue Store =
4456 DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
4457 MemOps.push_back(Elt: Store);
4458 // Increment the address by four for the next argument to store
4459 SDValue PtrOff = DAG.getConstant(Val: PtrVT.getSizeInBits()/8, DL: dl, VT: PtrVT);
4460 FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
4461 }
4462
4463 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4464 // is set.
4465 // The double arguments are stored to the VarArgsFrameIndex
4466 // on the stack.
4467 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4468 // Get an existing live-in vreg, or add a new one.
4469 Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: FPArgRegs[FPRIndex]);
4470 if (!VReg)
4471 VReg = MF.addLiveIn(PReg: FPArgRegs[FPRIndex], RC: &PPC::F8RCRegClass);
4472
4473 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::f64);
4474 SDValue Store =
4475 DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
4476 MemOps.push_back(Elt: Store);
4477 // Increment the address by eight for the next argument to store
4478 SDValue PtrOff = DAG.getConstant(Val: MVT(MVT::f64).getSizeInBits()/8, DL: dl,
4479 VT: PtrVT);
4480 FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
4481 }
4482 }
4483
4484 if (!MemOps.empty())
4485 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
4486
4487 return Chain;
4488}
4489
4490// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4491// value to MVT::i64 and then truncate to the correct register size.
4492SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4493 EVT ObjectVT, SelectionDAG &DAG,
4494 SDValue ArgVal,
4495 const SDLoc &dl) const {
4496 if (Flags.isSExt())
4497 ArgVal = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: MVT::i64, N1: ArgVal,
4498 N2: DAG.getValueType(ObjectVT));
4499 else if (Flags.isZExt())
4500 ArgVal = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: MVT::i64, N1: ArgVal,
4501 N2: DAG.getValueType(ObjectVT));
4502
4503 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ObjectVT, Operand: ArgVal);
4504}
4505
4506SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4507 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4508 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4509 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4510 // TODO: add description of PPC stack frame format, or at least some docs.
4511 //
4512 bool isELFv2ABI = Subtarget.isELFv2ABI();
4513 bool isLittleEndian = Subtarget.isLittleEndian();
4514 MachineFunction &MF = DAG.getMachineFunction();
4515 MachineFrameInfo &MFI = MF.getFrameInfo();
4516 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4517
4518 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4519 "fastcc not supported on varargs functions");
4520
4521 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
4522 // Potential tail calls could cause overwriting of argument stack slots.
4523 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4524 (CallConv == CallingConv::Fast));
4525 unsigned PtrByteSize = 8;
4526 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4527
4528 static const MCPhysReg GPR[] = {
4529 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4530 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4531 };
4532 static const MCPhysReg VR[] = {
4533 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4534 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4535 };
4536
4537 const unsigned Num_GPR_Regs = std::size(GPR);
4538 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4539 const unsigned Num_VR_Regs = std::size(VR);
4540
4541 // Do a first pass over the arguments to determine whether the ABI
4542 // guarantees that our caller has allocated the parameter save area
4543 // on its stack frame. In the ELFv1 ABI, this is always the case;
4544 // in the ELFv2 ABI, it is true if this is a vararg function or if
4545 // any parameter is located in a stack slot.
4546
4547 bool HasParameterArea = !isELFv2ABI || isVarArg;
4548 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4549 unsigned NumBytes = LinkageSize;
4550 unsigned AvailableFPRs = Num_FPR_Regs;
4551 unsigned AvailableVRs = Num_VR_Regs;
4552 for (const ISD::InputArg &In : Ins) {
4553 if (In.Flags.isNest())
4554 continue;
4555
4556 if (CalculateStackSlotUsed(ArgVT: In.VT, OrigVT: In.ArgVT, Flags: In.Flags, PtrByteSize,
4557 LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
4558 AvailableFPRs, AvailableVRs))
4559 HasParameterArea = true;
4560 }
4561
4562 // Add DAG nodes to load the arguments or copy them out of registers. On
4563 // entry to a function on PPC, the arguments start after the linkage area,
4564 // although the first ones are often in registers.
4565
4566 unsigned ArgOffset = LinkageSize;
4567 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4568 SmallVector<SDValue, 8> MemOps;
4569 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4570 unsigned CurArgIdx = 0;
4571 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4572 SDValue ArgVal;
4573 bool needsLoad = false;
4574 EVT ObjectVT = Ins[ArgNo].VT;
4575 EVT OrigVT = Ins[ArgNo].ArgVT;
4576 unsigned ObjSize = ObjectVT.getStoreSize();
4577 unsigned ArgSize = ObjSize;
4578 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4579 if (Ins[ArgNo].isOrigArg()) {
4580 std::advance(i&: FuncArg, n: Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4581 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4582 }
4583 // We re-align the argument offset for each argument, except when using the
4584 // fast calling convention, in which case we do so only when the argument
4585 // will actually use a stack slot.
4586 unsigned CurArgOffset;
4587 Align Alignment;
4588 auto ComputeArgOffset = [&]() {
4589 /* Respect alignment of argument on the stack. */
4590 Alignment =
4591 CalculateStackSlotAlignment(ArgVT: ObjectVT, OrigVT, Flags, PtrByteSize);
4592 ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
4593 CurArgOffset = ArgOffset;
4594 };
4595
4596 if (CallConv != CallingConv::Fast) {
4597 ComputeArgOffset();
4598
4599 /* Compute GPR index associated with argument offset. */
4600 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4601 GPR_idx = std::min(a: GPR_idx, b: Num_GPR_Regs);
4602 }
4603
4604 // FIXME the codegen can be much improved in some cases.
4605 // We do not have to keep everything in memory.
4606 if (Flags.isByVal()) {
4607 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4608
4609 if (CallConv == CallingConv::Fast)
4610 ComputeArgOffset();
4611
4612 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4613 ObjSize = Flags.getByValSize();
4614 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4615 // Empty aggregate parameters do not take up registers. Examples:
4616 // struct { } a;
4617 // union { } b;
4618 // int c[0];
4619 // etc. However, we have to provide a place-holder in InVals, so
4620 // pretend we have an 8-byte item at the current address for that
4621 // purpose.
4622 if (!ObjSize) {
4623 int FI = MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: ArgOffset, IsImmutable: true);
4624 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4625 InVals.push_back(Elt: FIN);
4626 continue;
4627 }
4628
4629 // Create a stack object covering all stack doublewords occupied
4630 // by the argument. If the argument is (fully or partially) on
4631 // the stack, or if the argument is fully in registers but the
4632 // caller has allocated the parameter save area anyway, we can refer
4633 // directly to the caller's stack frame. Otherwise, create a
4634 // local copy in our own frame.
4635 int FI;
4636 if (HasParameterArea ||
4637 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4638 FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: false, isAliased: true);
4639 else
4640 FI = MFI.CreateStackObject(Size: ArgSize, Alignment, isSpillSlot: false);
4641 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4642
4643 // Handle aggregates smaller than 8 bytes.
4644 if (ObjSize < PtrByteSize) {
4645 // The value of the object is its address, which differs from the
4646 // address of the enclosing doubleword on big-endian systems.
4647 SDValue Arg = FIN;
4648 if (!isLittleEndian) {
4649 SDValue ArgOff = DAG.getConstant(Val: PtrByteSize - ObjSize, DL: dl, VT: PtrVT);
4650 Arg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ArgOff.getValueType(), N1: Arg, N2: ArgOff);
4651 }
4652 InVals.push_back(Elt: Arg);
4653
4654 if (GPR_idx != Num_GPR_Regs) {
4655 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
4656 FuncInfo->addLiveInAttr(VReg, Flags);
4657 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4658 EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ObjSize * 8);
4659 SDValue Store =
4660 DAG.getTruncStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: Arg,
4661 PtrInfo: MachinePointerInfo(&*FuncArg), SVT: ObjType);
4662 MemOps.push_back(Elt: Store);
4663 }
4664 // Whether we copied from a register or not, advance the offset
4665 // into the parameter save area by a full doubleword.
4666 ArgOffset += PtrByteSize;
4667 continue;
4668 }
4669
4670 // The value of the object is its address, which is the address of
4671 // its first stack doubleword.
4672 InVals.push_back(Elt: FIN);
4673
4674 // Store whatever pieces of the object are in registers to memory.
4675 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4676 if (GPR_idx == Num_GPR_Regs)
4677 break;
4678
4679 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
4680 FuncInfo->addLiveInAttr(VReg, Flags);
4681 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4682 SDValue Addr = FIN;
4683 if (j) {
4684 SDValue Off = DAG.getConstant(Val: j, DL: dl, VT: PtrVT);
4685 Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Off.getValueType(), N1: Addr, N2: Off);
4686 }
4687 unsigned StoreSizeInBits = std::min(a: PtrByteSize, b: (ObjSize - j)) * 8;
4688 EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreSizeInBits);
4689 SDValue Store =
4690 DAG.getTruncStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: Addr,
4691 PtrInfo: MachinePointerInfo(&*FuncArg, j), SVT: ObjType);
4692 MemOps.push_back(Elt: Store);
4693 ++GPR_idx;
4694 }
4695 ArgOffset += ArgSize;
4696 continue;
4697 }
4698
4699 switch (ObjectVT.getSimpleVT().SimpleTy) {
4700 default: llvm_unreachable("Unhandled argument type!");
4701 case MVT::i1:
4702 case MVT::i32:
4703 case MVT::i64:
4704 if (Flags.isNest()) {
4705 // The 'nest' parameter, if any, is passed in R11.
4706 Register VReg = MF.addLiveIn(PReg: PPC::X11, RC: &PPC::G8RCRegClass);
4707 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);
4708
4709 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4710 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4711
4712 break;
4713 }
4714
4715 // These can be scalar arguments or elements of an integer array type
4716 // passed directly. Clang may use those instead of "byval" aggregate
4717 // types to avoid forcing arguments to memory unnecessarily.
4718 if (GPR_idx != Num_GPR_Regs) {
4719 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
4720 FuncInfo->addLiveInAttr(VReg, Flags);
4721 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);
4722
4723 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4724 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4725 // value to MVT::i64 and then truncate to the correct register size.
4726 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4727 } else {
4728 if (CallConv == CallingConv::Fast)
4729 ComputeArgOffset();
4730
4731 needsLoad = true;
4732 ArgSize = PtrByteSize;
4733 }
4734 if (CallConv != CallingConv::Fast || needsLoad)
4735 ArgOffset += 8;
4736 break;
4737
4738 case MVT::f32:
4739 case MVT::f64:
4740 // These can be scalar arguments or elements of a float array type
4741 // passed directly. The latter are used to implement ELFv2 homogeneous
4742 // float aggregates.
4743 if (FPR_idx != Num_FPR_Regs) {
4744 unsigned VReg;
4745
4746 if (ObjectVT == MVT::f32)
4747 VReg = MF.addLiveIn(PReg: FPR[FPR_idx],
4748 RC: Subtarget.hasP8Vector()
4749 ? &PPC::VSSRCRegClass
4750 : &PPC::F4RCRegClass);
4751 else
4752 VReg = MF.addLiveIn(PReg: FPR[FPR_idx], RC: Subtarget.hasVSX()
4753 ? &PPC::VSFRCRegClass
4754 : &PPC::F8RCRegClass);
4755
4756 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
4757 ++FPR_idx;
4758 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4759 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4760 // once we support fp <-> gpr moves.
4761
4762 // This can only ever happen in the presence of f32 array types,
4763 // since otherwise we never run out of FPRs before running out
4764 // of GPRs.
4765 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
4766 FuncInfo->addLiveInAttr(VReg, Flags);
4767 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);
4768
4769 if (ObjectVT == MVT::f32) {
4770 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4771 ArgVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgVal,
4772 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
4773 ArgVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: ArgVal);
4774 }
4775
4776 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ObjectVT, Operand: ArgVal);
4777 } else {
4778 if (CallConv == CallingConv::Fast)
4779 ComputeArgOffset();
4780
4781 needsLoad = true;
4782 }
4783
4784 // When passing an array of floats, the array occupies consecutive
4785 // space in the argument area; only round up to the next doubleword
4786 // at the end of the array. Otherwise, each float takes 8 bytes.
4787 if (CallConv != CallingConv::Fast || needsLoad) {
4788 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4789 ArgOffset += ArgSize;
4790 if (Flags.isInConsecutiveRegsLast())
4791 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4792 }
4793 break;
4794 case MVT::v4f32:
4795 case MVT::v4i32:
4796 case MVT::v8i16:
4797 case MVT::v16i8:
4798 case MVT::v2f64:
4799 case MVT::v2i64:
4800 case MVT::v1i128:
4801 case MVT::f128:
4802 // These can be scalar arguments or elements of a vector array type
4803 // passed directly. The latter are used to implement ELFv2 homogeneous
4804 // vector aggregates.
4805 if (VR_idx != Num_VR_Regs) {
4806 Register VReg = MF.addLiveIn(PReg: VR[VR_idx], RC: &PPC::VRRCRegClass);
4807 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
4808 ++VR_idx;
4809 } else {
4810 if (CallConv == CallingConv::Fast)
4811 ComputeArgOffset();
4812 needsLoad = true;
4813 }
4814 if (CallConv != CallingConv::Fast || needsLoad)
4815 ArgOffset += 16;
4816 break;
4817 }
4818
4819 // We need to load the argument to a virtual register if we determined
4820 // above that we ran out of physical registers of the appropriate type.
4821 if (needsLoad) {
4822 if (ObjSize < ArgSize && !isLittleEndian)
4823 CurArgOffset += ArgSize - ObjSize;
4824 int FI = MFI.CreateFixedObject(Size: ObjSize, SPOffset: CurArgOffset, IsImmutable: isImmutable);
4825 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4826 ArgVal = DAG.getLoad(VT: ObjectVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo());
4827 }
4828
4829 InVals.push_back(Elt: ArgVal);
4830 }
4831
4832 // Area that is at least reserved in the caller of this function.
4833 unsigned MinReservedArea;
4834 if (HasParameterArea)
4835 MinReservedArea = std::max(a: ArgOffset, b: LinkageSize + 8 * PtrByteSize);
4836 else
4837 MinReservedArea = LinkageSize;
4838
4839 // Set the size that is at least reserved in the caller of this function. Tail
4840 // call optimized functions' reserved stack space needs to be aligned so that
4841 // taking the difference between two stack areas will result in an aligned
4842 // stack.
4843 MinReservedArea =
4844 EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
4845 FuncInfo->setMinReservedArea(MinReservedArea);
4846
4847 // If the function takes a variable number of arguments, make a frame index for
4848 // the start of the first vararg value... for expansion of llvm.va_start.
4849 // As the ELFv2 ABI spec notes:
4850 // C programs that are intended to be *portable* across different compilers
4851 // and architectures must use the header file <stdarg.h> to deal with variable
4852 // argument lists.
4853 if (isVarArg && MFI.hasVAStart()) {
4854 int Depth = ArgOffset;
4855
4856 FuncInfo->setVarArgsFrameIndex(
4857 MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: Depth, IsImmutable: true));
4858 SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
4859
4860 // If this function is vararg, store any remaining integer argument regs
4861 // to their spots on the stack so that they may be loaded by dereferencing
4862 // the result of va_next.
4863 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4864 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4865 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
4866 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4867 SDValue Store =
4868 DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
4869 MemOps.push_back(Elt: Store);
4870 // Increment the address by PtrByteSize for the next argument to store
4871 SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
4872 FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
4873 }
4874 }
4875
4876 if (!MemOps.empty())
4877 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
4878
4879 return Chain;
4880}
4881
4882/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4883/// adjusted to accommodate the arguments for the tailcall.
4884static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4885 unsigned ParamSize) {
4886
4887 if (!isTailCall) return 0;
4888
4889 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4890 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4891 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4892 // Remember only if the new adjustment is bigger.
4893 if (SPDiff < FI->getTailCallSPDelta())
4894 FI->setTailCallSPDelta(SPDiff);
4895
4896 return SPDiff;
4897}
4898
4899static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4900
4901static bool callsShareTOCBase(const Function *Caller,
4902 const GlobalValue *CalleeGV,
4903 const TargetMachine &TM) {
4904 // It does not make sense to call callsShareTOCBase() with a caller that
4905 // is PC Relative since PC Relative callers do not have a TOC.
4906#ifndef NDEBUG
4907 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4908 assert(!STICaller->isUsingPCRelativeCalls() &&
4909 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4910#endif
4911
4912 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4913 // don't have enough information to determine if the caller and callee share
4914 // the same TOC base, so we have to pessimistically assume they don't for
4915 // correctness.
4916 if (!CalleeGV)
4917 return false;
4918
4919 // If the callee is preemptable, then the static linker will use a plt-stub
4920 // which saves the toc to the stack, and needs a nop after the call
4921 // instruction to convert to a toc-restore.
4922 if (!TM.shouldAssumeDSOLocal(GV: CalleeGV))
4923 return false;
4924
4925 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4926 // We may need a TOC restore in the situation where the caller requires a
4927 // valid TOC but the callee is PC Relative and does not.
4928 const Function *F = dyn_cast<Function>(Val: CalleeGV);
4929 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(Val: CalleeGV);
4930
4931 // If we have an Alias we can try to get the function from there.
4932 if (Alias) {
4933 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4934 F = dyn_cast<Function>(Val: GlobalObj);
4935 }
4936
4937 // If we still have no valid function pointer we do not have enough
4938 // information to determine if the callee uses PC Relative calls so we must
4939 // assume that it does.
4940 if (!F)
4941 return false;
4942
4943 // If the callee uses PC Relative we cannot guarantee that the callee won't
4944 // clobber the TOC of the caller and so we must assume that the two
4945 // functions do not share a TOC base.
4946 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(F: *F);
4947 if (STICallee->isUsingPCRelativeCalls())
4948 return false;
4949
4950 // If the GV is not a strong definition then we need to assume it can be
4951 // replaced by another function at link time. The function that replaces
4952 // it may not share the same TOC as the caller since the callee may be
4953 // replaced by a PC Relative version of the same function.
4954 if (!CalleeGV->isStrongDefinitionForLinker())
4955 return false;
4956
4957 // The medium and large code models are expected to provide a sufficiently
4958 // large TOC to provide all data addressing needs of a module with a
4959 // single TOC.
4960 if (CodeModel::Medium == TM.getCodeModel() ||
4961 CodeModel::Large == TM.getCodeModel())
4962 return true;
4963
4964 // Any explicitly-specified sections and section prefixes must also match.
4965 // Also, if we're using -ffunction-sections, then each function is always in
4966 // a different section (the same is true for COMDAT functions).
4967 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4968 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4969 return false;
4970 if (const auto *F = dyn_cast<Function>(Val: CalleeGV)) {
4971 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4972 return false;
4973 }
4974
4975 return true;
4976}
4977
4978static bool
4979needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4980 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4981 assert(Subtarget.is64BitELFABI());
4982
4983 const unsigned PtrByteSize = 8;
4984 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4985
4986 static const MCPhysReg GPR[] = {
4987 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4988 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4989 };
4990 static const MCPhysReg VR[] = {
4991 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4992 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4993 };
4994
4995 const unsigned NumGPRs = std::size(GPR);
4996 const unsigned NumFPRs = 13;
4997 const unsigned NumVRs = std::size(VR);
4998 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4999
5000 unsigned NumBytes = LinkageSize;
5001 unsigned AvailableFPRs = NumFPRs;
5002 unsigned AvailableVRs = NumVRs;
5003
5004 for (const ISD::OutputArg& Param : Outs) {
5005 if (Param.Flags.isNest()) continue;
5006
5007 if (CalculateStackSlotUsed(ArgVT: Param.VT, OrigVT: Param.ArgVT, Flags: Param.Flags, PtrByteSize,
5008 LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
5009 AvailableFPRs, AvailableVRs))
5010 return true;
5011 }
5012 return false;
5013}
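// Illustrative example (not taken from the code above): for a callee taking
// nine i64 arguments, the first eight are assigned to X3-X10, but the ninth
// falls outside the 64-byte register parameter area, so CalculateStackSlotUsed
// reports a stack slot and needStackSlotPassParameters() returns true.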
5014
5015static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5016 if (CB.arg_size() != CallerFn->arg_size())
5017 return false;
5018
5019 auto CalleeArgIter = CB.arg_begin();
5020 auto CalleeArgEnd = CB.arg_end();
5021 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5022
5023 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5024 const Value* CalleeArg = *CalleeArgIter;
5025 const Value* CallerArg = &(*CallerArgIter);
5026 if (CalleeArg == CallerArg)
5027 continue;
5028
5029 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5030 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5031 // }
5032 // The 1st argument of the callee is undef and has the same type as the caller's.
5033 if (CalleeArg->getType() == CallerArg->getType() &&
5034 isa<UndefValue>(Val: CalleeArg))
5035 continue;
5036
5037 return false;
5038 }
5039
5040 return true;
5041}
5042
5043 // Returns true if TCO is possible between the caller's and callee's
5044 // calling conventions.
5045static bool
5046areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5047 CallingConv::ID CalleeCC) {
5048 // Tail calls are possible with fastcc and ccc.
5049 auto isTailCallableCC = [] (CallingConv::ID CC){
5050 return CC == CallingConv::C || CC == CallingConv::Fast;
5051 };
5052 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5053 return false;
5054
5055 // We can safely tail call both fastcc and ccc callees from a C calling
5056 // convention caller. If the caller is fastcc, we may have less stack space
5057 // than a non-fastcc caller with the same signature, so disable tail calls
5058 // in that case.
5059 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5060}
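// Summary of the decision above (illustrative):
//   caller C,    callee C    -> eligible
//   caller C,    callee Fast -> eligible
//   caller Fast, callee Fast -> eligible
//   caller Fast, callee C    -> not eligible (caller may have less stack)
//   any other calling convention on either side -> not eligible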
5061
5062bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5063 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5064 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5065 const SmallVectorImpl<ISD::OutputArg> &Outs,
5066 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5067 bool isCalleeExternalSymbol) const {
5068 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5069
5070 if (DisableSCO && !TailCallOpt) return false;
5071
5072 // Variadic argument functions are not supported.
5073 if (isVarArg) return false;
5074
5075 // Check that the calling conventions are compatible for tco.
5076 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5077 return false;
5078
5079 // A caller containing any byval parameter is not supported.
5080 if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5081 return false;
5082
5083 // A callee containing any byval parameter is not supported either.
5084 // Note: This is a quick workaround, because in some cases, e.g. when the
5085 // caller's stack size > callee's stack size, we are still able to apply
5086 // sibling call optimization. For example, gcc is able to do SCO for caller1
5087 // in the following example, but not for caller2.
5088 // struct test {
5089 // long int a;
5090 // char ary[56];
5091 // } gTest;
5092 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5093 // b->a = v.a;
5094 // return 0;
5095 // }
5096 // void caller1(struct test a, struct test c, struct test *b) {
5097 // callee(gTest, b); }
5098 // void caller2(struct test *b) { callee(gTest, b); }
5099 if (any_of(Range: Outs, P: [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5100 return false;
5101
5102 // If callee and caller use different calling conventions, we cannot pass
5103 // parameters on stack since offsets for the parameter area may be different.
5104 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5105 return false;
5106
5107 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5108 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5109 // callee potentially have different TOC bases then we cannot tail call since
5110 // we need to restore the TOC pointer after the call.
5111 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5112 // We cannot guarantee this for indirect calls or calls to external functions.
5113 // When PC-Relative addressing is used, the concept of the TOC is no longer
5114 // applicable so this check is not required.
5115 // Check first for indirect calls.
5116 if (!Subtarget.isUsingPCRelativeCalls() &&
5117 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5118 return false;
5119
5120 // Check if we share the TOC base.
5121 if (!Subtarget.isUsingPCRelativeCalls() &&
5122 !callsShareTOCBase(Caller: CallerFunc, CalleeGV, TM: getTargetMachine()))
5123 return false;
5124
5125 // TCO allows altering callee ABI, so we don't have to check further.
5126 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5127 return true;
5128
5129 if (DisableSCO) return false;
5130
5131 // If the callee uses the same argument list as the caller, then we can
5132 // apply SCO in this case. If it does not, then we need to check whether the
5133 // callee needs stack slots for passing arguments.
5134 // PC Relative tail calls may not have a CallBase. If there is no CallBase,
5135 // we cannot verify that the argument lists match, so assume that they
5136 // don't.
5137 if (CB && !hasSameArgumentList(CallerFn: CallerFunc, CB: *CB) &&
5138 needStackSlotPassParameters(Subtarget, Outs))
5139 return false;
5140 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5141 return false;
5142
5143 return true;
5144}
5145
5146/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5147/// for tail call optimization. Targets which want to do tail call
5148/// optimization should implement this function.
5149bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5150 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5151 CallingConv::ID CallerCC, bool isVarArg,
5152 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5153 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5154 return false;
5155
5156 // Variable argument functions are not supported.
5157 if (isVarArg)
5158 return false;
5159
5160 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5161 // Functions containing by val parameters are not supported.
5162 if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5163 return false;
5164
5165 // Non-PIC/GOT tail calls are supported.
5166 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5167 return true;
5168
5169 // At the moment we can only do local tail calls (in same module, hidden
5170 // or protected) if we are generating PIC.
5171 if (CalleeGV)
5172 return CalleeGV->hasHiddenVisibility() ||
5173 CalleeGV->hasProtectedVisibility();
5174 }
5175
5176 return false;
5177}
5178
5179 /// isBLACompatibleAddress - Return the immediate to use if the specified
5180 /// 32-bit value is representable in the immediate field of a BxA instruction.
5181static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5182 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Op);
5183 if (!C) return nullptr;
5184
5185 int Addr = C->getZExtValue();
5186 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5187 SignExtend32<26>(X: Addr) != Addr)
5188 return nullptr; // Top 6 bits have to be sext of immediate.
5189
5190 return DAG
5191 .getSignedConstant(
5192 Val: (int)C->getZExtValue() >> 2, DL: SDLoc(Op),
5193 VT: DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()))
5194 .getNode();
5195}
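// Worked example (illustrative): a call target constant of 0x1000 has its low
// two bits clear and fits in a signed 26-bit value, so the node returned above
// holds 0x1000 >> 2 == 0x400 for the immediate field of the BLA. A target of
// 64 MiB (0x4000000) fails the SignExtend32<26> check and nullptr is returned.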
5196
5197namespace {
5198
5199struct TailCallArgumentInfo {
5200 SDValue Arg;
5201 SDValue FrameIdxOp;
5202 int FrameIdx = 0;
5203
5204 TailCallArgumentInfo() = default;
5205};
5206
5207} // end anonymous namespace
5208
5209/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5210static void StoreTailCallArgumentsToStackSlot(
5211 SelectionDAG &DAG, SDValue Chain,
5212 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5213 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5214 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5215 SDValue Arg = TailCallArgs[i].Arg;
5216 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5217 int FI = TailCallArgs[i].FrameIdx;
5218 // Store relative to framepointer.
5219 MemOpChains.push_back(Elt: DAG.getStore(
5220 Chain, dl, Val: Arg, Ptr: FIN,
5221 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)));
5222 }
5223}
5224
5225/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5226/// the appropriate stack slot for the tail call optimized function call.
5227static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5228 SDValue OldRetAddr, SDValue OldFP,
5229 int SPDiff, const SDLoc &dl) {
5230 if (SPDiff) {
5231 // Calculate the new stack slot for the return address.
5232 MachineFunction &MF = DAG.getMachineFunction();
5233 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5234 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5235 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5236 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5237 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(Size: SlotSize,
5238 SPOffset: NewRetAddrLoc, IsImmutable: true);
5239 SDValue NewRetAddrFrIdx =
5240 DAG.getFrameIndex(FI: NewRetAddr, VT: Subtarget.getScalarIntVT());
5241 Chain = DAG.getStore(Chain, dl, Val: OldRetAddr, Ptr: NewRetAddrFrIdx,
5242 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: NewRetAddr));
5243 }
5244 return Chain;
5245}
5246
5247 /// CalculateTailCallArgDest - Remember the argument for later processing and
5248 /// calculate the position of the argument.
5249static void CalculateTailCallArgDest(
5250 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5251 int SPDiff, unsigned ArgOffset,
5252 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5253 int Offset = ArgOffset + SPDiff;
5254 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5255 int FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
5256 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5257 SDValue FIN = DAG.getFrameIndex(FI, VT);
5258 TailCallArgumentInfo Info;
5259 Info.Arg = Arg;
5260 Info.FrameIdxOp = FIN;
5261 Info.FrameIdx = FI;
5262 TailCallArguments.push_back(Elt: Info);
5263}
5264
5265 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and
5266 /// return address stack slots. Returns the chain as result and the loaded
5267 /// values in LROpOut/FPOpOut. Used when tail calling.
5268SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5269 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5270 SDValue &FPOpOut, const SDLoc &dl) const {
5271 if (SPDiff) {
5272 // Load the LR and FP stack slot for later adjusting.
5273 LROpOut = getReturnAddrFrameIndex(DAG);
5274 LROpOut = DAG.getLoad(VT: Subtarget.getScalarIntVT(), dl, Chain, Ptr: LROpOut,
5275 PtrInfo: MachinePointerInfo());
5276 Chain = SDValue(LROpOut.getNode(), 1);
5277 }
5278 return Chain;
5279}
5280
5281/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5282/// by "Src" to address "Dst" of size "Size". Alignment information is
5283/// specified by the specific parameter attribute. The copy will be passed as
5284/// a byval function parameter.
5285/// Sometimes what we are copying is the end of a larger object, the part that
5286/// does not fit in registers.
5287static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5288 SDValue Chain, ISD::ArgFlagsTy Flags,
5289 SelectionDAG &DAG, const SDLoc &dl) {
5290 SDValue SizeNode = DAG.getConstant(Val: Flags.getByValSize(), DL: dl, VT: MVT::i32);
5291 return DAG.getMemcpy(
5292 Chain, dl, Dst, Src, Size: SizeNode, Alignment: Flags.getNonZeroByValAlign(), isVol: false, AlwaysInline: false,
5293 /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
5294}
5295
5296/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5297/// tail calls.
5298static void LowerMemOpCallTo(
5299 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5300 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5301 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5302 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5303 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
5304 if (!isTailCall) {
5305 if (isVector) {
5306 SDValue StackPtr;
5307 if (isPPC64)
5308 StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
5309 else
5310 StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
5311 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr,
5312 N2: DAG.getConstant(Val: ArgOffset, DL: dl, VT: PtrVT));
5313 }
5314 MemOpChains.push_back(
5315 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
5316 // Calculate and remember argument location.
5317 } else
5318 CalculateTailCallArgDest(DAG, MF, IsPPC64: isPPC64, Arg, SPDiff, ArgOffset,
5319 TailCallArguments);
5320}
5321
5322static void
5323PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5324 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5325 SDValue FPOp,
5326 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5327 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5328 // might overwrite each other in case of tail call optimization.
5329 SmallVector<SDValue, 8> MemOpChains2;
5330 // Do not flag preceding copytoreg stuff together with the following stuff.
5331 InGlue = SDValue();
5332 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArgs: TailCallArguments,
5333 MemOpChains&: MemOpChains2, dl);
5334 if (!MemOpChains2.empty())
5335 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains2);
5336
5337 // Store the return address to the appropriate stack slot.
5338 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, OldRetAddr: LROp, OldFP: FPOp, SPDiff, dl);
5339
5340 // Emit callseq_end just before tailcall node.
5341 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue: InGlue, DL: dl);
5342 InGlue = Chain.getValue(R: 1);
5343}
5344
5345 // Is this global address that of a function that can be called by name (as
5346 // opposed to something that must hold a descriptor for an indirect call)?
5347static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5348 if (GV) {
5349 if (GV->isThreadLocal())
5350 return false;
5351
5352 return GV->getValueType()->isFunctionTy();
5353 }
5354
5355 return false;
5356}
5357
5358SDValue PPCTargetLowering::LowerCallResult(
5359 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5360 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5361 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5362 SmallVector<CCValAssign, 16> RVLocs;
5363 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5364 *DAG.getContext());
5365
5366 CCRetInfo.AnalyzeCallResult(
5367 Ins, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5368 ? RetCC_PPC_Cold
5369 : RetCC_PPC);
5370
5371 // Copy all of the result registers out of their specified physreg.
5372 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5373 CCValAssign &VA = RVLocs[i];
5374 assert(VA.isRegLoc() && "Can only return in registers!");
5375
5376 SDValue Val;
5377
5378 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5379 SDValue Lo = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
5380 Glue: InGlue);
5381 Chain = Lo.getValue(R: 1);
5382 InGlue = Lo.getValue(R: 2);
5383 VA = RVLocs[++i]; // skip ahead to next loc
5384 SDValue Hi = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
5385 Glue: InGlue);
5386 Chain = Hi.getValue(R: 1);
5387 InGlue = Hi.getValue(R: 2);
5388 if (!Subtarget.isLittleEndian())
5389 std::swap (a&: Lo, b&: Hi);
5390 Val = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
5391 } else {
5392 Val = DAG.getCopyFromReg(Chain, dl,
5393 Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue: InGlue);
5394 Chain = Val.getValue(R: 1);
5395 InGlue = Val.getValue(R: 2);
5396 }
5397
5398 switch (VA.getLocInfo()) {
5399 default: llvm_unreachable("Unknown loc info!");
5400 case CCValAssign::Full: break;
5401 case CCValAssign::AExt:
5402 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
5403 break;
5404 case CCValAssign::ZExt:
5405 Val = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: VA.getLocVT(), N1: Val,
5406 N2: DAG.getValueType(VA.getValVT()));
5407 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
5408 break;
5409 case CCValAssign::SExt:
5410 Val = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: VA.getLocVT(), N1: Val,
5411 N2: DAG.getValueType(VA.getValVT()));
5412 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
5413 break;
5414 }
5415
5416 InVals.push_back(Elt: Val);
5417 }
5418
5419 return Chain;
5420}
5421
5422static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5423 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5424 auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5425 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5426
5427 // PatchPoint calls are not indirect.
5428 if (isPatchPoint)
5429 return false;
5430
5431 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Val: Callee))
5432 return false;
5433
5434 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5435 // because the immediate function pointer points to a descriptor instead of
5436 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5437 // pointer immediate points to the global entry point, while the BLA would
5438 // need to jump to the local entry point (see rL211174).
5439 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5440 isBLACompatibleAddress(Op: Callee, DAG))
5441 return false;
5442
5443 return true;
5444}
5445
5446// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5447static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5448 return Subtarget.isAIXABI() ||
5449 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5450}
5451
5452static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5453 const Function &Caller, const SDValue &Callee,
5454 const PPCSubtarget &Subtarget,
5455 const TargetMachine &TM,
5456 bool IsStrictFPCall = false) {
5457 if (CFlags.IsTailCall)
5458 return PPCISD::TC_RETURN;
5459
5460 unsigned RetOpc = 0;
5461 // This is a call through a function pointer.
5462 if (CFlags.IsIndirect) {
5463 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5464 // indirect calls. The save of the caller's TOC pointer to the stack will be
5465 // inserted into the DAG as part of call lowering. The restore of the TOC
5466 // pointer is modeled by using a pseudo instruction for the call opcode that
5467 // represents the 2 instruction sequence of an indirect branch and link,
5468 // immediately followed by a load of the TOC pointer from the stack save
5469 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5470 // as it is not saved or used.
5471 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5472 : PPCISD::BCTRL;
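    // For illustration only, the BCTRL_LOAD_TOC pseudo expands to roughly:
    //   bctrl
    //   ld r2, <TOC save offset>(r1)
    // where the offset comes from the frame lowering (e.g. 24 on ELFv2,
    // 40 on ELFv1; these values are illustrative, not read from here).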
5473 } else if (Subtarget.isUsingPCRelativeCalls()) {
5474 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5475 RetOpc = PPCISD::CALL_NOTOC;
5476 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5477 // The ABIs that maintain a TOC pointer across calls need to have a nop
5478 // immediately following the call instruction if the caller and callee may
5479 // have different TOC bases. At link time, if the linker determines the call
5480 // may not share a TOC base, the call is redirected to a trampoline inserted
5481 // by the linker. The trampoline will (among other things) save the caller's
5482 // TOC pointer at an ABI-designated offset in the linkage area and the
5483 // linker will rewrite the nop to be a load of the TOC pointer from the
5484 // linkage area into gpr2.
5485 auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5486 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5487 RetOpc =
5488 callsShareTOCBase(Caller: &Caller, CalleeGV: GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5489 } else
5490 RetOpc = PPCISD::CALL;
5491 if (IsStrictFPCall) {
5492 switch (RetOpc) {
5493 default:
5494 llvm_unreachable("Unknown call opcode");
5495 case PPCISD::BCTRL_LOAD_TOC:
5496 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5497 break;
5498 case PPCISD::BCTRL:
5499 RetOpc = PPCISD::BCTRL_RM;
5500 break;
5501 case PPCISD::CALL_NOTOC:
5502 RetOpc = PPCISD::CALL_NOTOC_RM;
5503 break;
5504 case PPCISD::CALL:
5505 RetOpc = PPCISD::CALL_RM;
5506 break;
5507 case PPCISD::CALL_NOP:
5508 RetOpc = PPCISD::CALL_NOP_RM;
5509 break;
5510 }
5511 }
5512 return RetOpc;
5513}
5514
5515static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5516 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5517 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5518 if (SDNode *Dest = isBLACompatibleAddress(Op: Callee, DAG))
5519 return SDValue(Dest, 0);
5520
5521 // Returns true if the callee is local, and false otherwise.
5522 auto isLocalCallee = [&]() {
5523 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5524 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5525
5526 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5527 !isa_and_nonnull<GlobalIFunc>(Val: GV);
5528 };
5529
5530 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5531 // a static relocation model causes some versions of GNU LD (2.17.50, at
5532 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5533 // built with secure-PLT.
5534 bool UsePlt =
5535 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5536 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5537
5538 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5539 const TargetMachine &TM = Subtarget.getTargetMachine();
5540 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5541 MCSymbolXCOFF *S =
5542 cast<MCSymbolXCOFF>(Val: TLOF->getFunctionEntryPointSymbol(Func: GV, TM));
5543
5544 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
5545 return DAG.getMCSymbol(Sym: S, VT: PtrVT);
5546 };
5547
5548 auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5549 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5550 if (isFunctionGlobalAddress(GV)) {
5551 const GlobalValue *GV = cast<GlobalAddressSDNode>(Val: Callee)->getGlobal();
5552
5553 if (Subtarget.isAIXABI()) {
5554 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5555 return getAIXFuncEntryPointSymbolSDNode(GV);
5556 }
5557 return DAG.getTargetGlobalAddress(GV, DL: dl, VT: Callee.getValueType(), offset: 0,
5558 TargetFlags: UsePlt ? PPCII::MO_PLT : 0);
5559 }
5560
5561 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val: Callee)) {
5562 const char *SymName = S->getSymbol();
5563 if (Subtarget.isAIXABI()) {
5564 // If there exists a user-declared function whose name is the same as the
5565 // ExternalSymbol's, then we pick up the user-declared version.
5566 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5567 if (const Function *F =
5568 dyn_cast_or_null<Function>(Val: Mod->getNamedValue(Name: SymName)))
5569 return getAIXFuncEntryPointSymbolSDNode(F);
5570
5571 // On AIX, direct function calls reference the symbol for the function's
5572 // entry point, which is named by prepending a "." before the function's
5573 // C-linkage name. A Qualname is returned here because an external
5574 // function entry point is a csect with XTY_ER property.
5575 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5576 auto &Context = DAG.getMachineFunction().getContext();
5577 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5578 Section: (Twine(".") + Twine(SymName)).str(), K: SectionKind::getMetadata(),
5579 CsectProp: XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5580 return Sec->getQualNameSymbol();
5581 };
5582
5583 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
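      // For example (illustrative): a call to an undeclared external function
      // "foo" on AIX references the entry-point csect ".foo[PR]" built above,
      // so the emitted assembly is roughly:
      //   bl .foo[PR]
      //   nop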
5584 }
5585 return DAG.getTargetExternalSymbol(Sym: SymName, VT: Callee.getValueType(),
5586 TargetFlags: UsePlt ? PPCII::MO_PLT : 0);
5587 }
5588
5589 // No transformation needed.
5590 assert(Callee.getNode() && "What no callee?");
5591 return Callee;
5592}
5593
5594static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5595 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5596 "Expected a CALLSEQ_START SDNode.");
5597
5598 // The last result value is the chain, except when the node has glue. If the
5599 // node has glue, then the last value is the glue, and the chain is the
5600 // second-to-last value.
5601 SDValue LastValue = CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 1);
5602 if (LastValue.getValueType() != MVT::Glue)
5603 return LastValue;
5604
5605 return CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 2);
5606}
5607
5608 // Creates the node that moves a function's address into the count register
5609 // to prepare for an indirect call instruction.
5610static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5611 SDValue &Glue, SDValue &Chain,
5612 const SDLoc &dl) {
5613 SDValue MTCTROps[] = {Chain, Callee, Glue};
5614 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5615 Chain = DAG.getNode(Opcode: PPCISD::MTCTR, DL: dl, ResultTys: ReturnTypes,
5616 Ops: ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5617 // The glue is the second value produced.
5618 Glue = Chain.getValue(R: 1);
5619}
5620
5621static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5622 SDValue &Glue, SDValue &Chain,
5623 SDValue CallSeqStart,
5624 const CallBase *CB, const SDLoc &dl,
5625 bool hasNest,
5626 const PPCSubtarget &Subtarget) {
5627 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5628 // entry point, but to the function descriptor (the function entry point
5629 // address is part of the function descriptor though).
5630 // The function descriptor is a three doubleword structure with the
5631 // following fields: function entry point, TOC base address and
5632 // environment pointer.
5633 // Thus for a call through a function pointer, the following actions need
5634 // to be performed:
5635 // 1. Save the TOC of the caller in the TOC save area of its stack
5636 // frame (this is done in LowerCall_64SVR4() or LowerCall_AIX()).
5637 // 2. Load the address of the function entry point from the function
5638 // descriptor.
5639 // 3. Load the TOC of the callee from the function descriptor into r2.
5640 // 4. Load the environment pointer from the function descriptor into
5641 // r11.
5642 // 5. Branch to the function entry point address.
5643 // 6. On return of the callee, the TOC of the caller needs to be
5644 // restored (this is done in FinishCall()).
5645 //
5646 // The loads are scheduled at the beginning of the call sequence, and the
5647 // register copies are flagged together to ensure that no other
5648 // operations can be scheduled in between. E.g. without flagging the
5649 // copies together, a TOC access in the caller could be scheduled between
5650 // the assignment of the callee TOC and the branch to the callee, which leads
5651 // to incorrect code.
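  // Illustrative sketch (64-bit ELFv1 layout; the actual offsets come from the
  // subtarget queries used below):
  //   fdesc: .quad entry_point   // offset 0
  //          .quad toc_base      // offset 8
  //          .quad environment   // offset 16
  // and the resulting call sequence is roughly
  //   ld rN,  0(fdesc)   ; entry point, moved to CTR below
  //   ld r2,  8(fdesc)   ; callee TOC
  //   ld r11, 16(fdesc)  ; environment pointer
  //   bctrl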
5652
5653 // Start by loading the function address from the descriptor.
5654 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5655 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5656 ? (MachineMemOperand::MODereferenceable |
5657 MachineMemOperand::MOInvariant)
5658 : MachineMemOperand::MONone;
5659
5660 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5661
5662 // Registers used in building the DAG.
5663 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5664 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5665
5666 // Offsets of descriptor members.
5667 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5668 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5669
5670 const MVT RegVT = Subtarget.getScalarIntVT();
5671 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5672
5673 // One load for the function's entry point address.
5674 SDValue LoadFuncPtr = DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: Callee, PtrInfo: MPI,
5675 Alignment, MMOFlags);
5676
5677 // One for loading the TOC anchor for the module that contains the called
5678 // function.
5679 SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCAnchorOffset, DL: dl);
5680 SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: TOCOff);
5681 SDValue TOCPtr =
5682 DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddTOC,
5683 PtrInfo: MPI.getWithOffset(O: TOCAnchorOffset), Alignment, MMOFlags);
5684
5685 // One for loading the environment pointer.
5686 SDValue PtrOff = DAG.getIntPtrConstant(Val: EnvPtrOffset, DL: dl);
5687 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: PtrOff);
5688 SDValue LoadEnvPtr =
5689 DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddPtr,
5690 PtrInfo: MPI.getWithOffset(O: EnvPtrOffset), Alignment, MMOFlags);
5691
5692
5693 // Then copy the newly loaded TOC anchor to the TOC pointer.
5694 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, Reg: TOCReg, N: TOCPtr, Glue);
5695 Chain = TOCVal.getValue(R: 0);
5696 Glue = TOCVal.getValue(R: 1);
5697
5698 // If the function call has an explicit 'nest' parameter, it takes the
5699 // place of the environment pointer.
5700 assert((!hasNest || !Subtarget.isAIXABI()) &&
5701 "Nest parameter is not supported on AIX.");
5702 if (!hasNest) {
5703 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, Reg: EnvPtrReg, N: LoadEnvPtr, Glue);
5704 Chain = EnvVal.getValue(R: 0);
5705 Glue = EnvVal.getValue(R: 1);
5706 }
5707
5708 // The rest of the indirect call sequence is the same as the non-descriptor
5709 // DAG.
5710 prepareIndirectCall(DAG, Callee&: LoadFuncPtr, Glue, Chain, dl);
5711}
5712
5713static void
5714buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5715 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5716 SelectionDAG &DAG,
5717 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5718 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5719 const PPCSubtarget &Subtarget) {
5720 const bool IsPPC64 = Subtarget.isPPC64();
5721 // MVT for a general purpose register.
5722 const MVT RegVT = Subtarget.getScalarIntVT();
5723
5724 // First operand is always the chain.
5725 Ops.push_back(Elt: Chain);
5726
5727 // If it's a direct call pass the callee as the second operand.
5728 if (!CFlags.IsIndirect)
5729 Ops.push_back(Elt: Callee);
5730 else {
5731 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5732
5733 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5734 // on the stack (this would have been done in `LowerCall_64SVR4` or
5735 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5736 // represents both the indirect branch and a load that restores the TOC
5737 // pointer from the linkage area. The operand for the TOC restore is an add
5738 // of the TOC save offset to the stack pointer. This must be the second
5739 // operand: after the chain input but before any other variadic arguments.
5740 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5741 // saved or used.
5742 if (isTOCSaveRestoreRequired(Subtarget)) {
5743 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5744
5745 SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: RegVT);
5746 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5747 SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
5748 SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: StackPtr, N2: TOCOff);
5749 Ops.push_back(Elt: AddTOC);
5750 }
5751
5752 // Add the register used for the environment pointer.
5753 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5754 Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getEnvironmentPointerRegister(),
5755 VT: RegVT));
5756
5757
5758 // Add CTR register as callee so a bctr can be emitted later.
5759 if (CFlags.IsTailCall)
5760 Ops.push_back(Elt: DAG.getRegister(Reg: IsPPC64 ? PPC::CTR8 : PPC::CTR, VT: RegVT));
5761 }
5762
5763 // If this is a tail call add stack pointer delta.
5764 if (CFlags.IsTailCall)
5765 Ops.push_back(Elt: DAG.getConstant(Val: SPDiff, DL: dl, VT: MVT::i32));
5766
5767 // Add argument registers to the end of the list so that they are known live
5768 // into the call.
5769 for (const auto &[Reg, N] : RegsToPass)
5770 Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType()));
5771
5772 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5773 // no way to mark dependencies as implicit here.
5774 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5775 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5776 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5777 Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getTOCPointerRegister(), VT: RegVT));
5778
5779 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5780 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5781 Ops.push_back(Elt: DAG.getRegister(Reg: PPC::CR1EQ, VT: MVT::i32));
5782
5783 // Add a register mask operand representing the call-preserved registers.
5784 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5785 const uint32_t *Mask =
5786 TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CFlags.CallConv);
5787 assert(Mask && "Missing call preserved mask for calling convention");
5788 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
5789
5790 // If the glue is valid, it is the last operand.
5791 if (Glue.getNode())
5792 Ops.push_back(Elt: Glue);
5793}
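// For orientation (illustrative, not exhaustive): an indirect, non-tail call
// on 64-bit ELFv1 ends up with an operand list shaped roughly like
//   { Chain, SP + TOCSaveOffset, X11 (environment), arg registers...,
//     X2 (TOC), register mask, Glue }
// while a direct call simply starts with { Chain, Callee, ... }.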
5794
5795SDValue PPCTargetLowering::FinishCall(
5796 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5797 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5798 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5799 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5800 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5801
5802 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5803 Subtarget.isAIXABI())
5804 setUsesTOCBasePtr(DAG);
5805
5806 unsigned CallOpc =
5807 getCallOpcode(CFlags, Caller: DAG.getMachineFunction().getFunction(), Callee,
5808 Subtarget, TM: DAG.getTarget(), IsStrictFPCall: CB ? CB->isStrictFP() : false);
5809
5810 if (!CFlags.IsIndirect)
5811 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5812 else if (Subtarget.usesFunctionDescriptors())
5813 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5814 dl, hasNest: CFlags.HasNest, Subtarget);
5815 else
5816 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5817
5818 // Build the operand list for the call instruction.
5819 SmallVector<SDValue, 8> Ops;
5820 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5821 SPDiff, Subtarget);
5822
5823 // Emit tail call.
5824 if (CFlags.IsTailCall) {
5825 // Indirect tail calls when using PC Relative calls do not have the same
5826 // constraints.
5827 assert(((Callee.getOpcode() == ISD::Register &&
5828 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5829 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5830 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5831 isa<ConstantSDNode>(Callee) ||
5832 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5833 "Expecting a global address, external symbol, absolute value, "
5834 "register or an indirect tail call when PC Relative calls are "
5835 "used.");
5836 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5837 assert(CallOpc == PPCISD::TC_RETURN &&
5838 "Unexpected call opcode for a tail call.");
5839 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5840 SDValue Ret = DAG.getNode(Opcode: CallOpc, DL: dl, VT: MVT::Other, Ops);
5841 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CFlags.NoMerge);
5842 return Ret;
5843 }
5844
5845 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5846 Chain = DAG.getNode(Opcode: CallOpc, DL: dl, ResultTys: ReturnTypes, Ops);
5847 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CFlags.NoMerge);
5848 Glue = Chain.getValue(R: 1);
5849
5850 // When performing tail call optimization the callee pops its arguments off
5851 // the stack. Account for this here so these bytes can be pushed back on in
5852 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5853 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5854 getTargetMachine().Options.GuaranteedTailCallOpt)
5855 ? NumBytes
5856 : 0;
5857
5858 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: BytesCalleePops, Glue, DL: dl);
5859 Glue = Chain.getValue(R: 1);
5860
5861 return LowerCallResult(Chain, InGlue: Glue, CallConv: CFlags.CallConv, isVarArg: CFlags.IsVarArg, Ins, dl,
5862 DAG, InVals);
5863}
5864
5865bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5866 CallingConv::ID CalleeCC = CB->getCallingConv();
5867 const Function *CallerFunc = CB->getCaller();
5868 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5869 const Function *CalleeFunc = CB->getCalledFunction();
5870 if (!CalleeFunc)
5871 return false;
5872 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(Val: CalleeFunc);
5873
5874 SmallVector<ISD::OutputArg, 2> Outs;
5875 SmallVector<ISD::InputArg, 2> Ins;
5876
5877 GetReturnInfo(CC: CalleeCC, ReturnType: CalleeFunc->getReturnType(),
5878 attr: CalleeFunc->getAttributes(), Outs, TLI: *this,
5879 DL: CalleeFunc->getDataLayout());
5880
5881 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5882 isVarArg: CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5883 isCalleeExternalSymbol: false /*isCalleeExternalSymbol*/);
5884}
5885
5886bool PPCTargetLowering::isEligibleForTCO(
5887 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5888 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5889 const SmallVectorImpl<ISD::OutputArg> &Outs,
5890 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5891 bool isCalleeExternalSymbol) const {
5892 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5893 return false;
5894
5895 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5896 return IsEligibleForTailCallOptimization_64SVR4(
5897 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5898 isCalleeExternalSymbol);
5899 else
5900 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5901 isVarArg, Ins);
5902}
5903
5904SDValue
5905PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5906 SmallVectorImpl<SDValue> &InVals) const {
5907 SelectionDAG &DAG = CLI.DAG;
5908 SDLoc &dl = CLI.DL;
5909 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5910 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5911 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5912 SDValue Chain = CLI.Chain;
5913 SDValue Callee = CLI.Callee;
5914 bool &isTailCall = CLI.IsTailCall;
5915 CallingConv::ID CallConv = CLI.CallConv;
5916 bool isVarArg = CLI.IsVarArg;
5917 bool isPatchPoint = CLI.IsPatchPoint;
5918 const CallBase *CB = CLI.CB;
5919
5920 if (isTailCall) {
5921 MachineFunction &MF = DAG.getMachineFunction();
5922 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5923 auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
5924 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5925 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Val: Callee);
5926
5927 isTailCall =
5928 isEligibleForTCO(CalleeGV: GV, CalleeCC: CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5929 CallerFunc: &(MF.getFunction()), isCalleeExternalSymbol: IsCalleeExternalSymbol);
5930 if (isTailCall) {
5931 ++NumTailCalls;
5932 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5933 ++NumSiblingCalls;
5934
5935 // PC Relative calls no longer guarantee that the callee is a Global
5936 // Address Node. The callee could be an indirect tail call in which
5937 // case the SDValue for the callee could be a load (to load the address
5938 // of a function pointer) or it may be a register copy (to move the
5939 // address of the callee from a function parameter into a virtual
5940 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5941 assert((Subtarget.isUsingPCRelativeCalls() ||
5942 isa<GlobalAddressSDNode>(Callee)) &&
5943 "Callee should be an llvm::Function object.");
5944
5945 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5946 << "\nTCO callee: ");
5947 LLVM_DEBUG(Callee.dump());
5948 }
5949 }
5950
5951 if (!isTailCall && CB && CB->isMustTailCall())
5952 report_fatal_error(reason: "failed to perform tail call elimination on a call "
5953 "site marked musttail");
5954
5955 // When long calls (i.e. indirect calls) are always used, calls are always
5956 // made via function pointer. If we have a function name, first translate it
5957 // into a pointer.
5958 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Val: Callee) &&
5959 !isTailCall)
5960 Callee = LowerGlobalAddress(Op: Callee, DAG);
5961
5962 CallFlags CFlags(
5963 CallConv, isTailCall, isVarArg, isPatchPoint,
5964 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5965 // hasNest
5966 Subtarget.is64BitELFABI() &&
5967 any_of(Range&: Outs, P: [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5968 CLI.NoMerge);
5969
5970 if (Subtarget.isAIXABI())
5971 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5972 InVals, CB);
5973
5974 assert(Subtarget.isSVR4ABI());
5975 if (Subtarget.isPPC64())
5976 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5977 InVals, CB);
5978 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5979 InVals, CB);
5980}
5981
5982SDValue PPCTargetLowering::LowerCall_32SVR4(
5983 SDValue Chain, SDValue Callee, CallFlags CFlags,
5984 const SmallVectorImpl<ISD::OutputArg> &Outs,
5985 const SmallVectorImpl<SDValue> &OutVals,
5986 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5987 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5988 const CallBase *CB) const {
5989 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5990 // of the 32-bit SVR4 ABI stack frame layout.
5991
5992 const CallingConv::ID CallConv = CFlags.CallConv;
5993 const bool IsVarArg = CFlags.IsVarArg;
5994 const bool IsTailCall = CFlags.IsTailCall;
5995
5996 assert((CallConv == CallingConv::C ||
5997 CallConv == CallingConv::Cold ||
5998 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5999
6000 const Align PtrAlign(4);
6001
6002 MachineFunction &MF = DAG.getMachineFunction();
6003
6004 // Mark this function as potentially containing a function that contains a
6005 // tail call. As a consequence the frame pointer will be used for dynamic
6006 // allocas and for restoring the caller's stack pointer in this function's
6007 // epilogue. This is done because, by tail calling, the called function might
6008 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6009 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6010 CallConv == CallingConv::Fast)
6011 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6012
6013 // Count how many bytes are to be pushed on the stack, including the linkage
6014 // area, parameter list area and the part of the local variable space which
6015 // contains copies of aggregates which are passed by value.
6016
6017 // Assign locations to all of the outgoing arguments.
6018 SmallVector<CCValAssign, 16> ArgLocs;
6019 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6020
6021 // Reserve space for the linkage area on the stack.
6022 CCInfo.AllocateStack(Size: Subtarget.getFrameLowering()->getLinkageSize(),
6023 Alignment: PtrAlign);
6024 if (useSoftFloat())
6025 CCInfo.PreAnalyzeCallOperands(Outs);
6026
6027 if (IsVarArg) {
6028 // Handle fixed and variable vector arguments differently.
6029 // Fixed vector arguments go into registers as long as registers are
6030 // available. Variable vector arguments always go into memory.
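    // For example (illustrative): in a call to a variadic prototype such as
    //   void f(vector int, ...);  f(v0, v1);
    // the fixed argument v0 may still be assigned to v2..v13 by CC_PPC32_SVR4,
    // while the variadic argument v1 is assigned to memory by
    // CC_PPC32_SVR4_VarArg.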
6031 unsigned NumArgs = Outs.size();
6032
6033 for (unsigned i = 0; i != NumArgs; ++i) {
6034 MVT ArgVT = Outs[i].VT;
6035 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6036 bool Result;
6037
6038 if (Outs[i].IsFixed) {
6039 Result = CC_PPC32_SVR4(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full, ArgFlags,
6040 State&: CCInfo);
6041 } else {
6042 Result = CC_PPC32_SVR4_VarArg(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full,
6043 ArgFlags, State&: CCInfo);
6044 }
6045
6046 if (Result) {
6047#ifndef NDEBUG
6048 errs() << "Call operand #" << i << " has unhandled type "
6049 << ArgVT << "\n";
6050#endif
6051 llvm_unreachable(nullptr);
6052 }
6053 }
6054 } else {
6055 // All arguments are treated the same.
6056 CCInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4);
6057 }
6058 CCInfo.clearWasPPCF128();
6059
6060 // Assign locations to all of the outgoing aggregate by value arguments.
6061 SmallVector<CCValAssign, 16> ByValArgLocs;
6062 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6063
6064 // Reserve stack space for the allocations in CCInfo.
6065 CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);
6066
6067 CCByValInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4_ByVal);
6068
6069 // Size of the linkage area, parameter list area and the part of the local
6070 // space variable where copies of aggregates which are passed by value are
6071 // stored.
6072 unsigned NumBytes = CCByValInfo.getStackSize();
6073
6074 // Calculate by how many bytes the stack has to be adjusted in case of tail
6075 // call optimization.
6076 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: IsTailCall, ParamSize: NumBytes);
6077
6078 // Adjust the stack pointer for the new arguments...
6079 // These operations are automatically eliminated by the prolog/epilog pass
6080 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
6081 SDValue CallSeqStart = Chain;
6082
6083 // Load the return address and frame pointer so it can be moved somewhere else
6084 // later.
6085 SDValue LROp, FPOp;
6086 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);
6087
6088 // Set up a copy of the stack pointer for use loading and storing any
6089 // arguments that may not fit in the registers available for argument
6090 // passing.
6091 SDValue StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
6092
6093 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6094 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6095 SmallVector<SDValue, 8> MemOpChains;
6096
6097 bool seenFloatArg = false;
6098 // Walk the register/memloc assignments, inserting copies/loads.
6099 // i - Tracks the index into the list of registers allocated for the call
6100 // RealArgIdx - Tracks the index into the list of actual function arguments
6101 // j - Tracks the index into the list of byval arguments
6102 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6103 i != e;
6104 ++i, ++RealArgIdx) {
6105 CCValAssign &VA = ArgLocs[i];
6106 SDValue Arg = OutVals[RealArgIdx];
6107 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6108
6109 if (Flags.isByVal()) {
6110 // Argument is an aggregate which is passed by value, thus we need to
6111 // create a copy of it in the local variable space of the current stack
6112 // frame (which is the stack frame of the caller) and pass the address of
6113 // this copy to the callee.
6114 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6115 CCValAssign &ByValVA = ByValArgLocs[j++];
6116 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6117
6118 // Memory reserved in the local variable space of the caller's stack frame.
6119 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6120
6121 SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
6122 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
6123 N1: StackPtr, N2: PtrOff);
6124
6125 // Create a copy of the argument in the local area of the current
6126 // stack frame.
6127 SDValue MemcpyCall =
6128 CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
6129 Chain: CallSeqStart.getNode()->getOperand(Num: 0),
6130 Flags, DAG, dl);
6131
6132 // This must go outside the CALLSEQ_START..END.
6133 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: NumBytes, OutSize: 0,
6134 DL: SDLoc(MemcpyCall));
6135 DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
6136 To: NewCallSeqStart.getNode());
6137 Chain = CallSeqStart = NewCallSeqStart;
6138
6139 // Pass the address of the aggregate copy on the stack either in a
6140 // physical register or in the parameter list area of the current stack
6141 // frame to the callee.
6142 Arg = PtrOff;
6143 }
6144
6145 // When useCRBits() is true, there can be i1 arguments.
6146 // This is because getRegisterType(MVT::i1) => MVT::i1, while for other
6147 // integer types getRegisterType() => MVT::i32.
6148 // Extend i1 so the callee will get an i32.
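    // For example (illustrative): a 'signext i1 true' argument becomes the
    // i32 value -1, while a 'zeroext i1 true' argument becomes the i32 value 1.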
6149 if (Arg.getValueType() == MVT::i1)
6150 Arg = DAG.getNode(Opcode: Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6151 DL: dl, VT: MVT::i32, Operand: Arg);
6152
6153 if (VA.isRegLoc()) {
6154 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6155 // Put argument in a physical register.
6156 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6157 bool IsLE = Subtarget.isLittleEndian();
6158 SDValue SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
6159 N2: DAG.getIntPtrConstant(Val: IsLE ? 0 : 1, DL: dl));
6160 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y: SVal.getValue(R: 0)));
6161 SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
6162 N2: DAG.getIntPtrConstant(Val: IsLE ? 1 : 0, DL: dl));
6163 RegsToPass.push_back(Elt: std::make_pair(x: ArgLocs[++i].getLocReg(),
6164 y: SVal.getValue(R: 0)));
6165 } else
6166 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
6167 } else {
6168 // Put argument in the parameter list area of the current stack frame.
6169 assert(VA.isMemLoc());
6170 unsigned LocMemOffset = VA.getLocMemOffset();
6171
6172 if (!IsTailCall) {
6173 SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
6174 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
6175 N1: StackPtr, N2: PtrOff);
6176
6177 MemOpChains.push_back(
6178 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
6179 } else {
6180 // Calculate and remember argument location.
6181 CalculateTailCallArgDest(DAG, MF, IsPPC64: false, Arg, SPDiff, ArgOffset: LocMemOffset,
6182 TailCallArguments);
6183 }
6184 }
6185 }
6186
6187 if (!MemOpChains.empty())
6188 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
6189
6190 // Build a sequence of copy-to-reg nodes chained together with token chain
6191 // and flag operands which copy the outgoing args into the appropriate regs.
6192 SDValue InGlue;
6193 for (const auto &[Reg, N] : RegsToPass) {
6194 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
6195 InGlue = Chain.getValue(R: 1);
6196 }
6197
6198 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6199 // registers.
6200 if (IsVarArg) {
6201 SDVTList VTs = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
6202 SDValue Ops[] = { Chain, InGlue };
6203
6204 Chain = DAG.getNode(Opcode: seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, DL: dl,
6205 VTList: VTs, Ops: ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6206
6207 InGlue = Chain.getValue(R: 1);
6208 }
6209
6210 if (IsTailCall)
6211 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6212 TailCallArguments);
6213
6214 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
6215 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6216}
6217
6218// Copy an argument into memory, being careful to do this outside the
6219// call sequence for the call to which the argument belongs.
6220SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6221 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6222 SelectionDAG &DAG, const SDLoc &dl) const {
6223 SDValue MemcpyCall = CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
6224 Chain: CallSeqStart.getNode()->getOperand(Num: 0),
6225 Flags, DAG, dl);
6226 // The MEMCPY must go outside the CALLSEQ_START..END.
6227 int64_t FrameSize = CallSeqStart.getConstantOperandVal(i: 1);
6228 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: FrameSize, OutSize: 0,
6229 DL: SDLoc(MemcpyCall));
6230 DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
6231 To: NewCallSeqStart.getNode());
6232 return NewCallSeqStart;
6233}
6234
6235SDValue PPCTargetLowering::LowerCall_64SVR4(
6236 SDValue Chain, SDValue Callee, CallFlags CFlags,
6237 const SmallVectorImpl<ISD::OutputArg> &Outs,
6238 const SmallVectorImpl<SDValue> &OutVals,
6239 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6240 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6241 const CallBase *CB) const {
6242 bool isELFv2ABI = Subtarget.isELFv2ABI();
6243 bool isLittleEndian = Subtarget.isLittleEndian();
6244 unsigned NumOps = Outs.size();
6245 bool IsSibCall = false;
6246 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6247
6248 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
6249 unsigned PtrByteSize = 8;
6250
6251 MachineFunction &MF = DAG.getMachineFunction();
6252
6253 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6254 IsSibCall = true;
6255
6256 // Mark this function as potentially containing a function that contains a
6257 // tail call. As a consequence the frame pointer will be used for dynamic
6258 // allocas and for restoring the caller's stack pointer in this function's
6259 // epilogue. This is done because, by tail calling, the called function might
6260 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6261 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6262 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6263
6264 assert(!(IsFastCall && CFlags.IsVarArg) &&
6265 "fastcc not supported on varargs functions");
6266
6267 // Count how many bytes are to be pushed on the stack, including the linkage
6268 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6269 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6270 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
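  // Illustrative layouts (offsets from the stack pointer, as described above):
  //   ELFv1 (48 bytes): 0 back chain, 8 CR save, 16 LR save,
  //                     24/32 reserved doublewords, 40 TOC save
  //   ELFv2 (32 bytes): 0 back chain, 8 CR save, 16 LR save, 24 TOC save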
6271 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6272 unsigned NumBytes = LinkageSize;
6273 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6274
6275 static const MCPhysReg GPR[] = {
6276 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6277 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6278 };
6279 static const MCPhysReg VR[] = {
6280 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6281 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6282 };
6283
6284 const unsigned NumGPRs = std::size(GPR);
6285 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6286 const unsigned NumVRs = std::size(VR);
6287
6288 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6289 // can be passed to the callee in registers.
6290 // For the fast calling convention, there is another check below.
6291 // Note: We should keep this consistent with LowerFormalArguments_64SVR4().
6292 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6293 if (!HasParameterArea) {
6294 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6295 unsigned AvailableFPRs = NumFPRs;
6296 unsigned AvailableVRs = NumVRs;
6297 unsigned NumBytesTmp = NumBytes;
6298 for (unsigned i = 0; i != NumOps; ++i) {
6299 if (Outs[i].Flags.isNest()) continue;
6300 if (CalculateStackSlotUsed(ArgVT: Outs[i].VT, OrigVT: Outs[i].ArgVT, Flags: Outs[i].Flags,
6301 PtrByteSize, LinkageSize, ParamAreaSize,
6302 ArgOffset&: NumBytesTmp, AvailableFPRs, AvailableVRs))
6303 HasParameterArea = true;
6304 }
6305 }
6306
6307 // When using the fast calling convention, we don't provide backing for
6308 // arguments that will be in registers.
6309 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6310
6311 // Avoid allocating parameter area for fastcc functions if all the arguments
6312 // can be passed in the registers.
6313 if (IsFastCall)
6314 HasParameterArea = false;
6315
6316 // Add up all the space actually used.
6317 for (unsigned i = 0; i != NumOps; ++i) {
6318 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6319 EVT ArgVT = Outs[i].VT;
6320 EVT OrigVT = Outs[i].ArgVT;
6321
6322 if (Flags.isNest())
6323 continue;
6324
6325 if (IsFastCall) {
6326 if (Flags.isByVal()) {
6327 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6328 if (NumGPRsUsed > NumGPRs)
6329 HasParameterArea = true;
6330 } else {
6331 switch (ArgVT.getSimpleVT().SimpleTy) {
6332 default: llvm_unreachable("Unexpected ValueType for argument!");
6333 case MVT::i1:
6334 case MVT::i32:
6335 case MVT::i64:
6336 if (++NumGPRsUsed <= NumGPRs)
6337 continue;
6338 break;
6339 case MVT::v4i32:
6340 case MVT::v8i16:
6341 case MVT::v16i8:
6342 case MVT::v2f64:
6343 case MVT::v2i64:
6344 case MVT::v1i128:
6345 case MVT::f128:
6346 if (++NumVRsUsed <= NumVRs)
6347 continue;
6348 break;
6349 case MVT::v4f32:
6350 if (++NumVRsUsed <= NumVRs)
6351 continue;
6352 break;
6353 case MVT::f32:
6354 case MVT::f64:
6355 if (++NumFPRsUsed <= NumFPRs)
6356 continue;
6357 break;
6358 }
6359 HasParameterArea = true;
6360 }
6361 }
6362
6363 /* Respect alignment of argument on the stack. */
6364 auto Alignment =
6365 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6366 NumBytes = alignTo(Size: NumBytes, A: Alignment);
6367
6368 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6369 if (Flags.isInConsecutiveRegsLast())
6370 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6371 }
6372
6373 unsigned NumBytesActuallyUsed = NumBytes;
6374
6375 // In the old ELFv1 ABI,
6376 // the prolog code of the callee may store up to 8 GPR argument registers to
6377 // the stack, allowing va_start to index over them in memory if it is varargs.
6378 // Because we cannot tell if this is needed on the caller side, we have to
6379 // conservatively assume that it is needed. As such, make sure we have at
6380 // least enough stack space for the caller to store the 8 GPRs.
6381 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6382 // really requires memory operands, e.g. a vararg function.
6383 if (HasParameterArea)
6384 NumBytes = std::max(a: NumBytes, b: LinkageSize + 8 * PtrByteSize);
6385 else
6386 NumBytes = LinkageSize;
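  // With the linkage sizes above, this clamp means a frame with a parameter
  // area reserves at least 48 + 64 = 112 bytes on ELFv1 and 32 + 64 = 96
  // bytes on ELFv2, while a frame without one reserves only the linkage area.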
6387
6388 // Tail call needs the stack to be aligned.
6389 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6390 NumBytes = EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes);
6391
6392 int SPDiff = 0;
6393
6394 // Calculate by how many bytes the stack has to be adjusted in case of tail
6395 // call optimization.
6396 if (!IsSibCall)
6397 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: CFlags.IsTailCall, ParamSize: NumBytes);
6398
6399 // To protect arguments on the stack from being clobbered in a tail call,
6400 // force all the loads to happen before doing any other lowering.
6401 if (CFlags.IsTailCall)
6402 Chain = DAG.getStackArgumentTokenFactor(Chain);
6403
6404 // Adjust the stack pointer for the new arguments...
6405 // These operations are automatically eliminated by the prolog/epilog pass
6406 if (!IsSibCall)
6407 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
6408 SDValue CallSeqStart = Chain;
6409
6410 // Load the return address and frame pointer so they can be moved somewhere
6411 // else later.
6412 SDValue LROp, FPOp;
6413 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);
6414
6415 // Set up a copy of the stack pointer for use in loading and storing any
6416 // arguments that may not fit in the registers available for argument
6417 // passing.
6418 SDValue StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
6419
6420 // Figure out which arguments are going to go in registers, and which in
6421 // memory. Also, if this is a vararg function, floating-point arguments
6422 // must be stored to our stack, and also loaded into integer regs, if
6423 // any integer regs are available for argument passing.
6424 unsigned ArgOffset = LinkageSize;
6425
6426 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6427 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6428
6429 SmallVector<SDValue, 8> MemOpChains;
6430 for (unsigned i = 0; i != NumOps; ++i) {
6431 SDValue Arg = OutVals[i];
6432 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6433 EVT ArgVT = Outs[i].VT;
6434 EVT OrigVT = Outs[i].ArgVT;
6435
6436 // PtrOff will be used to store the current argument to the stack if a
6437 // register cannot be found for it.
6438 SDValue PtrOff;
6439
6440 // We re-align the argument offset for each argument, except when using the
6441 // fast calling convention, where we must make sure to do so only when the
6442 // argument will actually use a stack slot.
6443 auto ComputePtrOff = [&]() {
6444 /* Respect alignment of argument on the stack. */
6445 auto Alignment =
6446 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6447 ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
6448
6449 PtrOff = DAG.getConstant(Val: ArgOffset, DL: dl, VT: StackPtr.getValueType());
6450
6451 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6452 };
6453
6454 if (!IsFastCall) {
6455 ComputePtrOff();
6456
6457 /* Compute GPR index associated with argument offset. */
6458 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6459 GPR_idx = std::min(a: GPR_idx, b: NumGPRs);
6460 }
6461
6462 // Promote integers to 64-bit values.
6463 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6464 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6465 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6466 Arg = DAG.getNode(Opcode: ExtOp, DL: dl, VT: MVT::i64, Operand: Arg);
6467 }
6468
6469 // FIXME memcpy is used way more than necessary. Correctness first.
6470 // Note: "by value" is code for passing a structure by value, not
6471 // basic types.
6472 if (Flags.isByVal()) {
6473 // Note: Size includes alignment padding, so
6474 // struct x { short a; char b; }
6475 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6476 // These are the proper values we need for right-justifying the
6477 // aggregate in a parameter register.
6478 unsigned Size = Flags.getByValSize();
6479
6480 // An empty aggregate parameter takes up no storage and no
6481 // registers.
6482 if (Size == 0)
6483 continue;
6484
6485 if (IsFastCall)
6486 ComputePtrOff();
6487
6488 // All aggregates smaller than 8 bytes must be passed right-justified.
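  // For example, a 4-byte aggregate handled below is fetched with a 32-bit
  // extending load into a 64-bit GPR, so its bytes end up in the low
  // (rightmost) half of the register, which is the right-justified form
  // expected here.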
6489 if (Size==1 || Size==2 || Size==4) {
6490 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6491 if (GPR_idx != NumGPRs) {
6492 SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: Arg,
6493 PtrInfo: MachinePointerInfo(), MemVT: VT);
6494 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6495 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6496
6497 ArgOffset += PtrByteSize;
6498 continue;
6499 }
6500 }
6501
6502 if (GPR_idx == NumGPRs && Size < 8) {
6503 SDValue AddPtr = PtrOff;
6504 if (!isLittleEndian) {
6505 SDValue Const = DAG.getConstant(Val: PtrByteSize - Size, DL: dl,
6506 VT: PtrOff.getValueType());
6507 AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6508 }
6509 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6510 CallSeqStart,
6511 Flags, DAG, dl);
6512 ArgOffset += PtrByteSize;
6513 continue;
6514 }
6515 // Copy the object to the parameter save area if it cannot be passed
6516 // entirely in registers.
6517 // FIXME: we only need to copy the parts which need to be passed in the
6518 // parameter save area. For the parts passed in registers, we don't need
6519 // to copy them to the stack, although we do need to allocate space for
6520 // them in the parameter save area.
6521 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6522 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6523 CallSeqStart,
6524 Flags, DAG, dl);
6525
6526 // When a register is available, pass a small aggregate right-justified.
6527 if (Size < 8 && GPR_idx != NumGPRs) {
6528 // The easiest way to get this right-justified in a register
6529 // is to copy the structure into the rightmost portion of a
6530 // local variable slot, then load the whole slot into the
6531 // register.
6532 // FIXME: The memcpy seems to produce pretty awful code for
6533 // small aggregates, particularly for packed ones.
6534 // FIXME: It would be preferable to use the slot in the
6535 // parameter save area instead of a new local variable.
6536 SDValue AddPtr = PtrOff;
6537 if (!isLittleEndian) {
6538 SDValue Const = DAG.getConstant(Val: 8 - Size, DL: dl, VT: PtrOff.getValueType());
6539 AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6540 }
6541 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6542 CallSeqStart,
6543 Flags, DAG, dl);
6544
6545 // Load the slot into the register.
6546 SDValue Load =
6547 DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6548 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6549 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6550
6551 // Done with this argument.
6552 ArgOffset += PtrByteSize;
6553 continue;
6554 }
6555
6556 // For aggregates larger than PtrByteSize, copy the pieces of the
6557 // object that fit into registers from the parameter save area.
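  // For example, a 20-byte aggregate with three GPRs still free is split
  // below into two 8-byte loads (offsets 0 and 8) and one 4-byte extending
  // load (offset 16), each passed in its own GPR; pieces left without a GPR
  // remain in the parameter save area copy made above.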
6558 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6559 SDValue Const = DAG.getConstant(Val: j, DL: dl, VT: PtrOff.getValueType());
6560 SDValue AddArg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Arg, N2: Const);
6561 if (GPR_idx != NumGPRs) {
6562 unsigned LoadSizeInBits = std::min(a: PtrByteSize, b: (Size - j)) * 8;
6563 EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadSizeInBits);
6564 SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: AddArg,
6565 PtrInfo: MachinePointerInfo(), MemVT: ObjType);
6566
6567 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6568 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6569 ArgOffset += PtrByteSize;
6570 } else {
6571 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6572 break;
6573 }
6574 }
6575 continue;
6576 }
6577
6578 switch (Arg.getSimpleValueType().SimpleTy) {
6579 default: llvm_unreachable("Unexpected ValueType for argument!");
6580 case MVT::i1:
6581 case MVT::i32:
6582 case MVT::i64:
6583 if (Flags.isNest()) {
6584 // The 'nest' parameter, if any, is passed in R11.
6585 RegsToPass.push_back(Elt: std::make_pair(x: PPC::X11, y&: Arg));
6586 break;
6587 }
6588
6589 // These can be scalar arguments or elements of an integer array type
6590 // passed directly. Clang may use those instead of "byval" aggregate
6591 // types to avoid forcing arguments to memory unnecessarily.
6592 if (GPR_idx != NumGPRs) {
6593 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Arg));
6594 } else {
6595 if (IsFastCall)
6596 ComputePtrOff();
6597
6598 assert(HasParameterArea &&
6599 "Parameter area must exist to pass an argument in memory.");
6600 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6601 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6602 TailCallArguments, dl);
6603 if (IsFastCall)
6604 ArgOffset += PtrByteSize;
6605 }
6606 if (!IsFastCall)
6607 ArgOffset += PtrByteSize;
6608 break;
6609 case MVT::f32:
6610 case MVT::f64: {
6611 // These can be scalar arguments or elements of a float array type
6612 // passed directly. The latter are used to implement ELFv2 homogeneous
6613 // float aggregates.
6614
6615 // Named arguments go into FPRs first, and once they overflow, the
6616 // remaining arguments go into GPRs and then the parameter save area.
6617 // Unnamed arguments for vararg functions always go to GPRs and
6618 // then the parameter save area. For now, we always put arguments to vararg
6619 // routines in both locations (FPR *and* GPR or stack slot).
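  // For illustration, consider an ELFv2 homogeneous aggregate of f32 values
  // once the FPRs run out: an element at an odd 4-byte offset is paired with
  // its predecessor into a single i64 GPR (halves swapped on big-endian),
  // while a final even element occupies half a GPR on its own, shifted into
  // the most significant half on big-endian.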
6620 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6621 bool NeededLoad = false;
6622
6623 // First load the argument into the next available FPR.
6624 if (FPR_idx != NumFPRs)
6625 RegsToPass.push_back(Elt: std::make_pair(x: FPR[FPR_idx++], y&: Arg));
6626
6627 // Next, load the argument into GPR or stack slot if needed.
6628 if (!NeedGPROrStack)
6629 ;
6630 else if (GPR_idx != NumGPRs && !IsFastCall) {
6631 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6632 // once we support fp <-> gpr moves.
6633
6634 // In the non-vararg case, this can only ever happen in the
6635 // presence of f32 array types, since otherwise we never run
6636 // out of FPRs before running out of GPRs.
6637 SDValue ArgVal;
6638
6639 // Double values are always passed in a single GPR.
6640 if (Arg.getValueType() != MVT::f32) {
6641 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: Arg);
6642
6643 // Non-array float values are extended and passed in a GPR.
6644 } else if (!Flags.isInConsecutiveRegs()) {
6645 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6646 ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6647
6648 // If we have an array of floats, we collect every odd element
6649 // together with its predecessor into one GPR.
6650 } else if (ArgOffset % PtrByteSize != 0) {
6651 SDValue Lo, Hi;
6652 Lo = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: OutVals[i - 1]);
6653 Hi = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6654 if (!isLittleEndian)
6655 std::swap(a&: Lo, b&: Hi);
6656 ArgVal = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
6657
6658 // The final element, if even, goes into the first half of a GPR.
6659 } else if (Flags.isInConsecutiveRegsLast()) {
6660 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6661 ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6662 if (!isLittleEndian)
6663 ArgVal = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: ArgVal,
6664 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
6665
6666 // Non-final even elements are skipped; they will be handled
6667 // together with the subsequent argument on the next go-around.
6668 } else
6669 ArgVal = SDValue();
6670
6671 if (ArgVal.getNode())
6672 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: ArgVal));
6673 } else {
6674 if (IsFastCall)
6675 ComputePtrOff();
6676
6677 // Single-precision floating-point values are mapped to the
6678 // second (rightmost) word of the stack doubleword.
6679 if (Arg.getValueType() == MVT::f32 &&
6680 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6681 SDValue ConstFour = DAG.getConstant(Val: 4, DL: dl, VT: PtrOff.getValueType());
6682 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: ConstFour);
6683 }
6684
6685 assert(HasParameterArea &&
6686 "Parameter area must exist to pass an argument in memory.");
6687 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6688 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6689 TailCallArguments, dl);
6690
6691 NeededLoad = true;
6692 }
6693 // When passing an array of floats, the array occupies consecutive
6694 // space in the argument area; only round up to the next doubleword
6695 // at the end of the array. Otherwise, each float takes 8 bytes.
6696 if (!IsFastCall || NeededLoad) {
6697 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6698 Flags.isInConsecutiveRegs()) ? 4 : 8;
6699 if (Flags.isInConsecutiveRegsLast())
6700 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6701 }
6702 break;
6703 }
6704 case MVT::v4f32:
6705 case MVT::v4i32:
6706 case MVT::v8i16:
6707 case MVT::v16i8:
6708 case MVT::v2f64:
6709 case MVT::v2i64:
6710 case MVT::v1i128:
6711 case MVT::f128:
6712 // These can be scalar arguments or elements of a vector array type
6713 // passed directly. The latter are used to implement ELFv2 homogeneous
6714 // vector aggregates.
6715
6716 // For a varargs call, named arguments go into VRs or on the stack as
6717 // usual; unnamed arguments always go to the stack or the corresponding
6718 // GPRs when within range. For now, we always put the value in both
6719 // locations (or even all three).
6720 if (CFlags.IsVarArg) {
6721 assert(HasParameterArea &&
6722 "Parameter area must exist if we have a varargs call.");
6723 // We could elide this store in the case where the object fits
6724 // entirely in R registers. Maybe later.
6725 SDValue Store =
6726 DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6727 MemOpChains.push_back(Elt: Store);
6728 if (VR_idx != NumVRs) {
6729 SDValue Load =
6730 DAG.getLoad(VT: MVT::v4f32, dl, Chain: Store, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6731 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6732 RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Load));
6733 }
6734 ArgOffset += 16;
6735 for (unsigned i=0; i<16; i+=PtrByteSize) {
6736 if (GPR_idx == NumGPRs)
6737 break;
6738 SDValue Ix = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
6739 N2: DAG.getConstant(Val: i, DL: dl, VT: PtrVT));
6740 SDValue Load =
6741 DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Ix, PtrInfo: MachinePointerInfo());
6742 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6743 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6744 }
6745 break;
6746 }
6747
6748 // Non-varargs Altivec params go into VRs or on the stack.
6749 if (VR_idx != NumVRs) {
6750 RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Arg));
6751 } else {
6752 if (IsFastCall)
6753 ComputePtrOff();
6754
6755 assert(HasParameterArea &&
6756 "Parameter area must exist to pass an argument in memory.");
6757 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6758 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: true, MemOpChains,
6759 TailCallArguments, dl);
6760 if (IsFastCall)
6761 ArgOffset += 16;
6762 }
6763
6764 if (!IsFastCall)
6765 ArgOffset += 16;
6766 break;
6767 }
6768 }
6769
6770 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6771 "mismatch in size of parameter area");
6772 (void)NumBytesActuallyUsed;
6773
6774 if (!MemOpChains.empty())
6775 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
6776
6777 // Check if this is an indirect call (MTCTR/BCTRL).
6778 // See prepareDescriptorIndirectCall and buildCallOperands for more
6779 // information about calls through function pointers in the 64-bit SVR4 ABI.
6780 if (CFlags.IsIndirect) {
6781 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6782 // caller in the TOC save area.
6783 if (isTOCSaveRestoreRequired(Subtarget)) {
6784 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6785 // Load r2 into a virtual register and store it to the TOC save area.
6786 setUsesTOCBasePtr(DAG);
6787 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: PPC::X2, VT: MVT::i64);
6788 // TOC save area offset.
6789 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6790 SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
6791 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6792 Chain = DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: AddPtr,
6793 PtrInfo: MachinePointerInfo::getStack(
6794 MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
6795 }
6796 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6797 // This does not mean the MTCTR instruction must use R12; it's easier
6798 // to model this as an extra parameter, so do that.
6799 if (isELFv2ABI && !CFlags.IsPatchPoint)
6800 RegsToPass.push_back(Elt: std::make_pair(x: (unsigned)PPC::X12, y&: Callee));
6801 }
6802
6803 // Build a sequence of copy-to-reg nodes chained together with token chain
6804 // and flag operands which copy the outgoing args into the appropriate regs.
6805 SDValue InGlue;
6806 for (const auto &[Reg, N] : RegsToPass) {
6807 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
6808 InGlue = Chain.getValue(R: 1);
6809 }
6810
6811 if (CFlags.IsTailCall && !IsSibCall)
6812 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6813 TailCallArguments);
6814
6815 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
6816 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6817}
6818
6819// Returns true when the shadow of a general purpose argument register
6820// in the parameter save area is aligned to at least 'RequiredAlign'.
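// For example, with the AIX linkage sizes (24 bytes on 32-bit, 48 bytes on
// 64-bit), R5's shadow sits at SP+32 and X3's at SP+48, both 16-byte aligned,
// whereas R3 (SP+24) and X4 (SP+56) are only 8-byte aligned and R4 (SP+28)
// only 4-byte aligned; the cases below encode exactly this pattern.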
6821static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6822 assert(RequiredAlign.value() <= 16 &&
6823 "Required alignment greater than stack alignment.");
6824 switch (Reg) {
6825 default:
6826 report_fatal_error(reason: "called on invalid register.");
6827 case PPC::R5:
6828 case PPC::R9:
6829 case PPC::X3:
6830 case PPC::X5:
6831 case PPC::X7:
6832 case PPC::X9:
6833 // These registers are 16-byte aligned, which is the strictest alignment
6834 // we can support.
6835 return true;
6836 case PPC::R3:
6837 case PPC::R7:
6838 case PPC::X4:
6839 case PPC::X6:
6840 case PPC::X8:
6841 case PPC::X10:
6842 // The shadow of these registers in the PSA is 8 byte aligned.
6843 return RequiredAlign <= 8;
6844 case PPC::R4:
6845 case PPC::R6:
6846 case PPC::R8:
6847 case PPC::R10:
6848 return RequiredAlign <= 4;
6849 }
6850}
6851
6852static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6853 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6854 CCState &S) {
6855 AIXCCState &State = static_cast<AIXCCState &>(S);
6856 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6857 State.getMachineFunction().getSubtarget());
6858 const bool IsPPC64 = Subtarget.isPPC64();
6859 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6860 const Align PtrAlign(PtrSize);
6861 const Align StackAlign(16);
6862 const MVT RegVT = Subtarget.getScalarIntVT();
6863
6864 if (ValVT == MVT::f128)
6865 report_fatal_error(reason: "f128 is unimplemented on AIX.");
6866
6867 if (ArgFlags.isNest())
6868 report_fatal_error(reason: "Nest arguments are unimplemented.");
6869
6870 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6871 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6872 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6873 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6874 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6875 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6876
6877 static const MCPhysReg VR[] = {// Vector registers.
6878 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6879 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6880 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6881
6882 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6883
6884 if (ArgFlags.isByVal()) {
6885 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6886 if (ByValAlign > StackAlign)
6887 report_fatal_error(reason: "Pass-by-value arguments with alignment greater than "
6888 "16 are not supported.");
6889
6890 const unsigned ByValSize = ArgFlags.getByValSize();
6891 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6892
6893 // An empty aggregate parameter takes up no storage and no registers,
6894 // but needs a MemLoc for a stack slot for the formal arguments side.
6895 if (ByValSize == 0) {
6896 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6897 Offset: State.getStackSize(), LocVT: RegVT, HTP: LocInfo));
6898 return false;
6899 }
6900
6901 // Shadow allocate any registers that are not properly aligned.
6902 unsigned NextReg = State.getFirstUnallocated(Regs: GPRs);
6903 while (NextReg != GPRs.size() &&
6904 !isGPRShadowAligned(Reg: GPRs[NextReg], RequiredAlign: ObjAlign)) {
6905 // Shadow allocate the next register since its alignment is not strict enough.
6906 MCRegister Reg = State.AllocateReg(Regs: GPRs);
6907 // Allocate the stack space shadowed by said register.
6908 State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6909 assert(Reg && "Allocating register unexpectedly failed.");
6910 (void)Reg;
6911 NextReg = State.getFirstUnallocated(Regs: GPRs);
6912 }
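    // For example, a 16-byte-aligned by-value argument arriving on 64-bit
    // when X4 is the next free GPR burns X4 (and the 8 bytes of parameter
    // save area it shadows) so that the object starts at X5's 16-byte
    // aligned shadow.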
6913
6914 const unsigned StackSize = alignTo(Size: ByValSize, A: ObjAlign);
6915 unsigned Offset = State.AllocateStack(Size: StackSize, Alignment: ObjAlign);
6916 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6917 if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
6918 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6919 else {
6920 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6921 Offset, LocVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6922 HTP: LocInfo));
6923 break;
6924 }
6925 }
6926 return false;
6927 }
6928
6929 // Arguments always reserve space in the parameter save area.
6930 switch (ValVT.SimpleTy) {
6931 default:
6932 report_fatal_error(reason: "Unhandled value type for argument.");
6933 case MVT::i64:
6934 // i64 arguments should have been split to i32 for PPC32.
6935 assert(IsPPC64 && "PPC32 should have split i64 values.");
6936 [[fallthrough]];
6937 case MVT::i1:
6938 case MVT::i32: {
6939 const unsigned Offset = State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6940 // AIX integer arguments are always passed in register width.
6941 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6942 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6943 : CCValAssign::LocInfo::ZExt;
6944 if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
6945 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6946 else
6947 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT: RegVT, HTP: LocInfo));
6948
6949 return false;
6950 }
6951 case MVT::f32:
6952 case MVT::f64: {
6953 // Parameter save area (PSA) is reserved even if the float is passed in an FPR.
6954 const unsigned StoreSize = LocVT.getStoreSize();
6955 // Floats are always 4-byte aligned in the PSA on AIX.
6956 // This includes f64 in 64-bit mode for ABI compatibility.
6957 const unsigned Offset =
6958 State.AllocateStack(Size: IsPPC64 ? 8 : StoreSize, Alignment: Align(4));
6959 MCRegister FReg = State.AllocateReg(Regs: FPR);
6960 if (FReg)
6961 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: FReg, LocVT, HTP: LocInfo));
6962
6963 // Reserve and initialize GPRs or initialize the PSA as required.
6964 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6965 if (MCRegister Reg = State.AllocateReg(Regs: GPRs)) {
6966 assert(FReg && "An FPR should be available when a GPR is reserved.");
6967 if (State.isVarArg()) {
6968 // Successfully reserved GPRs are only initialized for vararg calls.
6969 // Custom handling is required for:
6970 // f64 in PPC32 needs to be split into 2 GPRs.
6971 // f32 in PPC64 needs to occupy only the lower 32 bits of a 64-bit GPR.
6972 State.addLoc(
6973 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6974 }
6975 } else {
6976 // If there are insufficient GPRs, the PSA needs to be initialized.
6977 // Initialization occurs even if an FPR was initialized for
6978 // compatibility with the AIX XL compiler. The full memory for the
6979 // argument will be initialized even if a prior word is saved in a GPR.
6980 // A custom memLoc is used when the argument is also passed in an FPR so
6981 // that the callee handling can skip over it easily.
6982 State.addLoc(
6983 V: FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6984 HTP: LocInfo)
6985 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6986 break;
6987 }
6988 }
6989
6990 return false;
6991 }
6992 case MVT::v4f32:
6993 case MVT::v4i32:
6994 case MVT::v8i16:
6995 case MVT::v16i8:
6996 case MVT::v2i64:
6997 case MVT::v2f64:
6998 case MVT::v1i128: {
6999 const unsigned VecSize = 16;
7000 const Align VecAlign(VecSize);
7001
7002 if (!State.isVarArg()) {
7003 // If there are vector registers remaining we don't consume any stack
7004 // space.
7005 if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
7006 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
7007 return false;
7008 }
7009 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7010 // might be allocated in the portion of the PSA that is shadowed by the
7011 // GPRs.
7012 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7013 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7014 return false;
7015 }
7016
7017 unsigned NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
7018 // Burn any underaligned registers and their shadowed stack space until
7019 // we reach the required alignment.
7020 while (NextRegIndex != GPRs.size() &&
7021 !isGPRShadowAligned(Reg: GPRs[NextRegIndex], RequiredAlign: VecAlign)) {
7022 // Shadow allocate register and its stack shadow.
7023 MCRegister Reg = State.AllocateReg(Regs: GPRs);
7024 State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
7025 assert(Reg && "Allocating register unexpectedly failed.");
7026 (void)Reg;
7027 NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
7028 }
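    // For example, on 32-bit a variadic vector that finds R6 as the next free
    // GPR burns R6, R7 and R8 (and the 12 bytes of PSA they shadow) so the
    // value starts at R9's 16-byte aligned shadow, which is then handled by
    // the R9/R10 split case below.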
7029
7030 // Vectors that are passed as fixed arguments are handled differently.
7031 // They are passed in VRs if any are available (unlike arguments passed
7032 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
7033 // functions).
7034 if (State.isFixed(ValNo)) {
7035 if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
7036 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
7037 // Shadow allocate GPRs and stack space even though we pass in a VR.
7038 for (unsigned I = 0; I != VecSize; I += PtrSize)
7039 State.AllocateReg(Regs: GPRs);
7040 State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7041 return false;
7042 }
7043 // No vector registers remain so pass on the stack.
7044 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7045 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7046 return false;
7047 }
7048
7049 // If all GPRS are consumed then we pass the argument fully on the stack.
7050 if (NextRegIndex == GPRs.size()) {
7051 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7052 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7053 return false;
7054 }
7055
7056 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7057 // half of the argument, and then need to pass the remaining half on the
7058 // stack.
7059 if (GPRs[NextRegIndex] == PPC::R9) {
7060 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7061 State.addLoc(
7062 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7063
7064 const MCRegister FirstReg = State.AllocateReg(Reg: PPC::R9);
7065 const MCRegister SecondReg = State.AllocateReg(Reg: PPC::R10);
7066 assert(FirstReg && SecondReg &&
7067 "Allocating R9 or R10 unexpectedly failed.");
7068 State.addLoc(
7069 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: FirstReg, LocVT: RegVT, HTP: LocInfo));
7070 State.addLoc(
7071 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: SecondReg, LocVT: RegVT, HTP: LocInfo));
7072 return false;
7073 }
7074
7075 // We have enough GPRs to fully pass the vector argument, and we have
7076 // already consumed any underaligned registers. Start with the custom
7077 // MemLoc and then the custom RegLocs.
7078 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7079 State.addLoc(
7080 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7081 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7082 const MCRegister Reg = State.AllocateReg(Regs: GPRs);
7083 assert(Reg && "Failed to allocate register for vararg vector argument");
7084 State.addLoc(
7085 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
7086 }
7087 return false;
7088 }
7089 }
7090 return true;
7091}
7092
7093// So far, this function is only used by LowerFormalArguments_AIX()
7094static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7095 bool IsPPC64,
7096 bool HasP8Vector,
7097 bool HasVSX) {
7098 assert((IsPPC64 || SVT != MVT::i64) &&
7099 "i64 should have been split for 32-bit codegen.");
7100
7101 switch (SVT) {
7102 default:
7103 report_fatal_error(reason: "Unexpected value type for formal argument");
7104 case MVT::i1:
7105 case MVT::i32:
7106 case MVT::i64:
7107 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7108 case MVT::f32:
7109 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7110 case MVT::f64:
7111 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7112 case MVT::v4f32:
7113 case MVT::v4i32:
7114 case MVT::v8i16:
7115 case MVT::v16i8:
7116 case MVT::v2i64:
7117 case MVT::v2f64:
7118 case MVT::v1i128:
7119 return &PPC::VRRCRegClass;
7120 }
7121}
7122
7123static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7124 SelectionDAG &DAG, SDValue ArgValue,
7125 MVT LocVT, const SDLoc &dl) {
7126 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7127 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7128
7129 if (Flags.isSExt())
7130 ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: LocVT, N1: ArgValue,
7131 N2: DAG.getValueType(ValVT));
7132 else if (Flags.isZExt())
7133 ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: LocVT, N1: ArgValue,
7134 N2: DAG.getValueType(ValVT));
7135
7136 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ValVT, Operand: ArgValue);
7137}
7138
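// Maps a GPR argument register to the offset of its shadow slot in the AIX
// parameter save area; e.g. with the 48-byte 64-bit linkage area, X5 maps to
// 48 + 8 * 2 = 64 bytes from the stack pointer.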
7139static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7140 const unsigned LASize = FL->getLinkageSize();
7141
7142 if (PPC::GPRCRegClass.contains(Reg)) {
7143 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7144 "Reg must be a valid argument register!");
7145 return LASize + 4 * (Reg - PPC::R3);
7146 }
7147
7148 if (PPC::G8RCRegClass.contains(Reg)) {
7149 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7150 "Reg must be a valid argument register!");
7151 return LASize + 8 * (Reg - PPC::X3);
7152 }
7153
7154 llvm_unreachable("Only general purpose registers expected.");
7155}
7156
7157// AIX ABI Stack Frame Layout:
7158//
7159// Low Memory +--------------------------------------------+
7160// SP +---> | Back chain | ---+
7161// | +--------------------------------------------+ |
7162// | | Saved Condition Register | |
7163// | +--------------------------------------------+ |
7164// | | Saved Linkage Register | |
7165// | +--------------------------------------------+ | Linkage Area
7166// | | Reserved for compilers | |
7167// | +--------------------------------------------+ |
7168// | | Reserved for binders | |
7169// | +--------------------------------------------+ |
7170// | | Saved TOC pointer | ---+
7171// | +--------------------------------------------+
7172// | | Parameter save area |
7173// | +--------------------------------------------+
7174// | | Alloca space |
7175// | +--------------------------------------------+
7176// | | Local variable space |
7177// | +--------------------------------------------+
7178// | | Float/int conversion temporary |
7179// | +--------------------------------------------+
7180// | | Save area for AltiVec registers |
7181// | +--------------------------------------------+
7182// | | AltiVec alignment padding |
7183// | +--------------------------------------------+
7184// | | Save area for VRSAVE register |
7185// | +--------------------------------------------+
7186// | | Save area for General Purpose registers |
7187// | +--------------------------------------------+
7188// | | Save area for Floating Point registers |
7189// | +--------------------------------------------+
7190// +---- | Back chain |
7191// High Memory +--------------------------------------------+
7192//
7193// Specifications:
7194// AIX 7.2 Assembler Language Reference
7195// Subroutine linkage convention
7196
7197SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7198 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7199 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7200 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7201
7202 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7203 CallConv == CallingConv::Fast) &&
7204 "Unexpected calling convention!");
7205
7206 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7207 report_fatal_error(reason: "Tail call support is unimplemented on AIX.");
7208
7209 if (useSoftFloat())
7210 report_fatal_error(reason: "Soft float support is unimplemented on AIX.");
7211
7212 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7213
7214 const bool IsPPC64 = Subtarget.isPPC64();
7215 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7216
7217 // Assign locations to all of the incoming arguments.
7218 SmallVector<CCValAssign, 16> ArgLocs;
7219 MachineFunction &MF = DAG.getMachineFunction();
7220 MachineFrameInfo &MFI = MF.getFrameInfo();
7221 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7222 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7223
7224 const EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
7225 // Reserve space for the linkage area on the stack.
7226 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7227 CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(PtrByteSize));
7228 uint64_t SaveStackPos = CCInfo.getStackSize();
7229 bool SaveParams = MF.getFunction().hasFnAttribute(Kind: "save-reg-params");
7230 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_AIX);
7231
7232 SmallVector<SDValue, 8> MemOps;
7233
7234 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7235 CCValAssign &VA = ArgLocs[I++];
7236 MVT LocVT = VA.getLocVT();
7237 MVT ValVT = VA.getValVT();
7238 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7239
7240 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7241 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7242 // For compatibility with the AIX XL compiler, the float args in the
7243 // parameter save area are initialized even if the argument is available
7244 // in a register. The caller is required to initialize both the register
7245 // and memory, however, the callee can choose to expect it in either.
7246 // The memloc is dismissed here because the argument is retrieved from
7247 // the register.
7248 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7249 continue;
7250
7251 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7252 const TargetRegisterClass *RegClass = getRegClassForSVT(
7253 SVT: LocVT.SimpleTy, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(), HasVSX: Subtarget.hasVSX());
7254 // On PPC64, the debugger assumes extended 8-byte values are stored from a GPR.
7255 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7256 const Register VReg = MF.addLiveIn(PReg: VA.getLocReg(), RC: RegClass);
7257 SDValue Parm = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: SaveVT);
7258 int FI = MFI.CreateFixedObject(Size: SaveVT.getStoreSize(), SPOffset: SaveStackPos, IsImmutable: true);
7259 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7260 SDValue StoreReg = DAG.getStore(Chain, dl, Val: Parm, Ptr: FIN,
7261 PtrInfo: MachinePointerInfo(), Alignment: Align(PtrByteSize));
7262 SaveStackPos = alignTo(Value: SaveStackPos + SaveVT.getStoreSize(), Align: PtrByteSize);
7263 MemOps.push_back(Elt: StoreReg);
7264 }
7265
7266 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7267 unsigned StoreSize =
7268 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7269 SaveStackPos = alignTo(Value: SaveStackPos + StoreSize, Align: PtrByteSize);
7270 }
7271
7272 auto HandleMemLoc = [&]() {
7273 const unsigned LocSize = LocVT.getStoreSize();
7274 const unsigned ValSize = ValVT.getStoreSize();
7275 assert((ValSize <= LocSize) &&
7276 "Object size is larger than size of MemLoc");
7277 int CurArgOffset = VA.getLocMemOffset();
7278 // Objects are right-justified because AIX is big-endian.
7279 if (LocSize > ValSize)
7280 CurArgOffset += LocSize - ValSize;
7281 // Potential tail calls could cause overwriting of argument stack slots.
7282 const bool IsImmutable =
7283 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7284 (CallConv == CallingConv::Fast));
7285 int FI = MFI.CreateFixedObject(Size: ValSize, SPOffset: CurArgOffset, IsImmutable);
7286 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7287 SDValue ArgValue =
7288 DAG.getLoad(VT: ValVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo());
7289
7290 // While the ABI specifies the argument type is (sign or zero) extended
7291 // out to register width, not all code is compliant. We truncate and
7292 // re-extend to be more forgiving of these callers when the argument type
7293 // is smaller than register width.
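      // For example, an i8 argument that should have arrived zero-extended to
      // register width may carry garbage in its upper bits if the caller was
      // not compliant; truncating the loaded value to i8 and re-extending it
      // gives this function a well-defined value.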
7294 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7295 ValVT.isInteger() &&
7296 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7297 SDValue ArgValueTrunc = DAG.getNode(
7298 Opcode: ISD::TRUNCATE, DL: dl, VT: ArgVT.getSimpleVT() == MVT::i1 ? MVT::i8 : ArgVT,
7299 Operand: ArgValue);
7300 SDValue ArgValueExt =
7301 ArgSignExt ? DAG.getSExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT)
7302 : DAG.getZExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT);
7303 InVals.push_back(Elt: ArgValueExt);
7304 } else {
7305 InVals.push_back(Elt: ArgValue);
7306 }
7307 };
7308
7309 // Vector arguments to VaArg functions are passed both on the stack, and
7310 // in any available GPRs. Load the value from the stack and add the GPRs
7311 // as live ins.
7312 if (VA.isMemLoc() && VA.needsCustom()) {
7313 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7314 assert(isVarArg && "Only use custom memloc for vararg.");
7315 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7316 // matching custom RegLocs.
7317 const unsigned OriginalValNo = VA.getValNo();
7318 (void)OriginalValNo;
7319
7320 auto HandleCustomVecRegLoc = [&]() {
7321 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7322 "Missing custom RegLoc.");
7323 VA = ArgLocs[I++];
7324 assert(VA.getValVT().isVector() &&
7325 "Unexpected Val type for custom RegLoc.");
7326 assert(VA.getValNo() == OriginalValNo &&
7327 "ValNo mismatch between custom MemLoc and RegLoc.");
7328 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7329 MF.addLiveIn(PReg: VA.getLocReg(),
7330 RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
7331 HasVSX: Subtarget.hasVSX()));
7332 };
7333
7334 HandleMemLoc();
7335 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7336 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7337 // R10.
7338 HandleCustomVecRegLoc();
7339 HandleCustomVecRegLoc();
7340
7341 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7342 // we passed the vector in R5, R6, R7 and R8.
7343 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7344 assert(!IsPPC64 &&
7345 "Only 2 custom RegLocs expected for 64-bit codegen.");
7346 HandleCustomVecRegLoc();
7347 HandleCustomVecRegLoc();
7348 }
7349
7350 continue;
7351 }
7352
7353 if (VA.isRegLoc()) {
7354 if (VA.getValVT().isScalarInteger())
7355 FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
7356 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7357 switch (VA.getValVT().SimpleTy) {
7358 default:
7359 report_fatal_error(reason: "Unhandled value type for argument.");
7360 case MVT::f32:
7361 FuncInfo->appendParameterType(Type: PPCFunctionInfo::ShortFloatingPoint);
7362 break;
7363 case MVT::f64:
7364 FuncInfo->appendParameterType(Type: PPCFunctionInfo::LongFloatingPoint);
7365 break;
7366 }
7367 } else if (VA.getValVT().isVector()) {
7368 switch (VA.getValVT().SimpleTy) {
7369 default:
7370 report_fatal_error(reason: "Unhandled value type for argument.");
7371 case MVT::v16i8:
7372 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorChar);
7373 break;
7374 case MVT::v8i16:
7375 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorShort);
7376 break;
7377 case MVT::v4i32:
7378 case MVT::v2i64:
7379 case MVT::v1i128:
7380 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorInt);
7381 break;
7382 case MVT::v4f32:
7383 case MVT::v2f64:
7384 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorFloat);
7385 break;
7386 }
7387 }
7388 }
7389
7390 if (Flags.isByVal() && VA.isMemLoc()) {
7391 const unsigned Size =
7392 alignTo(Value: Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7393 Align: PtrByteSize);
7394 const int FI = MF.getFrameInfo().CreateFixedObject(
7395 Size, SPOffset: VA.getLocMemOffset(), /* IsImmutable */ false,
7396 /* IsAliased */ isAliased: true);
7397 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7398 InVals.push_back(Elt: FIN);
7399
7400 continue;
7401 }
7402
7403 if (Flags.isByVal()) {
7404 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7405
7406 const MCPhysReg ArgReg = VA.getLocReg();
7407 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7408
7409 const unsigned StackSize = alignTo(Value: Flags.getByValSize(), Align: PtrByteSize);
7410 const int FI = MF.getFrameInfo().CreateFixedObject(
7411 Size: StackSize, SPOffset: mapArgRegToOffsetAIX(Reg: ArgReg, FL), /* IsImmutable */ false,
7412 /* IsAliased */ isAliased: true);
7413 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7414 InVals.push_back(Elt: FIN);
7415
7416 // Add live ins for all the RegLocs for the same ByVal.
7417 const TargetRegisterClass *RegClass =
7418 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7419
7420 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7421 unsigned Offset) {
7422 const Register VReg = MF.addLiveIn(PReg: PhysReg, RC: RegClass);
7423 // Since the caller's side has left-justified the aggregate in the
7424 // register, we can simply store the entire register into the stack
7425 // slot.
7426 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
7427 // The store to the fixedstack object is needed because accessing a
7428 // field of the ByVal will use a gep and load. Ideally we would optimize
7429 // to extract the value from the register directly, and elide the
7430 // stores when the argument's address is not taken, but that will need
7431 // to be future work.
7432 SDValue Store = DAG.getStore(
7433 Chain: CopyFrom.getValue(R: 1), dl, Val: CopyFrom,
7434 Ptr: DAG.getObjectPtrOffset(SL: dl, Ptr: FIN, Offset: TypeSize::getFixed(ExactSize: Offset)),
7435 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset));
7436
7437 MemOps.push_back(Elt: Store);
7438 };
7439
7440 unsigned Offset = 0;
7441 HandleRegLoc(VA.getLocReg(), Offset);
7442 Offset += PtrByteSize;
7443 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7444 Offset += PtrByteSize) {
7445 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7446 "RegLocs should be for ByVal argument.");
7447
7448 const CCValAssign RL = ArgLocs[I++];
7449 HandleRegLoc(RL.getLocReg(), Offset);
7450 FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
7451 }
7452
7453 if (Offset != StackSize) {
7454 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7455 "Expected MemLoc for remaining bytes.");
7456 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7457 // Consume the MemLoc. The InVal has already been emitted, so nothing
7458 // more needs to be done.
7459 ++I;
7460 }
7461
7462 continue;
7463 }
7464
7465 if (VA.isRegLoc() && !VA.needsCustom()) {
7466 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7467 Register VReg =
7468 MF.addLiveIn(PReg: VA.getLocReg(),
7469 RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
7470 HasVSX: Subtarget.hasVSX()));
7471 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
7472 if (ValVT.isScalarInteger() &&
7473 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7474 ArgValue =
7475 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7476 }
7477 InVals.push_back(Elt: ArgValue);
7478 continue;
7479 }
7480 if (VA.isMemLoc()) {
7481 HandleMemLoc();
7482 continue;
7483 }
7484 }
7485
7486 // On AIX a minimum of 8 words is saved to the parameter save area.
7487 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7488 // Area that is at least reserved in the caller of this function.
7489 unsigned CallerReservedArea = std::max<unsigned>(
7490 a: CCInfo.getStackSize(), b: LinkageSize + MinParameterSaveArea);
7491
7492 // Set the size that is at least reserved in caller of this function. Tail
7493 // call optimized function's reserved stack space needs to be aligned so
7494 // that taking the difference between two stack areas will result in an
7495 // aligned stack.
7496 CallerReservedArea =
7497 EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: CallerReservedArea);
7498 FuncInfo->setMinReservedArea(CallerReservedArea);
7499
7500 if (isVarArg) {
7501 FuncInfo->setVarArgsFrameIndex(
7502 MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: CCInfo.getStackSize(), IsImmutable: true));
7503 SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
7504
7505 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7506 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7507
7508 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7509 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7510 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7511
7512 // The fixed integer arguments of a variadic function are stored to the
7513 // VarArgsFrameIndex on the stack so that they may be loaded by
7514 // dereferencing the result of va_next.
7515 for (unsigned GPRIndex =
7516 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7517 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7518
7519 const Register VReg =
7520 IsPPC64 ? MF.addLiveIn(PReg: GPR_64[GPRIndex], RC: &PPC::G8RCRegClass)
7521 : MF.addLiveIn(PReg: GPR_32[GPRIndex], RC: &PPC::GPRCRegClass);
7522
7523 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
7524 SDValue Store =
7525 DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
7526 MemOps.push_back(Elt: Store);
7527 // Increment the address for the next argument to store.
7528 SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
7529 FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
7530 }
7531 }
7532
7533 if (!MemOps.empty())
7534 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
7535
7536 return Chain;
7537}
7538
7539SDValue PPCTargetLowering::LowerCall_AIX(
7540 SDValue Chain, SDValue Callee, CallFlags CFlags,
7541 const SmallVectorImpl<ISD::OutputArg> &Outs,
7542 const SmallVectorImpl<SDValue> &OutVals,
7543 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7544 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7545 const CallBase *CB) const {
7546 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7547 // AIX ABI stack frame layout.
7548
7549 assert((CFlags.CallConv == CallingConv::C ||
7550 CFlags.CallConv == CallingConv::Cold ||
7551 CFlags.CallConv == CallingConv::Fast) &&
7552 "Unexpected calling convention!");
7553
7554 if (CFlags.IsPatchPoint)
7555 report_fatal_error(reason: "This call type is unimplemented on AIX.");
7556
7557 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7558
7559 MachineFunction &MF = DAG.getMachineFunction();
7560 SmallVector<CCValAssign, 16> ArgLocs;
7561 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7562 *DAG.getContext());
7563
7564 // Reserve space for the linkage save area (LSA) on the stack.
7565 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7566 // [SP][CR][LR][2 x reserved][TOC].
7567 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7568 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7569 const bool IsPPC64 = Subtarget.isPPC64();
7570 const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7571 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7572 CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(PtrByteSize));
7573 CCInfo.AnalyzeCallOperands(Outs, Fn: CC_AIX);
7574
7575 // The prolog code of the callee may store up to 8 GPR argument registers to
7576 // the stack, allowing va_start to index over them in memory if the callee
7577 // is variadic.
7578 // Because we cannot tell if this is needed on the caller side, we have to
7579 // conservatively assume that it is needed. As such, make sure we have at
7580 // least enough stack space for the caller to store the 8 GPRs.
7581 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7582 const unsigned NumBytes = std::max<unsigned>(
7583 a: LinkageSize + MinParameterSaveAreaSize, b: CCInfo.getStackSize());
7584
7585 // Adjust the stack pointer for the new arguments...
7586 // These operations are automatically eliminated by the prolog/epilog pass.
7587 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
7588 SDValue CallSeqStart = Chain;
7589
7590 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7591 SmallVector<SDValue, 8> MemOpChains;
7592
7593 // Set up a copy of the stack pointer for loading and storing any
7594 // arguments that may not fit in the registers available for argument
7595 // passing.
7596 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(Reg: PPC::X1, VT: MVT::i64)
7597 : DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
7598
7599 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7600 const unsigned ValNo = ArgLocs[I].getValNo();
7601 SDValue Arg = OutVals[ValNo];
7602 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7603
7604 if (Flags.isByVal()) {
7605 const unsigned ByValSize = Flags.getByValSize();
7606
7607 // Nothing to do for zero-sized ByVals on the caller side.
7608 if (!ByValSize) {
7609 ++I;
7610 continue;
7611 }
7612
7613 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7614 return DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: PtrVT, Chain,
7615 Ptr: (LoadOffset != 0)
7616 ? DAG.getObjectPtrOffset(
7617 SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7618 : Arg,
7619 PtrInfo: MachinePointerInfo(), MemVT: VT);
7620 };
7621
7622 unsigned LoadOffset = 0;
7623
7624 // Initialize the registers that are fully occupied by the by-val argument.
7625 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7626 SDValue Load = GetLoad(PtrVT, LoadOffset);
7627 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7628 LoadOffset += PtrByteSize;
7629 const CCValAssign &ByValVA = ArgLocs[I++];
7630 assert(ByValVA.getValNo() == ValNo &&
7631 "Unexpected location for pass-by-value argument.");
7632 RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: Load));
7633 }
7634
7635 if (LoadOffset == ByValSize)
7636 continue;
7637
7638 // There must be one more loc to handle the remainder.
7639 assert(ArgLocs[I].getValNo() == ValNo &&
7640 "Expected additional location for by-value argument.");
7641
7642 if (ArgLocs[I].isMemLoc()) {
7643 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7644 const CCValAssign &ByValVA = ArgLocs[I++];
7645 ISD::ArgFlagsTy MemcpyFlags = Flags;
7646 // Only memcpy the bytes that are not passed in registers.
7647 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7648 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7649 Arg: (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7650 SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7651 : Arg,
7652 PtrOff: DAG.getObjectPtrOffset(
7653 SL: dl, Ptr: StackPtr, Offset: TypeSize::getFixed(ExactSize: ByValVA.getLocMemOffset())),
7654 CallSeqStart, Flags: MemcpyFlags, DAG, dl);
7655 continue;
7656 }
7657
7658 // Initialize the final register residue.
7659 // Any residue that occupies the final by-val arg register must be
7660 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7661 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7662 // 2 and 1 byte loads.
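// Illustration: on 64-bit, those three loads are zero-extended to i64 and,
// after the shifts computed below, occupy bits 63-32, 31-16 and 15-8 of the
// register, leaving the low 8 bits zero.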
7663 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7664 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7665 "Unexpected register residue for by-value argument.");
7666 SDValue ResidueVal;
7667 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7668 const unsigned N = llvm::bit_floor(Value: ResidueBytes - Bytes);
7669 const MVT VT =
7670 N == 1 ? MVT::i8
7671 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7672 SDValue Load = GetLoad(VT, LoadOffset);
7673 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7674 LoadOffset += N;
7675 Bytes += N;
7676
7677 // By-val arguments are passed left-justified in the register.
7678 // Every load here needs to be shifted, otherwise a full register load
7679 // should have been used.
7680 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7681 "Unexpected load emitted during handling of pass-by-value "
7682 "argument.");
7683 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7684 EVT ShiftAmountTy =
7685 getShiftAmountTy(LHSTy: Load->getValueType(ResNo: 0), DL: DAG.getDataLayout());
7686 SDValue SHLAmt = DAG.getConstant(Val: NumSHLBits, DL: dl, VT: ShiftAmountTy);
7687 SDValue ShiftedLoad =
7688 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: Load.getValueType(), N1: Load, N2: SHLAmt);
7689 ResidueVal = ResidueVal ? DAG.getNode(Opcode: ISD::OR, DL: dl, VT: PtrVT, N1: ResidueVal,
7690 N2: ShiftedLoad)
7691 : ShiftedLoad;
7692 }
7693
7694 const CCValAssign &ByValVA = ArgLocs[I++];
7695 RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: ResidueVal));
7696 continue;
7697 }
7698
7699 CCValAssign &VA = ArgLocs[I++];
7700 const MVT LocVT = VA.getLocVT();
7701 const MVT ValVT = VA.getValVT();
7702
7703 switch (VA.getLocInfo()) {
7704 default:
7705 report_fatal_error(reason: "Unexpected argument extension type.");
7706 case CCValAssign::Full:
7707 break;
7708 case CCValAssign::ZExt:
7709 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7710 break;
7711 case CCValAssign::SExt:
7712 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7713 break;
7714 }
7715
7716 if (VA.isRegLoc() && !VA.needsCustom()) {
7717 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
7718 continue;
7719 }
7720
7721 // Vector arguments passed to VarArg functions need custom handling when
7722 // they are passed (at least partially) in GPRs.
7723 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7724 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7725 // Store value to its stack slot.
7726 SDValue PtrOff =
7727 DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7728 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7729 SDValue Store =
7730 DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
7731 MemOpChains.push_back(Elt: Store);
7732 const unsigned OriginalValNo = VA.getValNo();
7733 // Then load the GPRs from the stack
7734 unsigned LoadOffset = 0;
7735 auto HandleCustomVecRegLoc = [&]() {
7736 assert(I != E && "Unexpected end of CCValAssigns.");
7737 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7738 "Expected custom RegLoc.");
7739 CCValAssign RegVA = ArgLocs[I++];
7740 assert(RegVA.getValNo() == OriginalValNo &&
7741 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7742 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
7743 N2: DAG.getConstant(Val: LoadOffset, DL: dl, VT: PtrVT));
7744 SDValue Load = DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Add, PtrInfo: MachinePointerInfo());
7745 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7746 RegsToPass.push_back(Elt: std::make_pair(x: RegVA.getLocReg(), y&: Load));
7747 LoadOffset += PtrByteSize;
7748 };
7749
7750 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7751 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7752 // R10.
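// For example, a 16-byte vector needs two GPR-sized pieces in 64-bit mode
// and four in 32-bit mode, hence the possible second pair of RegLocs below.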
7753 HandleCustomVecRegLoc();
7754 HandleCustomVecRegLoc();
7755
7756 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7757 ArgLocs[I].getValNo() == OriginalValNo) {
7758 assert(!IsPPC64 &&
7759 "Only 2 custom RegLocs expected for 64-bit codegen.");
7760 HandleCustomVecRegLoc();
7761 HandleCustomVecRegLoc();
7762 }
7763
7764 continue;
7765 }
7766
7767 if (VA.isMemLoc()) {
7768 SDValue PtrOff =
7769 DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7770 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7771 MemOpChains.push_back(
7772 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff,
7773 PtrInfo: MachinePointerInfo::getStack(MF, Offset: VA.getLocMemOffset()),
7774 Alignment: Subtarget.getFrameLowering()->getStackAlign()));
7775
7776 continue;
7777 }
7778
7779 if (!ValVT.isFloatingPoint())
7780 report_fatal_error(
7781 reason: "Unexpected register handling for calling convention.");
7782
7783 // Custom handling is used for GPR initializations for vararg float
7784 // arguments.
7785 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7786 LocVT.isInteger() &&
7787 "Custom register handling only expected for VarArg.");
7788
7789 SDValue ArgAsInt =
7790 DAG.getBitcast(VT: MVT::getIntegerVT(BitWidth: ValVT.getSizeInBits()), V: Arg);
7791
7792 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7793 // f32 in 32-bit GPR
7794 // f64 in 64-bit GPR
7795 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgAsInt));
7796 else if (Arg.getValueType().getFixedSizeInBits() <
7797 LocVT.getFixedSizeInBits())
7798 // f32 in 64-bit GPR.
7799 RegsToPass.push_back(Elt: std::make_pair(
7800 x: VA.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: LocVT)));
7801 else {
7802 // f64 in two 32-bit GPRs
7803 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7804 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7805 "Unexpected custom register for argument!");
7806 CCValAssign &GPR1 = VA;
7807 SDValue MSWAsI64 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgAsInt,
7808 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i8));
7809 RegsToPass.push_back(Elt: std::make_pair(
7810 x: GPR1.getLocReg(), y: DAG.getZExtOrTrunc(Op: MSWAsI64, DL: dl, VT: MVT::i32)));
7811
7812 if (I != E) {
7813 // If only one GPR was available, there will be exactly one custom GPR
7814 // and the argument will also be passed in memory.
7815 CCValAssign &PeekArg = ArgLocs[I];
7816 if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7817 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7818 CCValAssign &GPR2 = ArgLocs[I++];
7819 RegsToPass.push_back(Elt: std::make_pair(
7820 x: GPR2.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: MVT::i32)));
7821 }
7822 }
7823 }
7824 }
7825
7826 if (!MemOpChains.empty())
7827 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
7828
7829 // For indirect calls, we need to save the TOC base to the stack for
7830 // restoration after the call.
7831 if (CFlags.IsIndirect) {
7832 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7833 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7834 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7835 const MVT PtrVT = Subtarget.getScalarIntVT();
7836 const unsigned TOCSaveOffset =
7837 Subtarget.getFrameLowering()->getTOCSaveOffset();
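// With the AIX linkage area laid out as [SP][CR][LR][2 x reserved][TOC],
// this offset is the sixth slot: 20 bytes in 32-bit mode and 40 bytes in
// 64-bit mode.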
7838
7839 setUsesTOCBasePtr(DAG);
7840 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: TOCBaseReg, VT: PtrVT);
7841 SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
7842 SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: PtrVT);
7843 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7844 Chain = DAG.getStore(
7845 Chain: Val.getValue(R: 1), dl, Val, Ptr: AddPtr,
7846 PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
7847 }
7848
7849 // Build a sequence of copy-to-reg nodes chained together with token chain
7850 // and flag operands which copy the outgoing args into the appropriate regs.
7851 SDValue InGlue;
7852 for (auto Reg : RegsToPass) {
7853 Chain = DAG.getCopyToReg(Chain, dl, Reg: Reg.first, N: Reg.second, Glue: InGlue);
7854 InGlue = Chain.getValue(R: 1);
7855 }
7856
7857 const int SPDiff = 0;
7858 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
7859 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7860}
7861
7862bool
7863PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7864 MachineFunction &MF, bool isVarArg,
7865 const SmallVectorImpl<ISD::OutputArg> &Outs,
7866 LLVMContext &Context,
7867 const Type *RetTy) const {
7868 SmallVector<CCValAssign, 16> RVLocs;
7869 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7870 return CCInfo.CheckReturn(
7871 Outs, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7872 ? RetCC_PPC_Cold
7873 : RetCC_PPC);
7874}
7875
7876SDValue
7877PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7878 bool isVarArg,
7879 const SmallVectorImpl<ISD::OutputArg> &Outs,
7880 const SmallVectorImpl<SDValue> &OutVals,
7881 const SDLoc &dl, SelectionDAG &DAG) const {
7882 SmallVector<CCValAssign, 16> RVLocs;
7883 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7884 *DAG.getContext());
7885 CCInfo.AnalyzeReturn(Outs,
7886 Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7887 ? RetCC_PPC_Cold
7888 : RetCC_PPC);
7889
7890 SDValue Glue;
7891 SmallVector<SDValue, 4> RetOps(1, Chain);
7892
7893 // Copy the result values into the output registers.
7894 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7895 CCValAssign &VA = RVLocs[i];
7896 assert(VA.isRegLoc() && "Can only return in registers!");
7897
7898 SDValue Arg = OutVals[RealResIdx];
7899
7900 switch (VA.getLocInfo()) {
7901 default: llvm_unreachable("Unknown loc info!");
7902 case CCValAssign::Full: break;
7903 case CCValAssign::AExt:
7904 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7905 break;
7906 case CCValAssign::ZExt:
7907 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7908 break;
7909 case CCValAssign::SExt:
7910 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7911 break;
7912 }
7913 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7914 bool isLittleEndian = Subtarget.isLittleEndian();
7915 // Legalize ret f64 -> ret 2 x i32.
7916 SDValue SVal =
7917 DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
7918 N2: DAG.getIntPtrConstant(Val: isLittleEndian ? 0 : 1, DL: dl));
7919 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
7920 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
7921 SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
7922 N2: DAG.getIntPtrConstant(Val: isLittleEndian ? 1 : 0, DL: dl));
7923 Glue = Chain.getValue(R: 1);
7924 VA = RVLocs[++i]; // skip ahead to next loc
7925 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
7926 } else
7927 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: Arg, Glue);
7928 Glue = Chain.getValue(R: 1);
7929 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
7930 }
7931
7932 RetOps[0] = Chain; // Update chain.
7933
7934 // Add the glue if we have it.
7935 if (Glue.getNode())
7936 RetOps.push_back(Elt: Glue);
7937
7938 return DAG.getNode(Opcode: PPCISD::RET_GLUE, DL: dl, VT: MVT::Other, Ops: RetOps);
7939}
7940
7941SDValue
7942PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7943 SelectionDAG &DAG) const {
7944 SDLoc dl(Op);
7945
7946 // Get the correct type for integers.
7947 EVT IntVT = Op.getValueType();
7948
7949 // Get the inputs.
7950 SDValue Chain = Op.getOperand(i: 0);
7951 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7952 // Build a DYNAREAOFFSET node.
7953 SDValue Ops[2] = {Chain, FPSIdx};
7954 SDVTList VTs = DAG.getVTList(VT: IntVT);
7955 return DAG.getNode(Opcode: PPCISD::DYNAREAOFFSET, DL: dl, VTList: VTs, Ops);
7956}
7957
7958SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7959 SelectionDAG &DAG) const {
7960 // When we pop the dynamic allocation we need to restore the SP link.
7961 SDLoc dl(Op);
7962
7963 // Get the correct type for pointers.
7964 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7965
7966 // Construct the stack pointer operand.
7967 bool isPPC64 = Subtarget.isPPC64();
7968 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7969 SDValue StackPtr = DAG.getRegister(Reg: SP, VT: PtrVT);
7970
7971 // Get the operands for the STACKRESTORE.
7972 SDValue Chain = Op.getOperand(i: 0);
7973 SDValue SaveSP = Op.getOperand(i: 1);
7974
7975 // Load the old link SP.
7976 SDValue LoadLinkSP =
7977 DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: StackPtr, PtrInfo: MachinePointerInfo());
7978
7979 // Restore the stack pointer.
7980 Chain = DAG.getCopyToReg(Chain: LoadLinkSP.getValue(R: 1), dl, Reg: SP, N: SaveSP);
7981
7982 // Store the old link SP.
7983 return DAG.getStore(Chain, dl, Val: LoadLinkSP, Ptr: StackPtr, PtrInfo: MachinePointerInfo());
7984}
7985
7986SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7987 MachineFunction &MF = DAG.getMachineFunction();
7988 bool isPPC64 = Subtarget.isPPC64();
7989 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
7990
7991 // Get the current return address save index.
7993 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7994 int RASI = FI->getReturnAddrSaveIndex();
7995
7996 // If the return address save index hasn't been defined yet.
7997 if (!RASI) {
7998 // Find out the fixed offset of the return address save area.
7999 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8000 // Allocate the frame index for the return address save area.
8001 RASI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: LROffset, IsImmutable: false);
8002 // Save the result.
8003 FI->setReturnAddrSaveIndex(RASI);
8004 }
8005 return DAG.getFrameIndex(FI: RASI, VT: PtrVT);
8006}
8007
8008SDValue
8009PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8010 MachineFunction &MF = DAG.getMachineFunction();
8011 bool isPPC64 = Subtarget.isPPC64();
8012 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
8013
8014 // Get the current frame pointer save index. The users of this index will be
8015 // primarily DYNALLOC instructions.
8016 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8017 int FPSI = FI->getFramePointerSaveIndex();
8018
8019 // If the frame pointer save index hasn't been defined yet.
8020 if (!FPSI) {
8021 // Find out the fixed offset of the frame pointer save area.
8022 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8023 // Allocate the frame index for the frame pointer save area.
8024 FPSI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: FPOffset, IsImmutable: true);
8025 // Save the result.
8026 FI->setFramePointerSaveIndex(FPSI);
8027 }
8028 return DAG.getFrameIndex(FI: FPSI, VT: PtrVT);
8029}
8030
8031SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8032 SelectionDAG &DAG) const {
8033 MachineFunction &MF = DAG.getMachineFunction();
8034 // Get the inputs.
8035 SDValue Chain = Op.getOperand(i: 0);
8036 SDValue Size = Op.getOperand(i: 1);
8037 SDLoc dl(Op);
8038
8039 // Get the correct type for pointers.
8040 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8041 // Negate the size.
8042 SDValue NegSize = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: PtrVT,
8043 N1: DAG.getConstant(Val: 0, DL: dl, VT: PtrVT), N2: Size);
8044 // Construct a node for the frame pointer save index.
8045 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8046 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8047 SDVTList VTs = DAG.getVTList(VT1: PtrVT, VT2: MVT::Other);
8048 if (hasInlineStackProbe(MF))
8049 return DAG.getNode(Opcode: PPCISD::PROBED_ALLOCA, DL: dl, VTList: VTs, Ops);
8050 return DAG.getNode(Opcode: PPCISD::DYNALLOC, DL: dl, VTList: VTs, Ops);
8051}
8052
8053SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8054 SelectionDAG &DAG) const {
8055 MachineFunction &MF = DAG.getMachineFunction();
8056
8057 bool isPPC64 = Subtarget.isPPC64();
8058 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8059
8060 int FI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64 ? 8 : 4, SPOffset: 0, IsImmutable: false);
8061 return DAG.getFrameIndex(FI, VT: PtrVT);
8062}
8063
8064SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8065 SelectionDAG &DAG) const {
8066 SDLoc DL(Op);
8067 return DAG.getNode(Opcode: PPCISD::EH_SJLJ_SETJMP, DL,
8068 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
8069 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
8070}
8071
8072SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8073 SelectionDAG &DAG) const {
8074 SDLoc DL(Op);
8075 return DAG.getNode(Opcode: PPCISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other,
8076 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
8077}
8078
8079SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8080 if (Op.getValueType().isVector())
8081 return LowerVectorLoad(Op, DAG);
8082
8083 assert(Op.getValueType() == MVT::i1 &&
8084 "Custom lowering only for i1 loads");
8085
8086 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8087
8088 SDLoc dl(Op);
8089 LoadSDNode *LD = cast<LoadSDNode>(Val&: Op);
8090
8091 SDValue Chain = LD->getChain();
8092 SDValue BasePtr = LD->getBasePtr();
8093 MachineMemOperand *MMO = LD->getMemOperand();
8094
8095 SDValue NewLD =
8096 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: getPointerTy(DL: DAG.getDataLayout()), Chain,
8097 Ptr: BasePtr, MemVT: MVT::i8, MMO);
8098 SDValue Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewLD);
8099
8100 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8101 return DAG.getMergeValues(Ops, dl);
8102}
8103
8104SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8105 if (Op.getOperand(i: 1).getValueType().isVector())
8106 return LowerVectorStore(Op, DAG);
8107
8108 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8109 "Custom lowering only for i1 stores");
8110
8111 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8112
8113 SDLoc dl(Op);
8114 StoreSDNode *ST = cast<StoreSDNode>(Val&: Op);
8115
8116 SDValue Chain = ST->getChain();
8117 SDValue BasePtr = ST->getBasePtr();
8118 SDValue Value = ST->getValue();
8119 MachineMemOperand *MMO = ST->getMemOperand();
8120
8121 Value = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
8122 Operand: Value);
8123 return DAG.getTruncStore(Chain, dl, Val: Value, Ptr: BasePtr, SVT: MVT::i8, MMO);
8124}
8125
8126// FIXME: Remove this once the ANDI glue bug is fixed:
8127SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8128 assert(Op.getValueType() == MVT::i1 &&
8129 "Custom lowering only for i1 results");
8130
8131 SDLoc DL(Op);
8132 return DAG.getNode(Opcode: PPCISD::ANDI_rec_1_GT_BIT, DL, VT: MVT::i1, Operand: Op.getOperand(i: 0));
8133}
8134
8135SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8136 SelectionDAG &DAG) const {
8137
8138 // Implements a vector truncate that fits in a vector register as a shuffle.
8139 // We want to legalize vector truncates down to where the source fits in
8140 // a vector register (and the target type is therefore smaller than the
8141 // vector register size). At that point legalization will try to custom
8142 // lower the sub-legal result and get here, where we can contain the
8143 // truncate as a single target operation.
8144
8145 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8146 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8147 //
8148 // We will implement it for big-endian ordering as this (where x denotes
8149 // undefined):
8150 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8151 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8152 //
8153 // The same operation in little-endian ordering will be:
8154 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8155 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8156
8157 EVT TrgVT = Op.getValueType();
8158 assert(TrgVT.isVector() && "Vector type expected.");
8159 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8160 EVT EltVT = TrgVT.getVectorElementType();
8161 if (!isOperationCustom(Op: Op.getOpcode(), VT: TrgVT) ||
8162 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(Value: TrgNumElts) ||
8163 !llvm::has_single_bit<uint32_t>(Value: EltVT.getSizeInBits()))
8164 return SDValue();
8165
8166 SDValue N1 = Op.getOperand(i: 0);
8167 EVT SrcVT = N1.getValueType();
8168 unsigned SrcSize = SrcVT.getSizeInBits();
8169 if (SrcSize > 256 || !isPowerOf2_32(Value: SrcVT.getVectorNumElements()) ||
8170 !llvm::has_single_bit<uint32_t>(
8171 Value: SrcVT.getVectorElementType().getSizeInBits()))
8172 return SDValue();
8173 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8174 return SDValue();
8175
8176 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8177 EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);
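// For the <2 x i8> target in the example above, WideVT is v16i8.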
8178
8179 SDLoc DL(Op);
8180 SDValue Op1, Op2;
8181 if (SrcSize == 256) {
8182 EVT VecIdxTy = getVectorIdxTy(DL: DAG.getDataLayout());
8183 EVT SplitVT =
8184 N1.getValueType().getHalfNumVectorElementsVT(Context&: *DAG.getContext());
8185 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8186 Op1 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
8187 N2: DAG.getConstant(Val: 0, DL, VT: VecIdxTy));
8188 Op2 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
8189 N2: DAG.getConstant(Val: SplitNumElts, DL, VT: VecIdxTy));
8190 }
8191 else {
8192 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, Vec: N1, dl: DL);
8193 Op2 = DAG.getUNDEF(VT: WideVT);
8194 }
8195
8196 // First list the elements we want to keep.
8197 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8198 SmallVector<int, 16> ShuffV;
8199 if (Subtarget.isLittleEndian())
8200 for (unsigned i = 0; i < TrgNumElts; ++i)
8201 ShuffV.push_back(Elt: i * SizeMult);
8202 else
8203 for (unsigned i = 1; i <= TrgNumElts; ++i)
8204 ShuffV.push_back(Elt: i * SizeMult - 1);
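// For the <2 x i16> -> <2 x i8> example above, this selects byte indices
// {0, 2} on little-endian and {1, 3} on big-endian, i.e. the LSB of each
// source element.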
8205
8206 // Populate the remaining elements with undefs.
8207 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8208 // ShuffV.push_back(i + WideNumElts);
8209 ShuffV.push_back(Elt: WideNumElts + 1);
8210
8211 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op1);
8212 Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op2);
8213 return DAG.getVectorShuffle(VT: WideVT, dl: DL, N1: Op1, N2: Op2, Mask: ShuffV);
8214}
8215
8216 /// LowerSELECT_CC - Lower floating-point select_cc's into an fsel
8217 /// instruction when possible.
8218SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8219 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 4))->get();
8220 EVT ResVT = Op.getValueType();
8221 EVT CmpVT = Op.getOperand(i: 0).getValueType();
8222 SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
8223 SDValue TV = Op.getOperand(i: 2), FV = Op.getOperand(i: 3);
8224 SDLoc dl(Op);
8225
8226 // Without power9-vector, we don't have a native instruction for f128 comparison.
8227 // The following transformation to a setcc libcall is needed:
8228 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8229 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8230 SDValue Z = DAG.getSetCC(
8231 DL: dl, VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: CmpVT),
8232 LHS, RHS, Cond: CC);
8233 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: Z.getValueType());
8234 return DAG.getSelectCC(DL: dl, LHS: Z, RHS: Zero, True: TV, False: FV, Cond: ISD::SETNE);
8235 }
8236
8237 // Not FP, or using SPE? Not a fsel.
8238 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8239 Subtarget.hasSPE())
8240 return Op;
8241
8242 SDNodeFlags Flags = Op.getNode()->getFlags();
8243
8244 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8245 // presence of infinities.
8246 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8247 switch (CC) {
8248 default:
8249 break;
8250 case ISD::SETOGT:
8251 case ISD::SETGT:
8252 return DAG.getNode(Opcode: PPCISD::XSMAXC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
8253 case ISD::SETOLT:
8254 case ISD::SETLT:
8255 return DAG.getNode(Opcode: PPCISD::XSMINC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
8256 }
8257 }
8258
8259 // We might be able to do better than this under some circumstances, but in
8260 // general, fsel-based lowering of select is a finite-math-only optimization.
8261 // For more information, see section F.3 of the 2.06 ISA specification.
8262 // With ISA 3.0
8263 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8264 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8265 ResVT == MVT::f128)
8266 return Op;
8267
8268 // If the RHS of the comparison is a 0.0, we don't need to do the
8269 // subtraction at all.
8270 SDValue Sel1;
8271 if (isFloatingPointZero(Op: RHS))
8272 switch (CC) {
8273 default: break; // SETUO etc aren't handled by fsel.
8274 case ISD::SETNE:
8275 std::swap(a&: TV, b&: FV);
8276 [[fallthrough]];
8277 case ISD::SETEQ:
8278 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8279 LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
8280 Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
8281 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8282 Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
8283 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
8284 N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: Sel1, N3: FV);
8285 case ISD::SETULT:
8286 case ISD::SETLT:
8287 std::swap(a&: TV, b&: FV); // fsel is natively setge, swap operands for setlt
8288 [[fallthrough]];
8289 case ISD::SETOGE:
8290 case ISD::SETGE:
8291 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8292 LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
8293 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
8294 case ISD::SETUGT:
8295 case ISD::SETGT:
8296 std::swap(a&: TV, b&: FV); // fsel is natively setge, swap operands for setgt
8297 [[fallthrough]];
8298 case ISD::SETOLE:
8299 case ISD::SETLE:
8300 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8301 LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
8302 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
8303 N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: TV, N3: FV);
8304 }
8305
8306 SDValue Cmp;
8307 switch (CC) {
8308 default: break; // SETUO etc aren't handled by fsel.
8309 case ISD::SETNE:
8310 std::swap(a&: TV, b&: FV);
8311 [[fallthrough]];
8312 case ISD::SETEQ:
8313 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
8314 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8315 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8316 Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
8317 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8318 Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
8319 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
8320 N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: Cmp), N2: Sel1, N3: FV);
8321 case ISD::SETULT:
8322 case ISD::SETLT:
8323 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
8324 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8325 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8326 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
8327 case ISD::SETOGE:
8328 case ISD::SETGE:
8329 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
8330 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8331 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8332 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
8333 case ISD::SETUGT:
8334 case ISD::SETGT:
8335 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
8336 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8337 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8338 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
8339 case ISD::SETOLE:
8340 case ISD::SETLE:
8341 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
8342 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8343 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8344 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
8345 }
8346 return Op;
8347}
8348
8349static unsigned getPPCStrictOpcode(unsigned Opc) {
8350 switch (Opc) {
8351 default:
8352 llvm_unreachable("No strict version of this opcode!");
8353 case PPCISD::FCTIDZ:
8354 return PPCISD::STRICT_FCTIDZ;
8355 case PPCISD::FCTIWZ:
8356 return PPCISD::STRICT_FCTIWZ;
8357 case PPCISD::FCTIDUZ:
8358 return PPCISD::STRICT_FCTIDUZ;
8359 case PPCISD::FCTIWUZ:
8360 return PPCISD::STRICT_FCTIWUZ;
8361 case PPCISD::FCFID:
8362 return PPCISD::STRICT_FCFID;
8363 case PPCISD::FCFIDU:
8364 return PPCISD::STRICT_FCFIDU;
8365 case PPCISD::FCFIDS:
8366 return PPCISD::STRICT_FCFIDS;
8367 case PPCISD::FCFIDUS:
8368 return PPCISD::STRICT_FCFIDUS;
8369 }
8370}
8371
8372static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8373 const PPCSubtarget &Subtarget) {
8374 SDLoc dl(Op);
8375 bool IsStrict = Op->isStrictFPOpcode();
8376 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8377 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8378
8379 // TODO: Any other flags to propagate?
8380 SDNodeFlags Flags;
8381 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8382
8383 // For strict nodes, source is the second operand.
8384 SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
8385 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
8386 MVT DestTy = Op.getSimpleValueType();
8387 assert(Src.getValueType().isFloatingPoint() &&
8388 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8389 DestTy == MVT::i64) &&
8390 "Invalid FP_TO_INT types");
8391 if (Src.getValueType() == MVT::f32) {
8392 if (IsStrict) {
8393 Src =
8394 DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL: dl,
8395 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
8396 Chain = Src.getValue(R: 1);
8397 } else
8398 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
8399 }
8400 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8401 DestTy = Subtarget.getScalarIntVT();
8402 unsigned Opc = ISD::DELETED_NODE;
8403 switch (DestTy.SimpleTy) {
8404 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8405 case MVT::i32:
8406 Opc = IsSigned ? PPCISD::FCTIWZ
8407 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8408 break;
8409 case MVT::i64:
8410 assert((IsSigned || Subtarget.hasFPCVT()) &&
8411 "i64 FP_TO_UINT is supported only with FPCVT");
8412 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8413 }
8414 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8415 SDValue Conv;
8416 if (IsStrict) {
8417 Opc = getPPCStrictOpcode(Opc);
8418 Conv = DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src},
8419 Flags);
8420 } else {
8421 Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: ConvTy, Operand: Src);
8422 }
8423 return Conv;
8424}
8425
8426void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8427 SelectionDAG &DAG,
8428 const SDLoc &dl) const {
8429 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8430 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8431 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8432 bool IsStrict = Op->isStrictFPOpcode();
8433
8434 // Convert the FP value to an int value through memory.
8435 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8436 (IsSigned || Subtarget.hasFPCVT());
8437 SDValue FIPtr = DAG.CreateStackTemporary(VT: i32Stack ? MVT::i32 : MVT::f64);
8438 int FI = cast<FrameIndexSDNode>(Val&: FIPtr)->getIndex();
8439 MachinePointerInfo MPI =
8440 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);
8441
8442 // Emit a store to the stack slot.
8443 SDValue Chain = IsStrict ? Tmp.getValue(R: 1) : DAG.getEntryNode();
8444 Align Alignment(DAG.getEVTAlign(MemoryVT: Tmp.getValueType()));
8445 if (i32Stack) {
8446 MachineFunction &MF = DAG.getMachineFunction();
8447 Alignment = Align(4);
8448 MachineMemOperand *MMO =
8449 MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Alignment);
8450 SDValue Ops[] = { Chain, Tmp, FIPtr };
8451 Chain = DAG.getMemIntrinsicNode(Opcode: PPCISD::STFIWX, dl,
8452 VTList: DAG.getVTList(VT: MVT::Other), Ops, MemVT: MVT::i32, MMO);
8453 } else
8454 Chain = DAG.getStore(Chain, dl, Val: Tmp, Ptr: FIPtr, PtrInfo: MPI, Alignment);
8455
8456 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8457 // add in a bias on big endian.
8458 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8459 FIPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: FIPtr.getValueType(), N1: FIPtr,
8460 N2: DAG.getConstant(Val: 4, DL: dl, VT: FIPtr.getValueType()));
8461 MPI = MPI.getWithOffset(O: Subtarget.isLittleEndian() ? 0 : 4);
8462 }
8463
8464 RLI.Chain = Chain;
8465 RLI.Ptr = FIPtr;
8466 RLI.MPI = MPI;
8467 RLI.Alignment = Alignment;
8468}
8469
8470/// Custom lowers floating point to integer conversions to use
8471/// the direct move instructions available in ISA 2.07 to avoid the
8472/// need for load/store combinations.
8473SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8474 SelectionDAG &DAG,
8475 const SDLoc &dl) const {
8476 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8477 SDValue Mov = DAG.getNode(Opcode: PPCISD::MFVSR, DL: dl, VT: Op.getValueType(), Operand: Conv);
8478 if (Op->isStrictFPOpcode())
8479 return DAG.getMergeValues(Ops: {Mov, Conv.getValue(R: 1)}, dl);
8480 else
8481 return Mov;
8482}
8483
8484SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8485 const SDLoc &dl) const {
8486 bool IsStrict = Op->isStrictFPOpcode();
8487 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8488 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8489 SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
8490 EVT SrcVT = Src.getValueType();
8491 EVT DstVT = Op.getValueType();
8492
8493 // FP to INT conversions are legal for f128.
8494 if (SrcVT == MVT::f128)
8495 return Subtarget.hasP9Vector() ? Op : SDValue();
8496
8497 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8498 // PPC (the libcall is not available).
8499 if (SrcVT == MVT::ppcf128) {
8500 if (DstVT == MVT::i32) {
8501 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8502 // set other fast-math flags to FP operations in both strict and
8503 // non-strict cases. (FP_TO_SINT, FSUB)
8504 SDNodeFlags Flags;
8505 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8506
8507 if (IsSigned) {
8508 SDValue Lo, Hi;
8509 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Src, DL: dl, LoVT: MVT::f64, HiVT: MVT::f64);
8510
8511 // Add the two halves of the long double in round-to-zero mode, and use
8512 // a smaller FP_TO_SINT.
8513 if (IsStrict) {
8514 SDValue Res = DAG.getNode(Opcode: PPCISD::STRICT_FADDRTZ, DL: dl,
8515 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8516 Ops: {Op.getOperand(i: 0), Lo, Hi}, Flags);
8517 return DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
8518 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
8519 Ops: {Res.getValue(R: 1), Res}, Flags);
8520 } else {
8521 SDValue Res = DAG.getNode(Opcode: PPCISD::FADDRTZ, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
8522 return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Res);
8523 }
8524 } else {
8525 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8526 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8527 SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8528 SDValue SignMask = DAG.getConstant(Val: 0x80000000, DL: dl, VT: DstVT);
8529 if (IsStrict) {
8530 // Sel = Src < 0x80000000
8531 // FltOfs = select Sel, 0.0, 0x80000000
8532 // IntOfs = select Sel, 0, 0x80000000
8533 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8534 SDValue Chain = Op.getOperand(i: 0);
8535 EVT SetCCVT =
8536 getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: SrcVT);
8537 EVT DstSetCCVT =
8538 getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: DstVT);
8539 SDValue Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8540 Chain, IsSignaling: true);
8541 Chain = Sel.getValue(R: 1);
8542
8543 SDValue FltOfs = DAG.getSelect(
8544 DL: dl, VT: SrcVT, Cond: Sel, LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
8545 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8546
8547 SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl,
8548 VTList: DAG.getVTList(VT1: SrcVT, VT2: MVT::Other),
8549 Ops: {Chain, Src, FltOfs}, Flags);
8550 Chain = Val.getValue(R: 1);
8551 SDValue SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
8552 VTList: DAG.getVTList(VT1: DstVT, VT2: MVT::Other),
8553 Ops: {Chain, Val}, Flags);
8554 Chain = SInt.getValue(R: 1);
8555 SDValue IntOfs = DAG.getSelect(
8556 DL: dl, VT: DstVT, Cond: Sel, LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), RHS: SignMask);
8557 SDValue Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8558 return DAG.getMergeValues(Ops: {Result, Chain}, dl);
8559 } else {
8560 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8561 // FIXME: generated code sucks.
8562 SDValue True = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: MVT::ppcf128, N1: Src, N2: Cst);
8563 True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: True);
8564 True = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: True, N2: SignMask);
8565 SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Src);
8566 return DAG.getSelectCC(DL: dl, LHS: Src, RHS: Cst, True, False, Cond: ISD::SETGE);
8567 }
8568 }
8569 }
8570
8571 return SDValue();
8572 }
8573
8574 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8575 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8576
8577 ReuseLoadInfo RLI;
8578 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8579
8580 return DAG.getLoad(VT: Op.getValueType(), dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
8581 Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8582}
8583
8584// We're trying to insert a regular store, S, and then a load, L. If the
8585// incoming value, O, is a load, we might just be able to have our load use the
8586// address used by O. However, we don't know if anything else will store to
8587// that address before we can load from it. To prevent this situation, we need
8588// to insert our load, L, into the chain as a peer of O. To do this, we give L
8589// the same chain operand as O, we create a token factor from the chain results
8590// of O and L, and we replace all uses of O's chain result with that token
8591// factor (this last part is handled by makeEquivalentMemoryOrdering).
8592bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8593 ReuseLoadInfo &RLI,
8594 SelectionDAG &DAG,
8595 ISD::LoadExtType ET) const {
8596 // Conservatively skip reusing for constrained FP nodes.
8597 if (Op->isStrictFPOpcode())
8598 return false;
8599
8600 SDLoc dl(Op);
8601 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8602 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8603 if (ET == ISD::NON_EXTLOAD &&
8604 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8605 isOperationLegalOrCustom(Op: Op.getOpcode(),
8606 VT: Op.getOperand(i: 0).getValueType())) {
8607
8608 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8609 return true;
8610 }
8611
8612 LoadSDNode *LD = dyn_cast<LoadSDNode>(Val&: Op);
8613 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8614 LD->isNonTemporal())
8615 return false;
8616 if (LD->getMemoryVT() != MemVT)
8617 return false;
8618
8619 // If the result of the load is an illegal type, then we can't build a
8620 // valid chain for reuse since the legalised loads and the token factor node
8621 // that ties them together use a different output chain than the
8622 // illegal load.
8623 if (!isTypeLegal(VT: LD->getValueType(ResNo: 0)))
8624 return false;
8625
8626 RLI.Ptr = LD->getBasePtr();
8627 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8628 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8629 "Non-pre-inc AM on PPC?");
8630 RLI.Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RLI.Ptr.getValueType(), N1: RLI.Ptr,
8631 N2: LD->getOffset());
8632 }
8633
8634 RLI.Chain = LD->getChain();
8635 RLI.MPI = LD->getPointerInfo();
8636 RLI.IsDereferenceable = LD->isDereferenceable();
8637 RLI.IsInvariant = LD->isInvariant();
8638 RLI.Alignment = LD->getAlign();
8639 RLI.AAInfo = LD->getAAInfo();
8640 RLI.Ranges = LD->getRanges();
8641
8642 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8643 return true;
8644}
8645
8646 /// Analyze the profitability of a direct move: prefer a float load over an
8647 /// int load plus a direct move when the loaded integer value is used only by
8648 /// int-to-fp conversions.
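/// For example, if the loaded i32 is also used as an integer elsewhere, the
/// integer load is needed anyway and the direct move is profitable; if the
/// conversion is its only use, a floating-point load (e.g. LFIWAX/LFIWZX)
/// avoids the GPR-to-VSR move entirely.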
8649bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8650 SDNode *Origin = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0).getNode();
8651 if (Origin->getOpcode() != ISD::LOAD)
8652 return true;
8653
8654 // If there is no LXSIBZX/LXSIHZX (e.g. on Power8),
8655 // prefer a direct move if the memory size is 1 or 2 bytes.
8656 MachineMemOperand *MMO = cast<LoadSDNode>(Val: Origin)->getMemOperand();
8657 if (!Subtarget.hasP9Vector() &&
8658 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8659 return true;
8660
8661 for (SDUse &Use : Origin->uses()) {
8662
8663 // Only look at the users of the loaded value.
8664 if (Use.getResNo() != 0)
8665 continue;
8666
8667 SDNode *User = Use.getUser();
8668 if (User->getOpcode() != ISD::SINT_TO_FP &&
8669 User->getOpcode() != ISD::UINT_TO_FP &&
8670 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8671 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8672 return true;
8673 }
8674
8675 return false;
8676}
8677
8678static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8679 const PPCSubtarget &Subtarget,
8680 SDValue Chain = SDValue()) {
8681 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8682 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8683 SDLoc dl(Op);
8684
8685 // TODO: Any other flags to propagate?
8686 SDNodeFlags Flags;
8687 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8688
8689 // If we have FCFIDS, then use it when converting to single-precision.
8690 // Otherwise, convert to double-precision and then round.
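// For example, an i64 -> f32 conversion uses FCFIDS directly when FPCVT is
// available; otherwise FCFID produces an f64 and the caller emits the final
// FP_ROUND (see LowerINT_TO_FP).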
8691 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8692 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8693 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8694 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8695 if (Op->isStrictFPOpcode()) {
8696 if (!Chain)
8697 Chain = Op.getOperand(i: 0);
8698 return DAG.getNode(Opcode: getPPCStrictOpcode(Opc: ConvOpc), DL: dl,
8699 VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
8700 } else
8701 return DAG.getNode(Opcode: ConvOpc, DL: dl, VT: ConvTy, Operand: Src);
8702}
8703
8704/// Custom lowers integer to floating point conversions to use
8705/// the direct move instructions available in ISA 2.07 to avoid the
8706/// need for load/store combinations.
8707SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8708 SelectionDAG &DAG,
8709 const SDLoc &dl) const {
8710 assert((Op.getValueType() == MVT::f32 ||
8711 Op.getValueType() == MVT::f64) &&
8712 "Invalid floating point type as target of conversion");
8713 assert(Subtarget.hasFPCVT() &&
8714 "Int to FP conversions with direct moves require FPCVT");
8715 SDValue Src = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0);
8716 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8717 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8718 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8719 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8720 SDValue Mov = DAG.getNode(Opcode: MovOpc, DL: dl, VT: MVT::f64, Operand: Src);
8721 return convertIntToFP(Op, Src: Mov, DAG, Subtarget);
8722}
8723
8724static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8725
8726 EVT VecVT = Vec.getValueType();
8727 assert(VecVT.isVector() && "Expected a vector type.");
8728 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8729
8730 EVT EltVT = VecVT.getVectorElementType();
8731 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8732 EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);
8733
8734 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
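// For example, widening a 32-bit v4i8 concatenates it with three undef v4i8
// vectors to form a 128-bit v16i8.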
8735 SmallVector<SDValue, 16> Ops(NumConcat);
8736 Ops[0] = Vec;
8737 SDValue UndefVec = DAG.getUNDEF(VT: VecVT);
8738 for (unsigned i = 1; i < NumConcat; ++i)
8739 Ops[i] = UndefVec;
8740
8741 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: WideVT, Ops);
8742}
8743
8744SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8745 const SDLoc &dl) const {
8746 bool IsStrict = Op->isStrictFPOpcode();
8747 unsigned Opc = Op.getOpcode();
8748 SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
8749 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8750 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8751 "Unexpected conversion type");
8752 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8753 "Supports conversions to v2f64/v4f32 only.");
8754
8755 // TODO: Any other flags to propagate?
8756 SDNodeFlags Flags;
8757 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8758
8759 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8760 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8761
8762 SDValue Wide = widenVec(DAG, Vec: Src, dl);
8763 EVT WideVT = Wide.getValueType();
8764 unsigned WideNumElts = WideVT.getVectorNumElements();
8765 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8766
8767 SmallVector<int, 16> ShuffV;
8768 for (unsigned i = 0; i < WideNumElts; ++i)
8769 ShuffV.push_back(Elt: i + WideNumElts);
8770
8771 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8772 int SaveElts = FourEltRes ? 4 : 2;
8773 if (Subtarget.isLittleEndian())
8774 for (int i = 0; i < SaveElts; i++)
8775 ShuffV[i * Stride] = i;
8776 else
8777 for (int i = 1; i <= SaveElts; i++)
8778 ShuffV[i * Stride - 1] = i - 1;
8779
8780 SDValue ShuffleSrc2 =
8781 SignedConv ? DAG.getUNDEF(VT: WideVT) : DAG.getConstant(Val: 0, DL: dl, VT: WideVT);
8782 SDValue Arrange = DAG.getVectorShuffle(VT: WideVT, dl, N1: Wide, N2: ShuffleSrc2, Mask: ShuffV);
8783
8784 SDValue Extend;
8785 if (SignedConv) {
8786 Arrange = DAG.getBitcast(VT: IntermediateVT, V: Arrange);
8787 EVT ExtVT = Src.getValueType();
8788 if (Subtarget.hasP9Altivec())
8789 ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT.getVectorElementType(),
8790 NumElements: IntermediateVT.getVectorNumElements());
8791
8792 Extend = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: IntermediateVT, N1: Arrange,
8793 N2: DAG.getValueType(ExtVT));
8794 } else
8795 Extend = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntermediateVT, Operand: Arrange);
8796
8797 if (IsStrict)
8798 return DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other),
8799 Ops: {Op.getOperand(i: 0), Extend}, Flags);
8800
8801 return DAG.getNode(Opcode: Opc, DL: dl, VT: Op.getValueType(), Operand: Extend);
8802}
8803
8804SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8805 SelectionDAG &DAG) const {
8806 SDLoc dl(Op);
8807 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8808 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8809 bool IsStrict = Op->isStrictFPOpcode();
8810 SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
8811 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : DAG.getEntryNode();
8812
8813 // TODO: Any other flags to propagate?
8814 SDNodeFlags Flags;
8815 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8816
8817 EVT InVT = Src.getValueType();
8818 EVT OutVT = Op.getValueType();
8819 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8820 isOperationCustom(Op: Op.getOpcode(), VT: InVT))
8821 return LowerINT_TO_FPVector(Op, DAG, dl);
8822
8823 // Conversions to f128 are legal.
8824 if (Op.getValueType() == MVT::f128)
8825 return Subtarget.hasP9Vector() ? Op : SDValue();
8826
8827 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8828 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8829 return SDValue();
8830
8831 if (Src.getValueType() == MVT::i1) {
8832 SDValue Sel = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: Op.getValueType(), N1: Src,
8833 N2: DAG.getConstantFP(Val: 1.0, DL: dl, VT: Op.getValueType()),
8834 N3: DAG.getConstantFP(Val: 0.0, DL: dl, VT: Op.getValueType()));
8835 if (IsStrict)
8836 return DAG.getMergeValues(Ops: {Sel, Chain}, dl);
8837 else
8838 return Sel;
8839 }
8840
8841 // If we have direct moves, we can do the entire conversion and skip the
8842 // store/load; however, without FPCVT we can't do most conversions.
8843 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8844 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8845 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8846
8847 assert((IsSigned || Subtarget.hasFPCVT()) &&
8848 "UINT_TO_FP is supported only with FPCVT");
8849
8850 if (Src.getValueType() == MVT::i64) {
8851 SDValue SINT = Src;
8852 // When converting to single-precision, we actually need to convert
8853 // to double-precision first and then round to single-precision.
8854 // To avoid double-rounding effects during that operation, we have
8855 // to prepare the input operand. Bits that might be truncated when
8856 // converting to double-precision are replaced by a bit that won't
8857 // be lost at this stage, but is below the single-precision rounding
8858 // position.
8859 //
8860 // However, if -enable-unsafe-fp-math is in effect, accept double
8861 // rounding to avoid the extra overhead.
8862 if (Op.getValueType() == MVT::f32 &&
8863 !Subtarget.hasFPCVT() &&
8864 !DAG.getTarget().Options.UnsafeFPMath) {
8865
8866 // Twiddle input to make sure the low 11 bits are zero. (If this
8867 // is the case, we are guaranteed the value will fit into the 53 bit
8868 // mantissa of an IEEE double-precision value without rounding.)
8869 // If any of those low 11 bits were not zero originally, make sure
8870 // bit 12 (value 2048) is set instead, so that the final rounding
8871 // to single-precision gets the correct result.
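// Illustration: if the low 12 bits of the input are 0x001 they become 0x800
// after the sequence below, while 0x000 stays 0x000 and 0x800 stays 0x800.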
8872 SDValue Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64,
8873 N1: SINT, N2: DAG.getConstant(Val: 2047, DL: dl, VT: MVT::i64));
8874 Round = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
8875 N1: Round, N2: DAG.getConstant(Val: 2047, DL: dl, VT: MVT::i64));
8876 Round = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i64, N1: Round, N2: SINT);
8877 Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64, N1: Round,
8878 N2: DAG.getSignedConstant(Val: -2048, DL: dl, VT: MVT::i64));
8879
8880 // However, we cannot use that value unconditionally: if the magnitude
8881 // of the input value is small, the bit-twiddling we did above might
8882 // end up visibly changing the output. Fortunately, in that case, we
8883 // don't need to twiddle bits since the original input will convert
8884 // exactly to double-precision floating-point already. Therefore,
8885 // construct a conditional to use the original value if the top 11
8886 // bits are all sign-bit copies, and use the rounded value computed
8887 // above otherwise.
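// Concretely, the arithmetic shift by 53 below yields 0 or -1 exactly when
// the top 11 bits are sign-bit copies; adding 1 then gives 0 or 1, so the
// unsigned "greater than 1" compare selects the rounded value only for
// large-magnitude inputs.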
8888 SDValue Cond = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: MVT::i64,
8889 N1: SINT, N2: DAG.getConstant(Val: 53, DL: dl, VT: MVT::i32));
8890 Cond = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
8891 N1: Cond, N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i64));
8892 Cond = DAG.getSetCC(
8893 DL: dl,
8894 VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: MVT::i64),
8895 LHS: Cond, RHS: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i64), Cond: ISD::SETUGT);
8896
8897 SINT = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i64, N1: Cond, N2: Round, N3: SINT);
8898 }
8899
8900 ReuseLoadInfo RLI;
8901 SDValue Bits;
8902
8903 MachineFunction &MF = DAG.getMachineFunction();
8904 if (canReuseLoadAddress(Op: SINT, MemVT: MVT::i64, RLI, DAG)) {
8905 Bits = DAG.getLoad(VT: MVT::f64, dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
8906 Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8907 if (RLI.ResChain)
8908 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
8909 } else if (Subtarget.hasLFIWAX() &&
8910 canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::SEXTLOAD)) {
8911 MachineMemOperand *MMO =
8912 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
8913 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8914 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8915 Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWAX, dl,
8916 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8917 Ops, MemVT: MVT::i32, MMO);
8918 if (RLI.ResChain)
8919 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
8920 } else if (Subtarget.hasFPCVT() &&
8921 canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::ZEXTLOAD)) {
8922 MachineMemOperand *MMO =
8923 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
8924 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8925 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8926 Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWZX, dl,
8927 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8928 Ops, MemVT: MVT::i32, MMO);
8929 if (RLI.ResChain)
8930 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
8931 } else if (((Subtarget.hasLFIWAX() &&
8932 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8933 (Subtarget.hasFPCVT() &&
8934 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8935 SINT.getOperand(i: 0).getValueType() == MVT::i32) {
8936 MachineFrameInfo &MFI = MF.getFrameInfo();
8937 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8938
8939 int FrameIdx = MFI.CreateStackObject(Size: 4, Alignment: Align(4), isSpillSlot: false);
8940 SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
8941
8942 SDValue Store = DAG.getStore(Chain, dl, Val: SINT.getOperand(i: 0), Ptr: FIdx,
8943 PtrInfo: MachinePointerInfo::getFixedStack(
8944 MF&: DAG.getMachineFunction(), FI: FrameIdx));
8945 Chain = Store;
8946
8947 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8948 "Expected an i32 store");
8949
8950 RLI.Ptr = FIdx;
8951 RLI.Chain = Chain;
8952 RLI.MPI =
8953 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
8954 RLI.Alignment = Align(4);
8955
8956 MachineMemOperand *MMO =
8957 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
8958 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8959 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8960 Bits = DAG.getMemIntrinsicNode(Opcode: SINT.getOpcode() == ISD::ZERO_EXTEND ?
8961 PPCISD::LFIWZX : PPCISD::LFIWAX,
8962 dl, VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8963 Ops, MemVT: MVT::i32, MMO);
8964 Chain = Bits.getValue(R: 1);
8965 } else
8966 Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::f64, Operand: SINT);
8967
8968 SDValue FP = convertIntToFP(Op, Src: Bits, DAG, Subtarget, Chain);
8969 if (IsStrict)
8970 Chain = FP.getValue(R: 1);
8971
8972 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8973 if (IsStrict)
8974 FP = DAG.getNode(
8975 Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
8976 Ops: {Chain, FP, DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)},
8977 Flags);
8978 else
8979 FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
8980 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
8981 }
8982 return FP;
8983 }
8984
8985 assert(Src.getValueType() == MVT::i32 &&
8986 "Unhandled INT_TO_FP type in custom expander!");
8987 // Since we only generate this in 64-bit mode, we can take advantage of
8988 // 64-bit registers. In particular, sign extend the input value into a
8989 // 64-bit register with extsw, store the whole 64-bit value to the stack,
8990 // then lfd it and fcfid it.
8991 MachineFunction &MF = DAG.getMachineFunction();
8992 MachineFrameInfo &MFI = MF.getFrameInfo();
8993 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
8994
8995 SDValue Ld;
8996 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8997 ReuseLoadInfo RLI;
8998 bool ReusingLoad;
8999 if (!(ReusingLoad = canReuseLoadAddress(Op: Src, MemVT: MVT::i32, RLI, DAG))) {
9000 int FrameIdx = MFI.CreateStackObject(Size: 4, Alignment: Align(4), isSpillSlot: false);
9001 SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
9002
9003 SDValue Store = DAG.getStore(Chain, dl, Val: Src, Ptr: FIdx,
9004 PtrInfo: MachinePointerInfo::getFixedStack(
9005 MF&: DAG.getMachineFunction(), FI: FrameIdx));
9006 Chain = Store;
9007
9008 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9009 "Expected an i32 store");
9010
9011 RLI.Ptr = FIdx;
9012 RLI.Chain = Chain;
9013 RLI.MPI =
9014 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
9015 RLI.Alignment = Align(4);
9016 }
9017
9018 MachineMemOperand *MMO =
9019 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
9020 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
9021 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9022 Ld = DAG.getMemIntrinsicNode(Opcode: IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9023 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops,
9024 MemVT: MVT::i32, MMO);
9025 Chain = Ld.getValue(R: 1);
9026 if (ReusingLoad && RLI.ResChain) {
9027 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Ld.getValue(R: 1));
9028 }
9029 } else {
9030 assert(Subtarget.isPPC64() &&
9031 "i32->FP without LFIWAX supported only on PPC64");
9032
9033 int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
9034 SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
9035
9036 SDValue Ext64 = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i64, Operand: Src);
9037
9038 // STD the extended value into the stack slot.
9039 SDValue Store = DAG.getStore(
9040 Chain, dl, Val: Ext64, Ptr: FIdx,
9041 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
9042 Chain = Store;
9043
9044 // Load the value as a double.
9045 Ld = DAG.getLoad(
9046 VT: MVT::f64, dl, Chain, Ptr: FIdx,
9047 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
9048 Chain = Ld.getValue(R: 1);
9049 }
9050
9051 // FCFID it and return it.
9052 SDValue FP = convertIntToFP(Op, Src: Ld, DAG, Subtarget, Chain);
9053 if (IsStrict)
9054 Chain = FP.getValue(R: 1);
9055 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9056 if (IsStrict)
9057 FP = DAG.getNode(
9058 Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
9059 Ops: {Chain, FP, DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)}, Flags);
9060 else
9061 FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
9062 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
9063 }
9064 return FP;
9065}
9066
9067SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9068 SelectionDAG &DAG) const {
9069 SDLoc Dl(Op);
9070 MachineFunction &MF = DAG.getMachineFunction();
9071 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
9072 SDValue Chain = Op.getOperand(i: 0);
9073
9074 // If the requested mode is constant, just use the simpler mtfsb/mffscrni.
9075 if (auto *CVal = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))) {
9076 uint64_t Mode = CVal->getZExtValue();
9077 assert(Mode < 4 && "Unsupported rounding mode!");
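// Map the LLVM rounding mode to the FPSCR RN encoding (see LowerGET_ROUNDING):
//   0 (round to zero)    -> 0b01
//   1 (round to nearest) -> 0b00
//   2 (round to +inf)    -> 0b10
//   3 (round to -inf)    -> 0b11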
9078 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9079 if (Subtarget.isISA3_0())
9080 return SDValue(
9081 DAG.getMachineNode(
9082 Opcode: PPC::MFFSCRNI, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
9083 Ops: {DAG.getConstant(Val: InternalRnd, DL: Dl, VT: MVT::i32, isTarget: true), Chain}),
9084 1);
9085 SDNode *SetHi = DAG.getMachineNode(
9086 Opcode: (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
9087 Ops: {DAG.getConstant(Val: 30, DL: Dl, VT: MVT::i32, isTarget: true), Chain});
9088 SDNode *SetLo = DAG.getMachineNode(
9089 Opcode: (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
9090 Ops: {DAG.getConstant(Val: 31, DL: Dl, VT: MVT::i32, isTarget: true), SDValue(SetHi, 0)});
9091 return SDValue(SetLo, 0);
9092 }
9093
9094 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9095 SDValue One = DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32);
9096 SDValue SrcFlag = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: Op.getOperand(i: 1),
9097 N2: DAG.getConstant(Val: 3, DL: Dl, VT: MVT::i32));
9098 SDValue DstFlag = DAG.getNode(
9099 Opcode: ISD::XOR, DL: Dl, VT: MVT::i32, N1: SrcFlag,
9100 N2: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32,
9101 N1: DAG.getNOT(DL: Dl,
9102 Val: DAG.getNode(Opcode: ISD::SRL, DL: Dl, VT: MVT::i32, N1: SrcFlag, N2: One),
9103 VT: MVT::i32),
9104 N2: One));
9105 // For Power9, there is the faster mffscrn, so we don't need to read FPSCR.
9106 SDValue MFFS;
9107 if (!Subtarget.isISA3_0()) {
9108 MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: Dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
9109 Chain = MFFS.getValue(R: 1);
9110 }
9111 SDValue NewFPSCR;
9112 if (Subtarget.isPPC64()) {
9113 if (Subtarget.isISA3_0()) {
9114 NewFPSCR = DAG.getAnyExtOrTrunc(Op: DstFlag, DL: Dl, VT: MVT::i64);
9115 } else {
9116 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9117 SDNode *InsertRN = DAG.getMachineNode(
9118 Opcode: PPC::RLDIMI, dl: Dl, VT: MVT::i64,
9119 Ops: {DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::i64, Operand: MFFS),
9120 DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: Dl, VT: MVT::i64, Operand: DstFlag),
9121 DAG.getTargetConstant(Val: 0, DL: Dl, VT: MVT::i32),
9122 DAG.getTargetConstant(Val: 62, DL: Dl, VT: MVT::i32)});
9123 NewFPSCR = SDValue(InsertRN, 0);
9124 }
9125 NewFPSCR = DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::f64, Operand: NewFPSCR);
9126 } else {
9127 // In 32-bit mode, store the f64, then load and update its lower half.
9128 int SSFI = MF.getFrameInfo().CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
9129 SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
9130 SDValue Addr = Subtarget.isLittleEndian()
9131 ? StackSlot
9132 : DAG.getNode(Opcode: ISD::ADD, DL: Dl, VT: PtrVT, N1: StackSlot,
9133 N2: DAG.getConstant(Val: 4, DL: Dl, VT: PtrVT));
9134 if (Subtarget.isISA3_0()) {
9135 Chain = DAG.getStore(Chain, dl: Dl, Val: DstFlag, Ptr: Addr, PtrInfo: MachinePointerInfo());
9136 } else {
9137 Chain = DAG.getStore(Chain, dl: Dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
9138 SDValue Tmp =
9139 DAG.getLoad(VT: MVT::i32, dl: Dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo());
9140 Chain = Tmp.getValue(R: 1);
9141 Tmp = SDValue(DAG.getMachineNode(
9142 Opcode: PPC::RLWIMI, dl: Dl, VT: MVT::i32,
9143 Ops: {Tmp, DstFlag, DAG.getTargetConstant(Val: 0, DL: Dl, VT: MVT::i32),
9144 DAG.getTargetConstant(Val: 30, DL: Dl, VT: MVT::i32),
9145 DAG.getTargetConstant(Val: 31, DL: Dl, VT: MVT::i32)}),
9146 0);
9147 Chain = DAG.getStore(Chain, dl: Dl, Val: Tmp, Ptr: Addr, PtrInfo: MachinePointerInfo());
9148 }
9149 NewFPSCR =
9150 DAG.getLoad(VT: MVT::f64, dl: Dl, Chain, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
9151 Chain = NewFPSCR.getValue(R: 1);
9152 }
9153 if (Subtarget.isISA3_0())
9154 return SDValue(DAG.getMachineNode(Opcode: PPC::MFFSCRN, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
9155 Ops: {NewFPSCR, Chain}),
9156 1);
9157 SDValue Zero = DAG.getConstant(Val: 0, DL: Dl, VT: MVT::i32, isTarget: true);
9158 SDNode *MTFSF = DAG.getMachineNode(
9159 Opcode: PPC::MTFSF, dl: Dl, VT: MVT::Other,
9160 Ops: {DAG.getConstant(Val: 255, DL: Dl, VT: MVT::i32, isTarget: true), NewFPSCR, Zero, Zero, Chain});
9161 return SDValue(MTFSF, 0);
9162}
9163
9164SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9165 SelectionDAG &DAG) const {
9166 SDLoc dl(Op);
9167 /*
9168 The rounding mode is in bits 30:31 of the FPSCR, and has the following
9169 settings:
9170 00 Round to nearest
9171 01 Round to 0
9172 10 Round to +inf
9173 11 Round to -inf
9174
9175 GET_ROUNDING, on the other hand, expects the following:
9176 -1 Undefined
9177 0 Round to 0
9178 1 Round to nearest
9179 2 Round to +inf
9180 3 Round to -inf
9181
9182 To perform the conversion, we do:
9183 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
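   For example, RN = 0b00 (round to nearest) gives
   (0 & 0x3) ^ ((~0 & 0x3) >> 1) = 0 ^ 1 = 1, the GET_ROUNDING value for
   round to nearest.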
9184 */
9185
9186 MachineFunction &MF = DAG.getMachineFunction();
9187 EVT VT = Op.getValueType();
9188 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
9189
9190 // Save FP Control Word to register
9191 SDValue Chain = Op.getOperand(i: 0);
9192 SDValue MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
9193 Chain = MFFS.getValue(R: 1);
9194
9195 SDValue CWD;
9196 if (isTypeLegal(VT: MVT::i64)) {
9197 CWD = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32,
9198 Operand: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: MFFS));
9199 } else {
9200 // Save FP register to stack slot
9201 int SSFI = MF.getFrameInfo().CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
9202 SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
9203 Chain = DAG.getStore(Chain, dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
9204
9205 // Load FP Control Word from low 32 bits of stack slot.
9206 assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9207 "Stack slot adjustment is valid only on big endian subtargets!");
9208 SDValue Four = DAG.getConstant(Val: 4, DL: dl, VT: PtrVT);
9209 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackSlot, N2: Four);
9210 CWD = DAG.getLoad(VT: MVT::i32, dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo());
9211 Chain = CWD.getValue(R: 1);
9212 }
9213
9214 // Transform as necessary
9215 SDValue CWD1 =
9216 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
9217 N1: CWD, N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32));
9218 SDValue CWD2 =
9219 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
9220 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
9221 N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32,
9222 N1: CWD, N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32)),
9223 N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32)),
9224 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
9225
9226 SDValue RetVal =
9227 DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: CWD1, N2: CWD2);
9228
9229 RetVal =
9230 DAG.getNode(Opcode: (VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9231 DL: dl, VT, Operand: RetVal);
9232
9233 return DAG.getMergeValues(Ops: {RetVal, Chain}, dl);
9234}
9235
9236SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9237 EVT VT = Op.getValueType();
9238 uint64_t BitWidth = VT.getSizeInBits();
9239 SDLoc dl(Op);
9240 assert(Op.getNumOperands() == 3 &&
9241 VT == Op.getOperand(1).getValueType() &&
9242 "Unexpected SHL!");
9243
9244 // Expand into a bunch of logical ops. Note that these ops
9245 // depend on the PPC behavior for oversized shift amounts.
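// The computation below implements, for a shift amount Amt in [0, BitWidth):
//   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
//   OutLo = Lo << Amt
// The unwanted Lo term is zero because PPC shifts produce 0 when the shift
// amount is BitWidth or larger.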
9246 SDValue Lo = Op.getOperand(i: 0);
9247 SDValue Hi = Op.getOperand(i: 1);
9248 SDValue Amt = Op.getOperand(i: 2);
9249 EVT AmtVT = Amt.getValueType();
9250
9251 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9252 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9253 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Amt);
9254 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Tmp1);
9255 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR , DL: dl, VT, N1: Tmp2, N2: Tmp3);
9256 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9257 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9258 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Tmp5);
9259 SDValue OutHi = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9260 SDValue OutLo = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Amt);
9261 SDValue OutOps[] = { OutLo, OutHi };
9262 return DAG.getMergeValues(Ops: OutOps, dl);
9263}
9264
9265SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9266 EVT VT = Op.getValueType();
9267 SDLoc dl(Op);
9268 uint64_t BitWidth = VT.getSizeInBits();
9269 assert(Op.getNumOperands() == 3 &&
9270 VT == Op.getOperand(1).getValueType() &&
9271 "Unexpected SRL!");
9272
9273 // Expand into a bunch of logical ops. Note that these ops
9274 // depend on the PPC behavior for oversized shift amounts.
9275 SDValue Lo = Op.getOperand(i: 0);
9276 SDValue Hi = Op.getOperand(i: 1);
9277 SDValue Amt = Op.getOperand(i: 2);
9278 EVT AmtVT = Amt.getValueType();
9279
9280 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9281 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9282 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9283 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9284 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9285 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9286 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9287 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Tmp5);
9288 SDValue OutLo = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9289 SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Amt);
9290 SDValue OutOps[] = { OutLo, OutHi };
9291 return DAG.getMergeValues(Ops: OutOps, dl);
9292}
9293
9294SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9295 SDLoc dl(Op);
9296 EVT VT = Op.getValueType();
9297 uint64_t BitWidth = VT.getSizeInBits();
9298 assert(Op.getNumOperands() == 3 &&
9299 VT == Op.getOperand(1).getValueType() &&
9300 "Unexpected SRA!");
9301
9302 // Expand into a bunch of logical ops, followed by a select_cc.
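// Unlike SRL, SRA of an oversized amount does not produce zero (it produces
// copies of the sign bit), so the low result is selected between the
// "Amt <= BitWidth" combination (Tmp4) and the large-amount case (Tmp6).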
9303 SDValue Lo = Op.getOperand(i: 0);
9304 SDValue Hi = Op.getOperand(i: 1);
9305 SDValue Amt = Op.getOperand(i: 2);
9306 EVT AmtVT = Amt.getValueType();
9307
9308 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9309 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9310 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9311 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9312 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9313 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9314 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9315 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Tmp5);
9316 SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Amt);
9317 SDValue OutLo = DAG.getSelectCC(DL: dl, LHS: Tmp5, RHS: DAG.getConstant(Val: 0, DL: dl, VT: AmtVT),
9318 True: Tmp4, False: Tmp6, Cond: ISD::SETLE);
9319 SDValue OutOps[] = { OutLo, OutHi };
9320 return DAG.getMergeValues(Ops: OutOps, dl);
9321}
9322
9323SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9324 SelectionDAG &DAG) const {
9325 SDLoc dl(Op);
9326 EVT VT = Op.getValueType();
9327 unsigned BitWidth = VT.getSizeInBits();
9328
9329 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9330 SDValue X = Op.getOperand(i: 0);
9331 SDValue Y = Op.getOperand(i: 1);
9332 SDValue Z = Op.getOperand(i: 2);
9333 EVT AmtVT = Z.getValueType();
9334
9335 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9336 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9337 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9338 // on PowerPC shift by BW being well defined.
9339 Z = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Z,
9340 N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT: AmtVT));
9341 SDValue SubZ =
9342 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT, N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Z);
9343 X = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: X, N2: IsFSHL ? Z : SubZ);
9344 Y = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Y, N2: IsFSHL ? SubZ : Z);
9345 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: X, N2: Y);
9346}
9347
9348//===----------------------------------------------------------------------===//
9349// Vector related lowering.
9350//
9351
9352/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9353/// element size of SplatSize. Cast the result to VT.
9354static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9355 SelectionDAG &DAG, const SDLoc &dl) {
9356 static const MVT VTys[] = { // canonical VT to use for each size.
9357 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9358 };
9359
9360 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9361
9362 // For a splat of all ones, turn it into a vspltisb 0xFF to canonicalize.
9363 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9364 SplatSize = 1;
9365 Val = 0xFF;
9366 }
9367
9368 EVT CanonicalVT = VTys[SplatSize-1];
9369
9370 // Build a canonical splat for this value.
9371 // Explicitly truncate APInt here, as this API is used with a mix of
9372 // signed and unsigned values.
9373 return DAG.getBitcast(
9374 VT: ReqVT,
9375 V: DAG.getConstant(Val: APInt(64, Val).trunc(width: SplatSize * 8), DL: dl, VT: CanonicalVT));
9376}
9377
9378/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9379/// specified intrinsic ID.
9380static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9381 const SDLoc &dl, EVT DestVT = MVT::Other) {
9382 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9383 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9384 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op);
9385}
9386
9387/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9388/// specified intrinsic ID.
9389static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9390 SelectionDAG &DAG, const SDLoc &dl,
9391 EVT DestVT = MVT::Other) {
9392 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9393 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9394 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: LHS, N3: RHS);
9395}
9396
9397/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9398/// specified intrinsic ID.
9399static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9400 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9401 EVT DestVT = MVT::Other) {
9402 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9403 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9404 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op0, N3: Op1, N4: Op2);
9405}
9406
9407/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9408/// amount. The result has the specified value type.
9409static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9410 SelectionDAG &DAG, const SDLoc &dl) {
9411 // Force LHS/RHS to be the right type.
9412 LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: LHS);
9413 RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: RHS);
9414
9415 int Ops[16];
9416 for (unsigned i = 0; i != 16; ++i)
9417 Ops[i] = i + Amt;
9418 SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: LHS, N2: RHS, Mask: Ops);
9419 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
9420}
9421
9422/// Do we have an efficient pattern in a .td file for this node?
9423///
9424/// \param V - pointer to the BuildVectorSDNode being matched
9425/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9426///
9427/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9428/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9429/// the opposite is true (expansion is beneficial) are:
9430/// - The node builds a vector out of integers that are not 32 or 64-bits
9431/// - The node builds a vector out of constants
9432/// - The node is a "load-and-splat"
9433/// In all other cases, we will choose to keep the BUILD_VECTOR.
9434static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9435 bool HasDirectMove,
9436 bool HasP8Vector) {
9437 EVT VecVT = V->getValueType(ResNo: 0);
9438 bool RightType = VecVT == MVT::v2f64 ||
9439 (HasP8Vector && VecVT == MVT::v4f32) ||
9440 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9441 if (!RightType)
9442 return false;
9443
9444 bool IsSplat = true;
9445 bool IsLoad = false;
9446 SDValue Op0 = V->getOperand(Num: 0);
9447
9448 // This function is called in a block that confirms the node is not a constant
9449 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9450 // different constants.
9451 if (V->isConstant())
9452 return false;
9453 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9454 if (V->getOperand(Num: i).isUndef())
9455 return false;
9456 // We want to expand nodes that represent load-and-splat even if the
9457 // loaded value is a floating point truncation or conversion to int.
9458 if (V->getOperand(Num: i).getOpcode() == ISD::LOAD ||
9459 (V->getOperand(Num: i).getOpcode() == ISD::FP_ROUND &&
9460 V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD) ||
9461 (V->getOperand(Num: i).getOpcode() == ISD::FP_TO_SINT &&
9462 V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD) ||
9463 (V->getOperand(Num: i).getOpcode() == ISD::FP_TO_UINT &&
9464 V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD))
9465 IsLoad = true;
9466 // If the operands are different or the input is not a load and has more
9467 // uses than just this BV node, then it isn't a splat.
9468 if (V->getOperand(Num: i) != Op0 ||
9469 (!IsLoad && !V->isOnlyUserOf(N: V->getOperand(Num: i).getNode())))
9470 IsSplat = false;
9471 }
9472 return !(IsSplat && IsLoad);
9473}
9474
9475// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9476SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9477
9478 SDLoc dl(Op);
9479 SDValue Op0 = Op->getOperand(Num: 0);
9480
9481 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9482 (Op.getValueType() != MVT::f128))
9483 return SDValue();
9484
9485 SDValue Lo = Op0.getOperand(i: 0);
9486 SDValue Hi = Op0.getOperand(i: 1);
9487 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9488 return SDValue();
9489
9490 if (!Subtarget.isLittleEndian())
9491 std::swap(a&: Lo, b&: Hi);
9492
9493 return DAG.getNode(Opcode: PPCISD::BUILD_FP128, DL: dl, VT: MVT::f128, N1: Lo, N2: Hi);
9494}
9495
9496static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9497 const SDValue *InputLoad = &Op;
9498 while (InputLoad->getOpcode() == ISD::BITCAST)
9499 InputLoad = &InputLoad->getOperand(i: 0);
9500 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9501 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9502 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9503 InputLoad = &InputLoad->getOperand(i: 0);
9504 }
9505 if (InputLoad->getOpcode() != ISD::LOAD)
9506 return nullptr;
9507 LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
9508 return ISD::isNormalLoad(N: LD) ? InputLoad : nullptr;
9509}
9510
9511 // Convert the argument APFloat to a single precision APFloat if there is no
9512 // loss of information during the conversion and the resulting number is not
9513 // a denormal. Return true if successful.
9514bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9515 APFloat APFloatToConvert = ArgAPFloat;
9516 bool LosesInfo = true;
9517 APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9518 losesInfo: &LosesInfo);
9519 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9520 if (Success)
9521 ArgAPFloat = APFloatToConvert;
9522 return Success;
9523}
9524
9525 // Bitcast the argument APInt to a double and convert it to a single
9526 // precision APFloat. If there is no loss of information during the
9527 // conversion and the result is not a denormal, bitcast the APFloat back
9528 // to an APInt and assign it to the original argument. Return true if
9529 // successful.
9530bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9531 double DpValue = ArgAPInt.bitsToDouble();
9532 APFloat APFloatDp(DpValue);
9533 bool Success = convertToNonDenormSingle(ArgAPFloat&: APFloatDp);
9534 if (Success)
9535 ArgAPInt = APFloatDp.bitcastToAPInt();
9536 return Success;
9537}
9538
9539 // Nondestructive check for convertToNonDenormSingle.
9540bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9541 // Only convert if it loses info, since XXSPLTIDP should
9542 // handle the other case.
9543 APFloat APFloatToConvert = ArgAPFloat;
9544 bool LosesInfo = true;
9545 APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9546 losesInfo: &LosesInfo);
9547
9548 return (!LosesInfo && !APFloatToConvert.isDenormal());
9549}
9550
9551static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9552 unsigned &Opcode) {
9553 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Val: Op.getOperand(i: 0));
9554 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(N: InputNode))
9555 return false;
9556
9557 EVT Ty = Op->getValueType(ResNo: 0);
9558 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9559 // as we cannot handle extending loads for these types.
9560 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9561 ISD::isNON_EXTLoad(N: InputNode))
9562 return true;
9563
9564 EVT MemVT = InputNode->getMemoryVT();
9565 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9566 // memory VT is the same vector element VT type.
9567 // The loads feeding into the v8i16 and v16i8 types will be extending because
9568 // scalar i8/i16 are not legal types.
9569 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(N: InputNode) &&
9570 (MemVT == Ty.getVectorElementType()))
9571 return true;
9572
9573 if (Ty == MVT::v2i64) {
9574 // Check the extend type, when the input type is i32, and the output vector
9575 // type is v2i64.
9576 if (MemVT == MVT::i32) {
9577 if (ISD::isZEXTLoad(N: InputNode))
9578 Opcode = PPCISD::ZEXT_LD_SPLAT;
9579 if (ISD::isSEXTLoad(N: InputNode))
9580 Opcode = PPCISD::SEXT_LD_SPLAT;
9581 }
9582 return true;
9583 }
9584 return false;
9585}
9586
9587// If this is a case we can't handle, return null and let the default
9588// expansion code take care of it. If we CAN select this case, and if it
9589// selects to a single instruction, return Op. Otherwise, if we can codegen
9590// this case more efficiently than a constant pool load, lower it to the
9591// sequence of ops that should be used.
9592SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9593 SelectionDAG &DAG) const {
9594 SDLoc dl(Op);
9595 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: Op.getNode());
9596 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9597
9598 // Check if this is a splat of a constant value.
9599 APInt APSplatBits, APSplatUndef;
9600 unsigned SplatBitSize;
9601 bool HasAnyUndefs;
9602 bool BVNIsConstantSplat =
9603 BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
9604 HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian());
9605
9606 // If it is a splat of a double, check if we can shrink it to a 32 bit
9607 // non-denormal float which when converted back to double gives us the same
9608 // double. This is to exploit the XXSPLTIDP instruction.
9609 // If we lose precision, we use XXSPLTI32DX.
9610 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9611 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9612 // Check the type first to short-circuit so we don't modify APSplatBits if
9613 // this block isn't executed.
9614 if ((Op->getValueType(ResNo: 0) == MVT::v2f64) &&
9615 convertToNonDenormSingle(ArgAPInt&: APSplatBits)) {
9616 SDValue SplatNode = DAG.getNode(
9617 Opcode: PPCISD::XXSPLTI_SP_TO_DP, DL: dl, VT: MVT::v2f64,
9618 Operand: DAG.getTargetConstant(Val: APSplatBits.getZExtValue(), DL: dl, VT: MVT::i32));
9619 return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
9620 } else {
9621 // We may lose precision, so we have to use XXSPLTI32DX.
9622
9623 uint32_t Hi = Hi_32(Value: APSplatBits.getZExtValue());
9624 uint32_t Lo = Lo_32(Value: APSplatBits.getZExtValue());
9625 SDValue SplatNode = DAG.getUNDEF(VT: MVT::v2i64);
9626
9627 if (!Hi || !Lo)
9628 // If either half is 0, generate XXLXOR to set the register to 0.
9629 SplatNode = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::v2i64);
9630
9631 if (Hi)
9632 SplatNode = DAG.getNode(
9633 Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
9634 N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
9635 N3: DAG.getTargetConstant(Val: Hi, DL: dl, VT: MVT::i32));
9636
9637 if (Lo)
9638 SplatNode =
9639 DAG.getNode(Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
9640 N2: DAG.getTargetConstant(Val: 1, DL: dl, VT: MVT::i32),
9641 N3: DAG.getTargetConstant(Val: Lo, DL: dl, VT: MVT::i32));
9642
9643 return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
9644 }
9645 }
9646
9647 bool IsSplat64 = false;
9648 uint64_t SplatBits = 0;
9649 int32_t SextVal = 0;
9650 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9651 SplatBits = APSplatBits.getZExtValue();
9652 if (SplatBitSize <= 32) {
9653 SextVal = SignExtend32(X: SplatBits, B: SplatBitSize);
9654 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9655 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9656 bool P9Vector = Subtarget.hasP9Vector();
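// With P9 vector instructions a 64-bit splat in [-128,127] can be formed
// from a byte splat plus a sign extend; without them only [-16,15] is
// reachable via vspltisw plus vupklsw (see the splat handling below).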
9657 int32_t Hi = P9Vector ? 127 : 15;
9658 int32_t Lo = P9Vector ? -128 : -16;
9659 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9660 SextVal = static_cast<int32_t>(SplatBits);
9661 }
9662 }
9663
9664 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9665 unsigned NewOpcode = PPCISD::LD_SPLAT;
9666
9667 // Handle load-and-splat patterns as we have instructions that will do this
9668 // in one go.
9669 if (DAG.isSplatValue(V: Op, AllowUndefs: true) &&
9670 isValidSplatLoad(Subtarget, Op, Opcode&: NewOpcode)) {
9671 const SDValue *InputLoad = &Op.getOperand(i: 0);
9672 LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
9673
9674 // If the input load is an extending load, it will be an i32 -> i64
9675 // extending load and isValidSplatLoad() will update NewOpcode.
9676 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9677 unsigned ElementSize =
9678 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9679
9680 assert(((ElementSize == 2 * MemorySize)
9681 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9682 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9683 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9684 "Unmatched element size and opcode!\n");
9685
9686 // To check for a single use of this load, we have to check for vector
9687 // width (128 bits) / ElementSize uses (since each operand of the
9688 // BUILD_VECTOR is a separate use of the value).
9689 unsigned NumUsesOfInputLD = 128 / ElementSize;
9690 for (SDValue BVInOp : Op->ops())
9691 if (BVInOp.isUndef())
9692 NumUsesOfInputLD--;
9693
9694 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9695 // The cases below would also apply to "lfiwzx/lfiwax + LE target + index
9696 // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9697 // 15", but isValidSplatLoad() currently only returns true when the
9698 // element at index 0 is the load, so we do not run into trouble for
9699 // these cases.
9700 //
9701 // case 1 - lfiwzx/lfiwax
9702 // 1.1: load result is i32 and is sign/zero extend to i64;
9703 // 1.2: build a v2i64 vector type with above loaded value;
9704 // 1.3: the vector has only one value at index 0, others are all undef;
9705 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9706 if (NumUsesOfInputLD == 1 &&
9707 (Op->getValueType(ResNo: 0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9708 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9709 Subtarget.hasLFIWAX()))
9710 return SDValue();
9711
9712 // case 2 - lxvr[hb]x
9713 // 2.1: load result is at most i16;
9714 // 2.2: build a vector with above loaded value;
9715 // 2.3: the vector has only one value at index 0, others are all undef;
9716 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9717 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9718 Subtarget.isISA3_1() && ElementSize <= 16)
9719 return SDValue();
9720
9721 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9722 if (InputLoad->getNode()->hasNUsesOfValue(NUses: NumUsesOfInputLD, Value: 0) &&
9723 Subtarget.hasVSX()) {
9724 SDValue Ops[] = {
9725 LD->getChain(), // Chain
9726 LD->getBasePtr(), // Ptr
9727 DAG.getValueType(Op.getValueType()) // VT
9728 };
9729 SDValue LdSplt = DAG.getMemIntrinsicNode(
9730 Opcode: NewOpcode, dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other), Ops,
9731 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
9732 // Replace all uses of the output chain of the original load with the
9733 // output chain of the new load.
9734 DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: 1),
9735 To: LdSplt.getValue(R: 1));
9736 return LdSplt;
9737 }
9738 }
9739
9740 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9741 // 32 bits can be lowered to VSX instructions under certain conditions.
9742 // Without VSX, there is no pattern more efficient than expanding the node.
9743 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9744 haveEfficientBuildVectorPattern(V: BVN, HasDirectMove: Subtarget.hasDirectMove(),
9745 HasP8Vector: Subtarget.hasP8Vector()))
9746 return Op;
9747 return SDValue();
9748 }
9749
9750 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9751 unsigned SplatSize = SplatBitSize / 8;
9752
9753 // First, handle single instruction cases.
9754
9755 // All zeros?
9756 if (SplatBits == 0) {
9757 // Canonicalize all zero vectors to be v4i32.
9758 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9759 SDValue Z = DAG.getConstant(Val: 0, DL: dl, VT: MVT::v4i32);
9760 Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Z);
9761 }
9762 return Op;
9763 }
9764
9765 // We have XXSPLTIW for constant splats four bytes wide.
9766 // Since the vector length is a multiple of 4 bytes, 2-byte splats can be
9767 // replaced with 4-byte splats. We replicate SplatBits in the 2-byte case to
9768 // make a 4-byte splat element. For example, a 2-byte splat of 0xABAB can be
9769 // turned into a 4-byte splat of 0xABABABAB.
9770 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9771 return getCanonicalConstSplat(Val: SplatBits | (SplatBits << 16), SplatSize: SplatSize * 2,
9772 VT: Op.getValueType(), DAG, dl);
9773
9774 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9775 return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
9776 dl);
9777
9778 // We have XXSPLTIB for constant splats one byte wide.
9779 if (Subtarget.hasP9Vector() && SplatSize == 1)
9780 return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
9781 dl);
9782
9783 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9784 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9785 if (SextVal >= -16 && SextVal <= 15) {
9786 // SplatSize may be 1, 2, 4, or 8. For size 8, splat as size 4 and then
9787 // sign-extend the splatted word with vupklsw.
9788 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9789 SDValue Res =
9790 getCanonicalConstSplat(Val: SextVal, SplatSize: UseSize, VT: Op.getValueType(), DAG, dl);
9791 if (SplatSize != 8)
9792 return Res;
9793 return BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vupklsw, Op: Res, DAG, dl);
9794 }
9795
9796 // Two instruction sequences.
9797
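// With P9 vector instructions, a splat of a value in [-128,127] can be done
// with a v16i8 splat of the byte followed by a sign extension of each
// element to the final width (vupklsb / vextsb2w / vextsb2d).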
9798 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9799 SDValue C = DAG.getConstant(Val: (unsigned char)SextVal, DL: dl, VT: MVT::i32);
9800 SmallVector<SDValue, 16> Ops(16, C);
9801 SDValue BV = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops);
9802 unsigned IID;
9803 switch (SplatSize) {
9804 default:
9805 llvm_unreachable("Unexpected type for vector constant.");
9806 case 2:
9807 IID = Intrinsic::ppc_altivec_vupklsb;
9808 break;
9809 case 4:
9810 IID = Intrinsic::ppc_altivec_vextsb2w;
9811 break;
9812 case 8:
9813 IID = Intrinsic::ppc_altivec_vextsb2d;
9814 break;
9815 }
9816 SDValue Extend = BuildIntrinsicOp(IID, Op: BV, DAG, dl);
9817 return DAG.getBitcast(VT: Op->getValueType(ResNo: 0), V: Extend);
9818 }
9819 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9820
9821 // If this value is in the range [-32,30] and is even, use:
9822 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9823 // If this value is in the range [17,31] and is odd, use:
9824 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9825 // If this value is in the range [-31,-17] and is odd, use:
9826 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9827 // Note the last two are three-instruction sequences.
9828 if (SextVal >= -32 && SextVal <= 31) {
9829 // To avoid having these optimizations undone by constant folding,
9830 // we convert to a pseudo that will be expanded later into one of
9831 // the above forms.
9832 SDValue Elt = DAG.getSignedConstant(Val: SextVal, DL: dl, VT: MVT::i32);
9833 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9834 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9835 SDValue EltSize = DAG.getConstant(Val: SplatSize, DL: dl, VT: MVT::i32);
9836 SDValue RetVal = DAG.getNode(Opcode: PPCISD::VADD_SPLAT, DL: dl, VT, N1: Elt, N2: EltSize);
9837 if (VT == Op.getValueType())
9838 return RetVal;
9839 else
9840 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: RetVal);
9841 }
9842
9843 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9844 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9845 // for fneg/fabs.
9846 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9847 // Make -1 and vspltisw -1:
9848 SDValue OnesV = getCanonicalConstSplat(Val: -1, SplatSize: 4, VT: MVT::v4i32, DAG, dl);
9849
9850 // Make the VSLW intrinsic, computing 0x8000_0000.
9851 SDValue Res = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: OnesV,
9852 RHS: OnesV, DAG, dl);
9853
9854 // xor by OnesV to invert it.
9855 Res = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::v4i32, N1: Res, N2: OnesV);
9856 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9857 }
9858
9859 // Check to see if this is a wide variety of vsplti*, binop self cases.
9860 static const signed char SplatCsts[] = {
9861 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9862 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9863 };
9864
9865 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9866 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9867 // ambiguous cases (e.g. formation of 0x8000_0000); -1 is tried first.
9868 int i = SplatCsts[idx];
9869
9870 // Figure out what shift amount will be used by altivec if shifted by i in
9871 // this splat size.
9872 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9873
9874 // vsplti + shl self.
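// For example, a splat of 24 can be built as vspltisw 3 followed by a vslw
// of the result by itself (each element 3 shifted left by 3 gives 24).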
9875 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9876 SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
9877 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9878 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9879 Intrinsic::ppc_altivec_vslw
9880 };
9881 Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
9882 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9883 }
9884
9885 // vsplti + srl self.
9886 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9887 SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
9888 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9889 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9890 Intrinsic::ppc_altivec_vsrw
9891 };
9892 Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
9893 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9894 }
9895
9896 // vsplti + rol self.
9897 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9898 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9899 SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
9900 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9901 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9902 Intrinsic::ppc_altivec_vrlw
9903 };
9904 Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
9905 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9906 }
9907
9908 // t = vsplti c, result = vsldoi t, t, 1
9909 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9910 SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
9911 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9912 return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
9913 }
9914 // t = vsplti c, result = vsldoi t, t, 2
9915 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9916 SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
9917 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9918 return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
9919 }
9920 // t = vsplti c, result = vsldoi t, t, 3
9921 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9922 SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
9923 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9924 return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
9925 }
9926 }
9927
9928 return SDValue();
9929}
9930
9931/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9932/// the specified operations to build the shuffle.
9933static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9934 SDValue RHS, SelectionDAG &DAG,
9935 const SDLoc &dl) {
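// A perfect-shuffle table entry packs one operation and two operand entries:
// bits 26-29 hold the opcode, bits 13-25 the LHS table index and bits 0-12
// the RHS table index (each index encodes a 4-element mask in base 9).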
9936 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9937 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9938 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9939
9940 enum {
9941 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9942 OP_VMRGHW,
9943 OP_VMRGLW,
9944 OP_VSPLTISW0,
9945 OP_VSPLTISW1,
9946 OP_VSPLTISW2,
9947 OP_VSPLTISW3,
9948 OP_VSLDOI4,
9949 OP_VSLDOI8,
9950 OP_VSLDOI12
9951 };
9952
9953 if (OpNum == OP_COPY) {
9954 if (LHSID == (1*9+2)*9+3) return LHS;
9955 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9956 return RHS;
9957 }
9958
9959 SDValue OpLHS, OpRHS;
9960 OpLHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9961 OpRHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9962
9963 int ShufIdxs[16];
9964 switch (OpNum) {
9965 default: llvm_unreachable("Unknown i32 permute!");
9966 case OP_VMRGHW:
9967 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9968 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9969 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9970 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9971 break;
9972 case OP_VMRGLW:
9973 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9974 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9975 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9976 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9977 break;
9978 case OP_VSPLTISW0:
9979 for (unsigned i = 0; i != 16; ++i)
9980 ShufIdxs[i] = (i&3)+0;
9981 break;
9982 case OP_VSPLTISW1:
9983 for (unsigned i = 0; i != 16; ++i)
9984 ShufIdxs[i] = (i&3)+4;
9985 break;
9986 case OP_VSPLTISW2:
9987 for (unsigned i = 0; i != 16; ++i)
9988 ShufIdxs[i] = (i&3)+8;
9989 break;
9990 case OP_VSPLTISW3:
9991 for (unsigned i = 0; i != 16; ++i)
9992 ShufIdxs[i] = (i&3)+12;
9993 break;
9994 case OP_VSLDOI4:
9995 return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 4, VT: OpLHS.getValueType(), DAG, dl);
9996 case OP_VSLDOI8:
9997 return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 8, VT: OpLHS.getValueType(), DAG, dl);
9998 case OP_VSLDOI12:
9999 return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 12, VT: OpLHS.getValueType(), DAG, dl);
10000 }
10001 EVT VT = OpLHS.getValueType();
10002 OpLHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpLHS);
10003 OpRHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpRHS);
10004 SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OpLHS, N2: OpRHS, Mask: ShufIdxs);
10005 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
10006}
10007
10008/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10009/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10010/// SDValue.
10011SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10012 SelectionDAG &DAG) const {
10013 const unsigned BytesInVector = 16;
10014 bool IsLE = Subtarget.isLittleEndian();
10015 SDLoc dl(N);
10016 SDValue V1 = N->getOperand(Num: 0);
10017 SDValue V2 = N->getOperand(Num: 1);
10018 unsigned ShiftElts = 0, InsertAtByte = 0;
10019 bool Swap = false;
10020
10021 // Shifts required to get the byte we want at element 7.
10022 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10023 0, 15, 14, 13, 12, 11, 10, 9};
10024 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10025 1, 2, 3, 4, 5, 6, 7, 8};
10026
10027 ArrayRef<int> Mask = N->getMask();
10028 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10029
10030 // For each mask element, find out if we're just inserting something
10031 // from V2 into V1 or vice versa.
10032 // Possible permutations inserting an element from V2 into V1:
10033 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10034 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10035 // ...
10036 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10037 // Inserting from V1 into V2 will be similar, except mask range will be
10038 // [16,31].
10039
10040 bool FoundCandidate = false;
10041 // If both vector operands for the shuffle are the same vector, the mask
10042 // will contain only elements from the first one and the second one will be
10043 // undef.
10044 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10045 // Go through the mask of bytes to find an element that's being moved
10046 // from one vector to the other.
10047 for (unsigned i = 0; i < BytesInVector; ++i) {
10048 unsigned CurrentElement = Mask[i];
10049 // If the 2nd operand is undefined, we should only look for the source
10050 // element (VINSERTBSrcElem) in the Mask.
10051 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10052 continue;
10053
10054 bool OtherElementsInOrder = true;
10055 // Examine the other elements in the Mask to see if they're in original
10056 // order.
10057 for (unsigned j = 0; j < BytesInVector; ++j) {
10058 if (j == i)
10059 continue;
10060 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
10061 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
10062 // in which case we assume we're always picking from the 1st operand.
10063 int MaskOffset =
10064 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10065 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10066 OtherElementsInOrder = false;
10067 break;
10068 }
10069 }
10070 // If other elements are in original order, we record the number of shifts
10071 // we need to get the element we want into element 7. Also record which byte
10072 // in the vector we should insert into.
10073 if (OtherElementsInOrder) {
10074 // If 2nd operand is undefined, we assume no shifts and no swapping.
10075 if (V2.isUndef()) {
10076 ShiftElts = 0;
10077 Swap = false;
10078 } else {
10079 // Only need the low 4 bits for the shift; operands are swapped if CurrentElement >= 16.
10080 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10081 : BigEndianShifts[CurrentElement & 0xF];
10082 Swap = CurrentElement < BytesInVector;
10083 }
10084 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10085 FoundCandidate = true;
10086 break;
10087 }
10088 }
10089
10090 if (!FoundCandidate)
10091 return SDValue();
10092
10093 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10094 // optionally with VECSHL if shift is required.
10095 if (Swap)
10096 std::swap(a&: V1, b&: V2);
10097 if (V2.isUndef())
10098 V2 = V1;
10099 if (ShiftElts) {
10100 SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
10101 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10102 return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: Shl,
10103 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10104 }
10105 return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: V2,
10106 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10107}
10108
10109/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10110/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10111/// SDValue.
10112SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10113 SelectionDAG &DAG) const {
10114 const unsigned NumHalfWords = 8;
10115 const unsigned BytesInVector = NumHalfWords * 2;
10116 // Check that the shuffle is on half-words.
10117 if (!isNByteElemShuffleMask(N, Width: 2, StepLen: 1))
10118 return SDValue();
10119
10120 bool IsLE = Subtarget.isLittleEndian();
10121 SDLoc dl(N);
10122 SDValue V1 = N->getOperand(Num: 0);
10123 SDValue V2 = N->getOperand(Num: 1);
10124 unsigned ShiftElts = 0, InsertAtByte = 0;
10125 bool Swap = false;
10126
10127 // Shifts required to get the half-word we want at element 3.
10128 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10129 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10130
10131 uint32_t Mask = 0;
10132 uint32_t OriginalOrderLow = 0x1234567;
10133 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10134 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10135 // 32-bit space, only need 4-bit nibbles per element.
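// For example, a mask picking half-words 0..7 in order packs to 0x01234567
// (OriginalOrderLow) and one picking 8..15 in order packs to 0x89ABCDEF
// (OriginalOrderHigh).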
10136 for (unsigned i = 0; i < NumHalfWords; ++i) {
10137 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10138 Mask |= ((uint32_t)(N->getMaskElt(Idx: i * 2) / 2) << MaskShift);
10139 }
10140
10141 // For each mask element, find out if we're just inserting something
10142 // from V2 into V1 or vice versa. Possible permutations inserting an element
10143 // from V2 into V1:
10144 // X, 1, 2, 3, 4, 5, 6, 7
10145 // 0, X, 2, 3, 4, 5, 6, 7
10146 // 0, 1, X, 3, 4, 5, 6, 7
10147 // 0, 1, 2, X, 4, 5, 6, 7
10148 // 0, 1, 2, 3, X, 5, 6, 7
10149 // 0, 1, 2, 3, 4, X, 6, 7
10150 // 0, 1, 2, 3, 4, 5, X, 7
10151 // 0, 1, 2, 3, 4, 5, 6, X
10152 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10153
10154 bool FoundCandidate = false;
10155 // Go through the mask of half-words to find an element that's being moved
10156 // from one vector to the other.
10157 for (unsigned i = 0; i < NumHalfWords; ++i) {
10158 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10159 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10160 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10161 uint32_t TargetOrder = 0x0;
10162
10163 // If both vector operands for the shuffle are the same vector, the mask
10164 // will contain only elements from the first one and the second one will be
10165 // undef.
10166 if (V2.isUndef()) {
10167 ShiftElts = 0;
10168 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10169 TargetOrder = OriginalOrderLow;
10170 Swap = false;
10171 // Skip if this is not the correct element or if the mask of the other
10172 // elements does not match our expected order.
10173 if (MaskOneElt == VINSERTHSrcElem &&
10174 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10175 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10176 FoundCandidate = true;
10177 break;
10178 }
10179 } else { // If both operands are defined.
10180 // Target order is [8,15] if the current mask is between [0,7].
10181 TargetOrder =
10182 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10183 // Skip if the mask of the other elements does not match our expected order.
10184 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10185 // We only need the last 3 bits for the number of shifts.
10186 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10187 : BigEndianShifts[MaskOneElt & 0x7];
10188 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10189 Swap = MaskOneElt < NumHalfWords;
10190 FoundCandidate = true;
10191 break;
10192 }
10193 }
10194 }
10195
10196 if (!FoundCandidate)
10197 return SDValue();
10198
10199 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10200 // optionally with VECSHL if shift is required.
10201 if (Swap)
10202 std::swap(a&: V1, b&: V2);
10203 if (V2.isUndef())
10204 V2 = V1;
10205 SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
10206 if (ShiftElts) {
10207 // Double ShiftElts because we're left shifting on v16i8 type.
10208 SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
10209 N3: DAG.getConstant(Val: 2 * ShiftElts, DL: dl, VT: MVT::i32));
10210 SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: Shl);
10211 SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
10212 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10213 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10214 }
10215 SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V2);
10216 SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
10217 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10218 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10219}
10220
10221/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10222/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10223/// return the default SDValue.
10224SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10225 SelectionDAG &DAG) const {
10226 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10227 // to v16i8. Peek through the bitcasts to get the actual operands.
10228 SDValue LHS = peekThroughBitcasts(V: SVN->getOperand(Num: 0));
10229 SDValue RHS = peekThroughBitcasts(V: SVN->getOperand(Num: 1));
10230
10231 auto ShuffleMask = SVN->getMask();
10232 SDValue VecShuffle(SVN, 0);
10233 SDLoc DL(SVN);
10234
10235 // Check that we have a four byte shuffle.
10236 if (!isNByteElemShuffleMask(N: SVN, Width: 4, StepLen: 1))
10237 return SDValue();
10238
10239 // Canonicalize so that the RHS is a BUILD_VECTOR when lowering to xxsplti32dx.
10240 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10241 std::swap(a&: LHS, b&: RHS);
10242 VecShuffle = peekThroughBitcasts(V: DAG.getCommutedVectorShuffle(SV: *SVN));
10243 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(Val&: VecShuffle);
10244 if (!CommutedSV)
10245 return SDValue();
10246 ShuffleMask = CommutedSV->getMask();
10247 }
10248
10249 // Ensure that the RHS is a vector of constants.
10250 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: RHS.getNode());
10251 if (!BVN)
10252 return SDValue();
10253
10254 // Check if RHS is a splat of 4-bytes (or smaller).
10255 APInt APSplatValue, APSplatUndef;
10256 unsigned SplatBitSize;
10257 bool HasAnyUndefs;
10258 if (!BVN->isConstantSplat(SplatValue&: APSplatValue, SplatUndef&: APSplatUndef, SplatBitSize,
10259 HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian()) ||
10260 SplatBitSize > 32)
10261 return SDValue();
10262
10263 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10264 // The instruction splats a constant C into two words of the source vector
10265 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10266 // Thus we check that the shuffle mask is the equivalent of
10267 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10268 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10269 // within each word are consecutive, so we only need to check the first byte.
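// For example, the byte mask <0..3, 16..19, 8..11, 24..27> keeps words 0 and 2
// of the LHS and takes words 1 and 3 from the constant RHS, so it matches the
// first pattern below.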
10270 SDValue Index;
10271 bool IsLE = Subtarget.isLittleEndian();
10272 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10273 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10274 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10275 Index = DAG.getTargetConstant(Val: IsLE ? 0 : 1, DL, VT: MVT::i32);
10276 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10277 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10278 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10279 Index = DAG.getTargetConstant(Val: IsLE ? 1 : 0, DL, VT: MVT::i32);
10280 else
10281 return SDValue();
10282
10283 // If the splat is narrower than 32 bits, widen it to the 32-bit value
10284 // needed by XXSPLTI32DX.
10285 unsigned SplatVal = APSplatValue.getZExtValue();
10286 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10287 SplatVal |= (SplatVal << SplatBitSize);
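  // For example, an 8-bit splat of 0xAB becomes 0xABAB after one iteration and
  // 0xABABABAB after the second.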
10288
10289 SDValue SplatNode = DAG.getNode(
10290 Opcode: PPCISD::XXSPLTI32DX, DL, VT: MVT::v2i64, N1: DAG.getBitcast(VT: MVT::v2i64, V: LHS),
10291 N2: Index, N3: DAG.getTargetConstant(Val: SplatVal, DL, VT: MVT::i32));
10292 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: SplatNode);
10293}
10294
10295/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10296 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if the shift amount is
10297 /// a multiple of 8. Otherwise we convert it to a scalar i128 rotation, i.e.
10298 /// (or (shl x, C1), (srl x, 128-C1)).
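/// For example, ROTL(v1i128 %x, 16) becomes a two-byte rotate expressed as a
/// v16i8 shuffle of the bitcast of %x.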
10299SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10300 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10301 assert(Op.getValueType() == MVT::v1i128 &&
10302 "Only set v1i128 as custom, other type shouldn't reach here!");
10303 SDLoc dl(Op);
10304 SDValue N0 = peekThroughBitcasts(V: Op.getOperand(i: 0));
10305 SDValue N1 = peekThroughBitcasts(V: Op.getOperand(i: 1));
10306 unsigned SHLAmt = N1.getConstantOperandVal(i: 0);
10307 if (SHLAmt % 8 == 0) {
10308 std::array<int, 16> Mask;
10309 std::iota(first: Mask.begin(), last: Mask.end(), value: 0);
10310 std::rotate(first: Mask.begin(), middle: Mask.begin() + SHLAmt / 8, last: Mask.end());
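    // For example, SHLAmt == 16 rotates the identity mask by two bytes,
    // producing <2, 3, ..., 15, 0, 1>.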
10311 if (SDValue Shuffle =
10312 DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: DAG.getBitcast(VT: MVT::v16i8, V: N0),
10313 N2: DAG.getUNDEF(VT: MVT::v16i8), Mask))
10314 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: Shuffle);
10315 }
10316 SDValue ArgVal = DAG.getBitcast(VT: MVT::i128, V: N0);
10317 SDValue SHLOp = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ArgVal,
10318 N2: DAG.getConstant(Val: SHLAmt, DL: dl, VT: MVT::i32));
10319 SDValue SRLOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: ArgVal,
10320 N2: DAG.getConstant(Val: 128 - SHLAmt, DL: dl, VT: MVT::i32));
10321 SDValue OROp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i128, N1: SHLOp, N2: SRLOp);
10322 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: OROp);
10323}
10324
10325/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10326/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10327/// return the code it can be lowered into. Worst case, it can always be
10328/// lowered into a vperm.
10329SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10330 SelectionDAG &DAG) const {
10331 SDLoc dl(Op);
10332 SDValue V1 = Op.getOperand(i: 0);
10333 SDValue V2 = Op.getOperand(i: 1);
10334 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
10335
10336 // Any nodes that were combined in the target-independent combiner prior
10337 // to vector legalization will not be sent to the target combine. Try to
10338 // combine it here.
10339 if (SDValue NewShuffle = combineVectorShuffle(SVN: SVOp, DAG)) {
10340 if (!isa<ShuffleVectorSDNode>(Val: NewShuffle))
10341 return NewShuffle;
10342 Op = NewShuffle;
10343 SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
10344 V1 = Op.getOperand(i: 0);
10345 V2 = Op.getOperand(i: 1);
10346 }
10347 EVT VT = Op.getValueType();
10348 bool isLittleEndian = Subtarget.isLittleEndian();
10349
10350 unsigned ShiftElts, InsertAtByte;
10351 bool Swap = false;
10352
10353 // If this is a load-and-splat, we can do that with a single instruction
10354 // in some cases. However if the load has multiple uses, we don't want to
10355 // combine it because that will just produce multiple loads.
10356 bool IsPermutedLoad = false;
10357 const SDValue *InputLoad = getNormalLoadInput(Op: V1, IsPermuted&: IsPermutedLoad);
10358 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10359 (PPC::isSplatShuffleMask(N: SVOp, EltSize: 4) || PPC::isSplatShuffleMask(N: SVOp, EltSize: 8)) &&
10360 InputLoad->hasOneUse()) {
10361 bool IsFourByte = PPC::isSplatShuffleMask(N: SVOp, EltSize: 4);
10362 int SplatIdx =
10363 PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: IsFourByte ? 4 : 8, DAG);
10364
10365 // The splat index for permuted loads will be in the left half of the vector
10366 // which is strictly wider than the loaded value by 8 bytes. So we need to
10367 // adjust the splat index to point to the correct address in memory.
10368 if (IsPermutedLoad) {
10369 assert((isLittleEndian || IsFourByte) &&
10370 "Unexpected size for permuted load on big endian target");
10371 SplatIdx += IsFourByte ? 2 : 1;
10372 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10373 "Splat of a value outside of the loaded memory");
10374 }
10375
10376 LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
10377 // For 4-byte load-and-splat, we need Power9.
10378 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10379 uint64_t Offset = 0;
10380 if (IsFourByte)
10381 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10382 else
10383 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
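      // For example, on little endian a 4-byte splat of element 0 (in big-endian
      // mnemonic numbering) reads the highest-addressed word of the vector, so
      // Offset == (3 - 0) * 4 == 12.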
10384
10385 // If the width of the load is the same as the width of the splat,
10386 // loading with an offset would load the wrong memory.
10387 if (LD->getValueType(ResNo: 0).getSizeInBits() == (IsFourByte ? 32 : 64))
10388 Offset = 0;
10389
10390 SDValue BasePtr = LD->getBasePtr();
10391 if (Offset != 0)
10392 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
10393 N1: BasePtr, N2: DAG.getIntPtrConstant(Val: Offset, DL: dl));
10394 SDValue Ops[] = {
10395 LD->getChain(), // Chain
10396 BasePtr, // BasePtr
10397 DAG.getValueType(Op.getValueType()) // VT
10398 };
10399 SDVTList VTL =
10400 DAG.getVTList(VT1: IsFourByte ? MVT::v4i32 : MVT::v2i64, VT2: MVT::Other);
10401 SDValue LdSplt =
10402 DAG.getMemIntrinsicNode(Opcode: PPCISD::LD_SPLAT, dl, VTList: VTL,
10403 Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
10404 DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: 1), To: LdSplt.getValue(R: 1));
10405 if (LdSplt.getValueType() != SVOp->getValueType(ResNo: 0))
10406 LdSplt = DAG.getBitcast(VT: SVOp->getValueType(ResNo: 0), V: LdSplt);
10407 return LdSplt;
10408 }
10409 }
10410
10411 // All v2i64 and v2f64 shuffles are legal
10412 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10413 return Op;
10414
10415 if (Subtarget.hasP9Vector() &&
10416 PPC::isXXINSERTWMask(N: SVOp, ShiftElts, InsertAtByte, Swap,
10417 IsLE: isLittleEndian)) {
10418 if (V2.isUndef())
10419 V2 = V1;
10420 else if (Swap)
10421 std::swap(a&: V1, b&: V2);
10422 SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10423 SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2);
10424 if (ShiftElts) {
10425 SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv2, N2: Conv2,
10426 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10427 SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Shl,
10428 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10429 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10430 }
10431 SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
10432 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10433 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10434 }
10435
10436 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10437 SDValue SplatInsertNode;
10438 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVN: SVOp, DAG)))
10439 return SplatInsertNode;
10440 }
10441
10442 if (Subtarget.hasP9Altivec()) {
10443 SDValue NewISDNode;
10444 if ((NewISDNode = lowerToVINSERTH(N: SVOp, DAG)))
10445 return NewISDNode;
10446
10447 if ((NewISDNode = lowerToVINSERTB(N: SVOp, DAG)))
10448 return NewISDNode;
10449 }
10450
10451 if (Subtarget.hasVSX() &&
10452 PPC::isXXSLDWIShuffleMask(N: SVOp, ShiftElts, Swap, IsLE: isLittleEndian)) {
10453 if (Swap)
10454 std::swap(a&: V1, b&: V2);
10455 SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10456 SDValue Conv2 =
10457 DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2.isUndef() ? V1 : V2);
10458
10459 SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
10460 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10461 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Shl);
10462 }
10463
10464 if (Subtarget.hasVSX() &&
10465 PPC::isXXPERMDIShuffleMask(N: SVOp, DM&: ShiftElts, Swap, IsLE: isLittleEndian)) {
10466 if (Swap)
10467 std::swap(a&: V1, b&: V2);
10468 SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
10469 SDValue Conv2 =
10470 DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V2.isUndef() ? V1 : V2);
10471
10472 SDValue PermDI = DAG.getNode(Opcode: PPCISD::XXPERMDI, DL: dl, VT: MVT::v2i64, N1: Conv1, N2: Conv2,
10473 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10474 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: PermDI);
10475 }
10476
10477 if (Subtarget.hasP9Vector()) {
10478 if (PPC::isXXBRHShuffleMask(N: SVOp)) {
10479 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
10480 SDValue ReveHWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v8i16, Operand: Conv);
10481 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveHWord);
10482 } else if (PPC::isXXBRWShuffleMask(N: SVOp)) {
10483 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10484 SDValue ReveWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v4i32, Operand: Conv);
10485 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveWord);
10486 } else if (PPC::isXXBRDShuffleMask(N: SVOp)) {
10487 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
10488 SDValue ReveDWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Conv);
10489 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveDWord);
10490 } else if (PPC::isXXBRQShuffleMask(N: SVOp)) {
10491 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: V1);
10492 SDValue ReveQWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v1i128, Operand: Conv);
10493 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveQWord);
10494 }
10495 }
10496
10497 if (Subtarget.hasVSX()) {
10498 if (V2.isUndef() && PPC::isSplatShuffleMask(N: SVOp, EltSize: 4)) {
10499 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: 4, DAG);
10500
10501 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10502 SDValue Splat = DAG.getNode(Opcode: PPCISD::XXSPLT, DL: dl, VT: MVT::v4i32, N1: Conv,
10503 N2: DAG.getConstant(Val: SplatIdx, DL: dl, VT: MVT::i32));
10504 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Splat);
10505 }
10506
10507 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10508 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: 1, DAG) == 8) {
10509 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: V1);
10510 SDValue Swap = DAG.getNode(Opcode: PPCISD::SWAP_NO_CHAIN, DL: dl, VT: MVT::v2f64, Operand: Conv);
10511 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Swap);
10512 }
10513 }
10514
10515 // Cases that are handled by instructions that take permute immediates
10516 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10517 // selected by the instruction selector.
10518 if (V2.isUndef()) {
10519 if (PPC::isSplatShuffleMask(N: SVOp, EltSize: 1) ||
10520 PPC::isSplatShuffleMask(N: SVOp, EltSize: 2) ||
10521 PPC::isSplatShuffleMask(N: SVOp, EltSize: 4) ||
10522 PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
10523 PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
10524 PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: 1, DAG) != -1 ||
10525 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind: 1, DAG) ||
10526 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind: 1, DAG) ||
10527 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind: 1, DAG) ||
10528 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind: 1, DAG) ||
10529 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind: 1, DAG) ||
10530 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind: 1, DAG) ||
10531 (Subtarget.hasP8Altivec() && (
10532 PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
10533 PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind: 1, DAG) ||
10534 PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind: 1, DAG)))) {
10535 return Op;
10536 }
10537 }
10538
10539 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10540 // and produce a fixed permutation. If any of these match, do not lower to
10541 // VPERM.
10542 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
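  // ShuffleKind 0 matches big-endian shuffles with two distinct inputs, 1 (used
  // above for the single-input cases) matches either-endian shuffles with
  // identical inputs, and 2 matches little-endian shuffles with distinct inputs.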
10543 if (PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
10544 PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
10545 PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind, DAG) != -1 ||
10546 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind, DAG) ||
10547 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind, DAG) ||
10548 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind, DAG) ||
10549 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind, DAG) ||
10550 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind, DAG) ||
10551 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind, DAG) ||
10552 (Subtarget.hasP8Altivec() && (
10553 PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
10554 PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind, DAG) ||
10555 PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind, DAG))))
10556 return Op;
10557
10558 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10559 // perfect shuffle table to emit an optimal matching sequence.
10560 ArrayRef<int> PermMask = SVOp->getMask();
10561
10562 if (!DisablePerfectShuffle && !isLittleEndian) {
10563 unsigned PFIndexes[4];
10564 bool isFourElementShuffle = true;
10565 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10566 ++i) { // Element number
10567 unsigned EltNo = 8; // Start out undef.
10568 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10569 if (PermMask[i * 4 + j] < 0)
10570 continue; // Undef, ignore it.
10571
10572 unsigned ByteSource = PermMask[i * 4 + j];
10573 if ((ByteSource & 3) != j) {
10574 isFourElementShuffle = false;
10575 break;
10576 }
10577
10578 if (EltNo == 8) {
10579 EltNo = ByteSource / 4;
10580 } else if (EltNo != ByteSource / 4) {
10581 isFourElementShuffle = false;
10582 break;
10583 }
10584 }
10585 PFIndexes[i] = EltNo;
10586 }
10587
10588 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10589 // perfect shuffle vector to determine if it is cost effective to do this as
10590 // discrete instructions, or whether we should use a vperm.
10591 // For now, we skip this for little endian until such time as we have a
10592 // little-endian perfect shuffle table.
10593 if (isFourElementShuffle) {
10594 // Compute the index in the perfect shuffle table.
10595 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10596 PFIndexes[2] * 9 + PFIndexes[3];
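      // Each PFIndex is in [0, 8], with 8 meaning undef; e.g. element sources
      // (0, 1, 2, 3) give index 0*729 + 1*81 + 2*9 + 3 == 102.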
10597
10598 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10599 unsigned Cost = (PFEntry >> 30);
10600
10601 // Determining when to avoid vperm is tricky. Many things affect the cost
10602 // of vperm, particularly how many times the perm mask needs to be
10603 // computed. For example, if the perm mask can be hoisted out of a loop or
10604 // is already used (perhaps because there are multiple permutes with the
10605 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10606 // permute mask out of the loop requires an extra register.
10607 //
10608 // As a compromise, we only emit discrete instructions if the shuffle can
10609 // be generated in 3 or fewer operations. When we have loop information
10610 // available, if this block is within a loop, we should avoid using vperm
10611 // for 3-operation perms and use a constant pool load instead.
10612 if (Cost < 3)
10613 return GeneratePerfectShuffle(PFEntry, LHS: V1, RHS: V2, DAG, dl);
10614 }
10615 }
10616
10617 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10618 // vector that will get spilled to the constant pool.
10619 if (V2.isUndef()) V2 = V1;
10620
10621 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10622}
10623
10624SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10625 ArrayRef<int> PermMask, EVT VT,
10626 SDValue V1, SDValue V2) const {
10627 unsigned Opcode = PPCISD::VPERM;
10628 EVT ValType = V1.getValueType();
10629 SDLoc dl(Op);
10630 bool NeedSwap = false;
10631 bool isLittleEndian = Subtarget.isLittleEndian();
10632 bool isPPC64 = Subtarget.isPPC64();
10633
10634 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10635 (V1->hasOneUse() || V2->hasOneUse())) {
10636 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - using "
10637 "XXPERM instead\n");
10638 Opcode = PPCISD::XXPERM;
10639
10640 // The second input to XXPERM is also an output, so if that input has
10641 // multiple uses a copy would be required. Prefer the single-use operand
10642 // as the second input to avoid the copy.
10643 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10644 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10645 std::swap(a&: V1, b&: V2);
10646 NeedSwap = !NeedSwap;
10647 }
10648 }
10649
10650 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10651 // that it is in input element units, not in bytes. Convert now.
10652
10653 // For little endian, the order of the input vectors is reversed, and
10654 // the permutation mask is complemented with respect to 31. This is
10655 // necessary to produce proper semantics with the big-endian-based vperm
10656 // instruction.
10657 EVT EltVT = V1.getValueType().getVectorElementType();
10658 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10659
10660 bool V1HasXXSWAPD = V1->getOperand(Num: 0)->getOpcode() == PPCISD::XXSWAPD;
10661 bool V2HasXXSWAPD = V2->getOperand(Num: 0)->getOpcode() == PPCISD::XXSWAPD;
10662
10663 /*
10664 Vectors will be appended like so: [ V1 | V2 ]
10665 XXSWAPD on V1:
10666 [ A | B | C | D ] -> [ C | D | A | B ]
10667 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10668 i.e. index of A, B += 8, and index of C, D -= 8.
10669 XXSWAPD on V2:
10670 [ E | F | G | H ] -> [ G | H | E | F ]
10671 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10672 i.e. index of E, F += 8, index of G, H -= 8
10673 Swap V1 and V2:
10674 [ V1 | V2 ] -> [ V2 | V1 ]
10675 0-15 16-31 0-15 16-31
10676 i.e. index of V1 += 16, index of V2 -= 16
10677 */
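  // For example, in a v16i8 shuffle an element taken from source byte 5 becomes
  // control byte 31 - 5 == 26 on little endian, but stays 5 on big endian.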
10678
10679 SmallVector<SDValue, 16> ResultMask;
10680 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10681 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10682
10683 if (V1HasXXSWAPD) {
10684 if (SrcElt < 8)
10685 SrcElt += 8;
10686 else if (SrcElt < 16)
10687 SrcElt -= 8;
10688 }
10689 if (V2HasXXSWAPD) {
10690 if (SrcElt > 23)
10691 SrcElt -= 8;
10692 else if (SrcElt > 15)
10693 SrcElt += 8;
10694 }
10695 if (NeedSwap) {
10696 if (SrcElt < 16)
10697 SrcElt += 16;
10698 else
10699 SrcElt -= 16;
10700 }
10701 for (unsigned j = 0; j != BytesPerElement; ++j)
10702 if (isLittleEndian)
10703 ResultMask.push_back(
10704 Elt: DAG.getConstant(Val: 31 - (SrcElt * BytesPerElement + j), DL: dl, VT: MVT::i32));
10705 else
10706 ResultMask.push_back(
10707 Elt: DAG.getConstant(Val: SrcElt * BytesPerElement + j, DL: dl, VT: MVT::i32));
10708 }
10709
10710 if (V1HasXXSWAPD) {
10711 dl = SDLoc(V1->getOperand(Num: 0));
10712 V1 = V1->getOperand(Num: 0)->getOperand(Num: 1);
10713 }
10714 if (V2HasXXSWAPD) {
10715 dl = SDLoc(V2->getOperand(Num: 0));
10716 V2 = V2->getOperand(Num: 0)->getOperand(Num: 1);
10717 }
10718
10719 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10720 if (ValType != MVT::v2f64)
10721 V1 = DAG.getBitcast(VT: MVT::v2f64, V: V1);
10722 if (V2.getValueType() != MVT::v2f64)
10723 V2 = DAG.getBitcast(VT: MVT::v2f64, V: V2);
10724 }
10725
10726 ShufflesHandledWithVPERM++;
10727 SDValue VPermMask = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops: ResultMask);
10728 LLVM_DEBUG({
10729 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10730 if (Opcode == PPCISD::XXPERM) {
10731 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10732 } else {
10733 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10734 }
10735 SVOp->dump();
10736 dbgs() << "With the following permute control vector:\n";
10737 VPermMask.dump();
10738 });
10739
10740 if (Opcode == PPCISD::XXPERM)
10741 VPermMask = DAG.getBitcast(VT: MVT::v4i32, V: VPermMask);
10742
10743 // For little endian we only need to swap the operand order here; the
10744 // permute mask above was already computed for the swapped inputs.
10745 if (isLittleEndian)
10746 std::swap(a&: V1, b&: V2);
10747
10748 SDValue VPERMNode =
10749 DAG.getNode(Opcode, DL: dl, VT: V1.getValueType(), N1: V1, N2: V2, N3: VPermMask);
10750
10751 VPERMNode = DAG.getBitcast(VT: ValType, V: VPERMNode);
10752 return VPERMNode;
10753}
10754
10755 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10756 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10757 /// information about the intrinsic.
10758static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10759 bool &isDot, const PPCSubtarget &Subtarget) {
10760 unsigned IntrinsicID = Intrin.getConstantOperandVal(i: 0);
10761 CompareOpc = -1;
10762 isDot = false;
10763 switch (IntrinsicID) {
10764 default:
10765 return false;
10766 // Comparison predicates.
10767 case Intrinsic::ppc_altivec_vcmpbfp_p:
10768 CompareOpc = 966;
10769 isDot = true;
10770 break;
10771 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10772 CompareOpc = 198;
10773 isDot = true;
10774 break;
10775 case Intrinsic::ppc_altivec_vcmpequb_p:
10776 CompareOpc = 6;
10777 isDot = true;
10778 break;
10779 case Intrinsic::ppc_altivec_vcmpequh_p:
10780 CompareOpc = 70;
10781 isDot = true;
10782 break;
10783 case Intrinsic::ppc_altivec_vcmpequw_p:
10784 CompareOpc = 134;
10785 isDot = true;
10786 break;
10787 case Intrinsic::ppc_altivec_vcmpequd_p:
10788 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10789 CompareOpc = 199;
10790 isDot = true;
10791 } else
10792 return false;
10793 break;
10794 case Intrinsic::ppc_altivec_vcmpneb_p:
10795 case Intrinsic::ppc_altivec_vcmpneh_p:
10796 case Intrinsic::ppc_altivec_vcmpnew_p:
10797 case Intrinsic::ppc_altivec_vcmpnezb_p:
10798 case Intrinsic::ppc_altivec_vcmpnezh_p:
10799 case Intrinsic::ppc_altivec_vcmpnezw_p:
10800 if (Subtarget.hasP9Altivec()) {
10801 switch (IntrinsicID) {
10802 default:
10803 llvm_unreachable("Unknown comparison intrinsic.");
10804 case Intrinsic::ppc_altivec_vcmpneb_p:
10805 CompareOpc = 7;
10806 break;
10807 case Intrinsic::ppc_altivec_vcmpneh_p:
10808 CompareOpc = 71;
10809 break;
10810 case Intrinsic::ppc_altivec_vcmpnew_p:
10811 CompareOpc = 135;
10812 break;
10813 case Intrinsic::ppc_altivec_vcmpnezb_p:
10814 CompareOpc = 263;
10815 break;
10816 case Intrinsic::ppc_altivec_vcmpnezh_p:
10817 CompareOpc = 327;
10818 break;
10819 case Intrinsic::ppc_altivec_vcmpnezw_p:
10820 CompareOpc = 391;
10821 break;
10822 }
10823 isDot = true;
10824 } else
10825 return false;
10826 break;
10827 case Intrinsic::ppc_altivec_vcmpgefp_p:
10828 CompareOpc = 454;
10829 isDot = true;
10830 break;
10831 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10832 CompareOpc = 710;
10833 isDot = true;
10834 break;
10835 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10836 CompareOpc = 774;
10837 isDot = true;
10838 break;
10839 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10840 CompareOpc = 838;
10841 isDot = true;
10842 break;
10843 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10844 CompareOpc = 902;
10845 isDot = true;
10846 break;
10847 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10848 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10849 CompareOpc = 967;
10850 isDot = true;
10851 } else
10852 return false;
10853 break;
10854 case Intrinsic::ppc_altivec_vcmpgtub_p:
10855 CompareOpc = 518;
10856 isDot = true;
10857 break;
10858 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10859 CompareOpc = 582;
10860 isDot = true;
10861 break;
10862 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10863 CompareOpc = 646;
10864 isDot = true;
10865 break;
10866 case Intrinsic::ppc_altivec_vcmpgtud_p:
10867 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10868 CompareOpc = 711;
10869 isDot = true;
10870 } else
10871 return false;
10872 break;
10873
10874 case Intrinsic::ppc_altivec_vcmpequq:
10875 case Intrinsic::ppc_altivec_vcmpgtsq:
10876 case Intrinsic::ppc_altivec_vcmpgtuq:
10877 if (!Subtarget.isISA3_1())
10878 return false;
10879 switch (IntrinsicID) {
10880 default:
10881 llvm_unreachable("Unknown comparison intrinsic.");
10882 case Intrinsic::ppc_altivec_vcmpequq:
10883 CompareOpc = 455;
10884 break;
10885 case Intrinsic::ppc_altivec_vcmpgtsq:
10886 CompareOpc = 903;
10887 break;
10888 case Intrinsic::ppc_altivec_vcmpgtuq:
10889 CompareOpc = 647;
10890 break;
10891 }
10892 break;
10893
10894 // VSX predicate comparisons use the same infrastructure
10895 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10896 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10897 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10898 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10899 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10900 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10901 if (Subtarget.hasVSX()) {
10902 switch (IntrinsicID) {
10903 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10904 CompareOpc = 99;
10905 break;
10906 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10907 CompareOpc = 115;
10908 break;
10909 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10910 CompareOpc = 107;
10911 break;
10912 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10913 CompareOpc = 67;
10914 break;
10915 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10916 CompareOpc = 83;
10917 break;
10918 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10919 CompareOpc = 75;
10920 break;
10921 }
10922 isDot = true;
10923 } else
10924 return false;
10925 break;
10926
10927 // Normal Comparisons.
10928 case Intrinsic::ppc_altivec_vcmpbfp:
10929 CompareOpc = 966;
10930 break;
10931 case Intrinsic::ppc_altivec_vcmpeqfp:
10932 CompareOpc = 198;
10933 break;
10934 case Intrinsic::ppc_altivec_vcmpequb:
10935 CompareOpc = 6;
10936 break;
10937 case Intrinsic::ppc_altivec_vcmpequh:
10938 CompareOpc = 70;
10939 break;
10940 case Intrinsic::ppc_altivec_vcmpequw:
10941 CompareOpc = 134;
10942 break;
10943 case Intrinsic::ppc_altivec_vcmpequd:
10944 if (Subtarget.hasP8Altivec())
10945 CompareOpc = 199;
10946 else
10947 return false;
10948 break;
10949 case Intrinsic::ppc_altivec_vcmpneb:
10950 case Intrinsic::ppc_altivec_vcmpneh:
10951 case Intrinsic::ppc_altivec_vcmpnew:
10952 case Intrinsic::ppc_altivec_vcmpnezb:
10953 case Intrinsic::ppc_altivec_vcmpnezh:
10954 case Intrinsic::ppc_altivec_vcmpnezw:
10955 if (Subtarget.hasP9Altivec())
10956 switch (IntrinsicID) {
10957 default:
10958 llvm_unreachable("Unknown comparison intrinsic.");
10959 case Intrinsic::ppc_altivec_vcmpneb:
10960 CompareOpc = 7;
10961 break;
10962 case Intrinsic::ppc_altivec_vcmpneh:
10963 CompareOpc = 71;
10964 break;
10965 case Intrinsic::ppc_altivec_vcmpnew:
10966 CompareOpc = 135;
10967 break;
10968 case Intrinsic::ppc_altivec_vcmpnezb:
10969 CompareOpc = 263;
10970 break;
10971 case Intrinsic::ppc_altivec_vcmpnezh:
10972 CompareOpc = 327;
10973 break;
10974 case Intrinsic::ppc_altivec_vcmpnezw:
10975 CompareOpc = 391;
10976 break;
10977 }
10978 else
10979 return false;
10980 break;
10981 case Intrinsic::ppc_altivec_vcmpgefp:
10982 CompareOpc = 454;
10983 break;
10984 case Intrinsic::ppc_altivec_vcmpgtfp:
10985 CompareOpc = 710;
10986 break;
10987 case Intrinsic::ppc_altivec_vcmpgtsb:
10988 CompareOpc = 774;
10989 break;
10990 case Intrinsic::ppc_altivec_vcmpgtsh:
10991 CompareOpc = 838;
10992 break;
10993 case Intrinsic::ppc_altivec_vcmpgtsw:
10994 CompareOpc = 902;
10995 break;
10996 case Intrinsic::ppc_altivec_vcmpgtsd:
10997 if (Subtarget.hasP8Altivec())
10998 CompareOpc = 967;
10999 else
11000 return false;
11001 break;
11002 case Intrinsic::ppc_altivec_vcmpgtub:
11003 CompareOpc = 518;
11004 break;
11005 case Intrinsic::ppc_altivec_vcmpgtuh:
11006 CompareOpc = 582;
11007 break;
11008 case Intrinsic::ppc_altivec_vcmpgtuw:
11009 CompareOpc = 646;
11010 break;
11011 case Intrinsic::ppc_altivec_vcmpgtud:
11012 if (Subtarget.hasP8Altivec())
11013 CompareOpc = 711;
11014 else
11015 return false;
11016 break;
11017 case Intrinsic::ppc_altivec_vcmpequq_p:
11018 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11019 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11020 if (!Subtarget.isISA3_1())
11021 return false;
11022 switch (IntrinsicID) {
11023 default:
11024 llvm_unreachable("Unknown comparison intrinsic.");
11025 case Intrinsic::ppc_altivec_vcmpequq_p:
11026 CompareOpc = 455;
11027 break;
11028 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11029 CompareOpc = 903;
11030 break;
11031 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11032 CompareOpc = 647;
11033 break;
11034 }
11035 isDot = true;
11036 break;
11037 }
11038 return true;
11039}
11040
11041/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11042/// lower, do it, otherwise return null.
11043SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11044 SelectionDAG &DAG) const {
11045 unsigned IntrinsicID = Op.getConstantOperandVal(i: 0);
11046
11047 SDLoc dl(Op);
11048
11049 switch (IntrinsicID) {
11050 case Intrinsic::thread_pointer:
11051 // Reads the thread pointer register, used for __builtin_thread_pointer.
11052 if (Subtarget.isPPC64())
11053 return DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
11054 return DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);
11055
11056 case Intrinsic::ppc_rldimi: {
11057 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11058 SDValue Src = Op.getOperand(i: 1);
11059 APInt Mask = Op.getConstantOperandAPInt(i: 4);
11060 if (Mask.isZero())
11061 return Op.getOperand(i: 2);
11062 if (Mask.isAllOnes())
11063 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src, N2: Op.getOperand(i: 3));
11064 uint64_t SH = Op.getConstantOperandVal(i: 3);
11065 unsigned MB = 0, ME = 0;
11066 if (!isRunOfOnes64(Val: Mask.getZExtValue(), MB, ME))
11067 report_fatal_error(reason: "invalid rldimi mask!");
11068 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11069 if (ME < 63 - SH) {
11070 Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
11071 N2: DAG.getConstant(Val: ME + SH + 1, DL: dl, VT: MVT::i32));
11072 } else if (ME > 63 - SH) {
11073 Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
11074 N2: DAG.getConstant(Val: ME + SH - 63, DL: dl, VT: MVT::i32));
11075 }
11076 return SDValue(
11077 DAG.getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64,
11078 Ops: {Op.getOperand(i: 2), Src,
11079 DAG.getTargetConstant(Val: 63 - ME, DL: dl, VT: MVT::i32),
11080 DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32)}),
11081 0);
11082 }
11083
11084 case Intrinsic::ppc_rlwimi: {
11085 APInt Mask = Op.getConstantOperandAPInt(i: 4);
11086 if (Mask.isZero())
11087 return Op.getOperand(i: 2);
11088 if (Mask.isAllOnes())
11089 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i32, N1: Op.getOperand(i: 1),
11090 N2: Op.getOperand(i: 3));
11091 unsigned MB = 0, ME = 0;
11092 if (!isRunOfOnes(Val: Mask.getZExtValue(), MB, ME))
11093 report_fatal_error(reason: "invalid rlwimi mask!");
11094 return SDValue(DAG.getMachineNode(
11095 Opcode: PPC::RLWIMI, dl, VT: MVT::i32,
11096 Ops: {Op.getOperand(i: 2), Op.getOperand(i: 1), Op.getOperand(i: 3),
11097 DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
11098 DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
11099 0);
11100 }
11101
11102 case Intrinsic::ppc_rlwnm: {
11103 if (Op.getConstantOperandVal(i: 3) == 0)
11104 return DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32);
11105 unsigned MB = 0, ME = 0;
11106 if (!isRunOfOnes(Val: Op.getConstantOperandVal(i: 3), MB, ME))
11107 report_fatal_error(reason: "invalid rlwnm mask!");
11108 return SDValue(
11109 DAG.getMachineNode(Opcode: PPC::RLWNM, dl, VT: MVT::i32,
11110 Ops: {Op.getOperand(i: 1), Op.getOperand(i: 2),
11111 DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
11112 DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
11113 0);
11114 }
11115
11116 case Intrinsic::ppc_mma_disassemble_acc: {
11117 if (Subtarget.isISAFuture()) {
11118 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11119 SDValue WideVec =
11120 SDValue(DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes,
11121 Ops: Op.getOperand(i: 1)),
11122 0);
11123 SmallVector<SDValue, 4> RetOps;
11124 SDValue Value = SDValue(WideVec.getNode(), 0);
11125 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11126
11127 SDValue Extract;
11128 Extract = DAG.getNode(
11129 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11130 N1: Subtarget.isLittleEndian() ? Value2 : Value,
11131 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 1 : 0,
11132 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11133 RetOps.push_back(Elt: Extract);
11134 Extract = DAG.getNode(
11135 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11136 N1: Subtarget.isLittleEndian() ? Value2 : Value,
11137 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 0 : 1,
11138 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11139 RetOps.push_back(Elt: Extract);
11140 Extract = DAG.getNode(
11141 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11142 N1: Subtarget.isLittleEndian() ? Value : Value2,
11143 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 1 : 0,
11144 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11145 RetOps.push_back(Elt: Extract);
11146 Extract = DAG.getNode(
11147 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11148 N1: Subtarget.isLittleEndian() ? Value : Value2,
11149 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 0 : 1,
11150 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11151 RetOps.push_back(Elt: Extract);
11152 return DAG.getMergeValues(Ops: RetOps, dl);
11153 }
11154 [[fallthrough]];
11155 }
11156 case Intrinsic::ppc_vsx_disassemble_pair: {
11157 int NumVecs = 2;
11158 SDValue WideVec = Op.getOperand(i: 1);
11159 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11160 NumVecs = 4;
11161 WideVec = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: WideVec);
11162 }
11163 SmallVector<SDValue, 4> RetOps;
11164 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11165 SDValue Extract = DAG.getNode(
11166 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: WideVec,
11167 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11168 : VecNo,
11169 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11170 RetOps.push_back(Elt: Extract);
11171 }
11172 return DAG.getMergeValues(Ops: RetOps, dl);
11173 }
11174
11175 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11176 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11177 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11178 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11179 "Specify P of 0 or 1 for lower or upper 512 bits");
11180 unsigned HiLo = Idx->getSExtValue();
11181 unsigned Opcode;
11182 unsigned Subx;
11183 if (HiLo == 0) {
11184 Opcode = PPC::DMXXEXTFDMR512;
11185 Subx = PPC::sub_wacc_lo;
11186 } else {
11187 Opcode = PPC::DMXXEXTFDMR512_HI;
11188 Subx = PPC::sub_wacc_hi;
11189 }
11190 SDValue Subreg(
11191 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
11192 Op1: Op.getOperand(i: 1),
11193 Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
11194 0);
11195 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11196 return SDValue(DAG.getMachineNode(Opcode, dl, ResultTys: ReturnTypes, Ops: Subreg), 0);
11197 }
11198
11199 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11200 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11201 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11202 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11203 "Specify a dmr row pair 0-3");
11204 unsigned IdxVal = Idx->getSExtValue();
11205 unsigned Subx;
11206 switch (IdxVal) {
11207 case 0:
11208 Subx = PPC::sub_dmrrowp0;
11209 break;
11210 case 1:
11211 Subx = PPC::sub_dmrrowp1;
11212 break;
11213 case 2:
11214 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11215 break;
11216 case 3:
11217 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11218 break;
11219 }
11220 SDValue Subreg(
11221 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v256i1,
11222 Op1: Op.getOperand(i: 1),
11223 Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
11224 0);
11225 SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
11226 return SDValue(
11227 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR256, dl, VT: MVT::v256i1, Ops: {Subreg, P}),
11228 0);
11229 }
11230
11231 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11232 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11233 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 4));
11234 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11235 "Specify P of 0 or 1 for lower or upper 512 bits");
11236 unsigned HiLo = Idx->getSExtValue();
11237 unsigned Opcode;
11238 unsigned Subx;
11239 if (HiLo == 0) {
11240 Opcode = PPC::DMXXINSTDMR512;
11241 Subx = PPC::sub_wacc_lo;
11242 } else {
11243 Opcode = PPC::DMXXINSTDMR512_HI;
11244 Subx = PPC::sub_wacc_hi;
11245 }
11246 SDValue Ops[] = {Op.getOperand(i: 2), Op.getOperand(i: 3)};
11247 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, VT: MVT::v512i1, Ops), 0);
11248 SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
11249 return SDValue(DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
11250 Op1: Op.getOperand(i: 1), Op2: Wacc, Op3: SubReg),
11251 0);
11252 }
11253
11254 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11255 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11256 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 3));
11257 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11258 "Specify a dmr row pair 0-3");
11259 unsigned IdxVal = Idx->getSExtValue();
11260 unsigned Subx;
11261 switch (IdxVal) {
11262 case 0:
11263 Subx = PPC::sub_dmrrowp0;
11264 break;
11265 case 1:
11266 Subx = PPC::sub_dmrrowp1;
11267 break;
11268 case 2:
11269 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11270 break;
11271 case 3:
11272 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11273 break;
11274 }
11275 SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
11276 SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
11277 SDValue Ops[] = {Op.getOperand(i: 2), P};
11278 SDValue DMRRowp = SDValue(
11279 DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR256, dl, VT: MVT::v256i1, Ops), 0);
11280 return SDValue(DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
11281 Op1: Op.getOperand(i: 1), Op2: DMRRowp, Op3: SubReg),
11282 0);
11283 }
11284
11285 case Intrinsic::ppc_mma_xxmfacc:
11286 case Intrinsic::ppc_mma_xxmtacc: {
11287 // Allow pre-isa-future subtargets to lower as normal.
11288 if (!Subtarget.isISAFuture())
11289 return SDValue();
11290 // The intrinsics for xxmtacc and xxmfacc take one argument of type v512i1.
11291 // For ISA Future CPUs the corresponding wacc instruction
11292 // dmxx[inst|extf]dmr512 is always generated for type v512i1, which removes
11293 // the need to produce the xxm[t|f]acc.
11294 SDValue WideVec = Op.getOperand(i: 1);
11295 DAG.ReplaceAllUsesWith(From: Op, To: WideVec);
11296 return SDValue();
11297 }
11298
11299 case Intrinsic::ppc_unpack_longdouble: {
11300 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11301 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11302 "Argument of long double unpack must be 0 or 1!");
11303 return DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: dl, VT: MVT::f64, N1: Op.getOperand(i: 1),
11304 N2: DAG.getConstant(Val: !!(Idx->getSExtValue()), DL: dl,
11305 VT: Idx->getValueType(ResNo: 0)));
11306 }
11307
11308 case Intrinsic::ppc_compare_exp_lt:
11309 case Intrinsic::ppc_compare_exp_gt:
11310 case Intrinsic::ppc_compare_exp_eq:
11311 case Intrinsic::ppc_compare_exp_uo: {
11312 unsigned Pred;
11313 switch (IntrinsicID) {
11314 case Intrinsic::ppc_compare_exp_lt:
11315 Pred = PPC::PRED_LT;
11316 break;
11317 case Intrinsic::ppc_compare_exp_gt:
11318 Pred = PPC::PRED_GT;
11319 break;
11320 case Intrinsic::ppc_compare_exp_eq:
11321 Pred = PPC::PRED_EQ;
11322 break;
11323 case Intrinsic::ppc_compare_exp_uo:
11324 Pred = PPC::PRED_UN;
11325 break;
11326 }
11327 return SDValue(
11328 DAG.getMachineNode(
11329 Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
11330 Ops: {SDValue(DAG.getMachineNode(Opcode: PPC::XSCMPEXPDP, dl, VT: MVT::i32,
11331 Op1: Op.getOperand(i: 1), Op2: Op.getOperand(i: 2)),
11332 0),
11333 DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32), DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
11334 DAG.getTargetConstant(Val: Pred, DL: dl, VT: MVT::i32)}),
11335 0);
11336 }
11337 case Intrinsic::ppc_test_data_class: {
11338 EVT OpVT = Op.getOperand(i: 1).getValueType();
11339 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11340 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11341 : PPC::XSTSTDCSP);
11342 return SDValue(
11343 DAG.getMachineNode(
11344 Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
11345 Ops: {SDValue(DAG.getMachineNode(Opcode: CmprOpc, dl, VT: MVT::i32, Op1: Op.getOperand(i: 2),
11346 Op2: Op.getOperand(i: 1)),
11347 0),
11348 DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32), DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
11349 DAG.getTargetConstant(Val: PPC::PRED_EQ, DL: dl, VT: MVT::i32)}),
11350 0);
11351 }
11352 case Intrinsic::ppc_fnmsub: {
11353 EVT VT = Op.getOperand(i: 1).getValueType();
11354 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11355 return DAG.getNode(
11356 Opcode: ISD::FNEG, DL: dl, VT,
11357 Operand: DAG.getNode(Opcode: ISD::FMA, DL: dl, VT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
11358 N3: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op.getOperand(i: 3))));
11359 return DAG.getNode(Opcode: PPCISD::FNMSUB, DL: dl, VT, N1: Op.getOperand(i: 1),
11360 N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
11361 }
11362 case Intrinsic::ppc_convert_f128_to_ppcf128:
11363 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11364 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11365 ? RTLIB::CONVERT_PPCF128_F128
11366 : RTLIB::CONVERT_F128_PPCF128;
11367 MakeLibCallOptions CallOptions;
11368 std::pair<SDValue, SDValue> Result =
11369 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op.getOperand(i: 1), CallOptions,
11370 dl, Chain: SDValue());
11371 return Result.first;
11372 }
11373 case Intrinsic::ppc_maxfe:
11374 case Intrinsic::ppc_maxfl:
11375 case Intrinsic::ppc_maxfs:
11376 case Intrinsic::ppc_minfe:
11377 case Intrinsic::ppc_minfl:
11378 case Intrinsic::ppc_minfs: {
11379 EVT VT = Op.getValueType();
11380 assert(
11381 all_of(Op->ops().drop_front(4),
11382 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11383 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11384 (void)VT;
11385 ISD::CondCode CC = ISD::SETGT;
11386 if (IntrinsicID == Intrinsic::ppc_minfe ||
11387 IntrinsicID == Intrinsic::ppc_minfl ||
11388 IntrinsicID == Intrinsic::ppc_minfs)
11389 CC = ISD::SETLT;
11390 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11391 SDValue Res = Op.getOperand(i: I);
11392 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11393 Res =
11394 DAG.getSelectCC(DL: dl, LHS: Res, RHS: Op.getOperand(i: I), True: Res, False: Op.getOperand(i: I), Cond: CC);
11395 }
11396 return Res;
11397 }
11398 }
11399
11400 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11401 // opcode number of the comparison.
11402 int CompareOpc;
11403 bool isDot;
11404 if (!getVectorCompareInfo(Intrin: Op, CompareOpc, isDot, Subtarget))
11405 return SDValue(); // Don't custom lower most intrinsics.
11406
11407 // If this is a non-dot comparison, make the VCMP node and we are done.
11408 if (!isDot) {
11409 SDValue Tmp = DAG.getNode(Opcode: PPCISD::VCMP, DL: dl, VT: Op.getOperand(i: 2).getValueType(),
11410 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
11411 N3: DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32));
11412 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Tmp);
11413 }
11414
11415 // Create the PPCISD altivec 'dot' comparison node.
11416 SDValue Ops[] = {
11417 Op.getOperand(i: 2), // LHS
11418 Op.getOperand(i: 3), // RHS
11419 DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
11420 };
11421 EVT VTs[] = { Op.getOperand(i: 2).getValueType(), MVT::Glue };
11422 SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);
11423
11424 // Unpack the result based on how the target uses it.
11425 unsigned BitNo; // Bit # of CR6.
11426 bool InvertBit; // Invert result?
11427 unsigned Bitx;
11428 unsigned SetOp;
11429 switch (Op.getConstantOperandVal(i: 1)) {
11430 default: // Can't happen, don't crash on invalid number though.
11431 case 0: // Return the value of the EQ bit of CR6.
11432 BitNo = 0;
11433 InvertBit = false;
11434 Bitx = PPC::sub_eq;
11435 SetOp = PPCISD::SETBC;
11436 break;
11437 case 1: // Return the inverted value of the EQ bit of CR6.
11438 BitNo = 0;
11439 InvertBit = true;
11440 Bitx = PPC::sub_eq;
11441 SetOp = PPCISD::SETBCR;
11442 break;
11443 case 2: // Return the value of the LT bit of CR6.
11444 BitNo = 2;
11445 InvertBit = false;
11446 Bitx = PPC::sub_lt;
11447 SetOp = PPCISD::SETBC;
11448 break;
11449 case 3: // Return the inverted value of the LT bit of CR6.
11450 BitNo = 2;
11451 InvertBit = true;
11452 Bitx = PPC::sub_lt;
11453 SetOp = PPCISD::SETBCR;
11454 break;
11455 }
11456
11457 SDValue GlueOp = CompNode.getValue(R: 1);
11458 if (Subtarget.isISA3_1()) {
11459 SDValue SubRegIdx = DAG.getTargetConstant(Val: Bitx, DL: dl, VT: MVT::i32);
11460 SDValue CR6Reg = DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32);
11461 SDValue CRBit =
11462 SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
11463 Op1: CR6Reg, Op2: SubRegIdx, Op3: GlueOp),
11464 0);
11465 return DAG.getNode(Opcode: SetOp, DL: dl, VT: MVT::i32, Operand: CRBit);
11466 }
11467
11468 // Now that we have the comparison, emit a copy from the CR to a GPR.
11469 // This is flagged to the above dot comparison.
11470 SDValue Flags = DAG.getNode(Opcode: PPCISD::MFOCRF, DL: dl, VT: MVT::i32,
11471 N1: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32), N2: GlueOp);
11472
11473 // Shift the bit into the low position.
11474 Flags = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32, N1: Flags,
11475 N2: DAG.getConstant(Val: 8 - (3 - BitNo), DL: dl, VT: MVT::i32));
11476 // Isolate the bit.
11477 Flags = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: Flags,
11478 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
11479
11480 // If we are supposed to, toggle the bit.
11481 if (InvertBit)
11482 Flags = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: Flags,
11483 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
11484 return Flags;
11485}
11486
11487SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11488 SelectionDAG &DAG) const {
11489 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain at
11490 // the beginning of the argument list.
11491 int ArgStart = isa<ConstantSDNode>(Val: Op.getOperand(i: 0)) ? 0 : 1;
11492 SDLoc DL(Op);
11493 switch (Op.getConstantOperandVal(i: ArgStart)) {
11494 case Intrinsic::ppc_cfence: {
11495 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11496 SDValue Val = Op.getOperand(i: ArgStart + 1);
11497 EVT Ty = Val.getValueType();
11498 if (Ty == MVT::i128) {
11499 // FIXME: Testing one of two paired registers is sufficient to guarantee
11500 // ordering?
11501 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i64, Operand: Val);
11502 }
11503 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11504 return SDValue(
11505 DAG.getMachineNode(
11506 Opcode, dl: DL, VT: MVT::Other,
11507 Op1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getScalarIntVT(), Operand: Val),
11508 Op2: Op.getOperand(i: 0)),
11509 0);
11510 }
11511 default:
11512 break;
11513 }
11514 return SDValue();
11515}
11516
11517// Lower scalar BSWAP64 to xxbrd.
11518SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11519 SDLoc dl(Op);
11520 if (!Subtarget.isPPC64())
11521 return Op;
11522 // MTVSRDD
11523 Op = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, VT: MVT::v2i64, N1: Op.getOperand(i: 0),
11524 N2: Op.getOperand(i: 0));
11525 // XXBRD
11526 Op = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Op);
11527 // MFVSRD
11528 int VectorIndex = 0;
11529 if (Subtarget.isLittleEndian())
11530 VectorIndex = 1;
11531 Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Op,
11532 N2: DAG.getTargetConstant(Val: VectorIndex, DL: dl, VT: MVT::i32));
11533 return Op;
11534}
11535
11536// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11537// compared to a value that is atomically loaded (atomic loads zero-extend).
11538SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11539 SelectionDAG &DAG) const {
11540 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11541 "Expecting an atomic compare-and-swap here.");
11542 SDLoc dl(Op);
11543 auto *AtomicNode = cast<AtomicSDNode>(Val: Op.getNode());
11544 EVT MemVT = AtomicNode->getMemoryVT();
11545 if (MemVT.getSizeInBits() >= 32)
11546 return Op;
11547
11548 SDValue CmpOp = Op.getOperand(i: 2);
11549 // If this is already correctly zero-extended, leave it alone.
11550 auto HighBits = APInt::getHighBitsSet(numBits: 32, hiBitsSet: 32 - MemVT.getSizeInBits());
11551 if (DAG.MaskedValueIsZero(Op: CmpOp, Mask: HighBits))
11552 return Op;
11553
11554 // Clear the high bits of the compare operand.
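  // For an i8 exchange the mask is 0xFF; for an i16 exchange it is 0xFFFF.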
11555 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11556 SDValue NewCmpOp =
11557 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: CmpOp,
11558 N2: DAG.getConstant(Val: MaskVal, DL: dl, VT: MVT::i32));
11559
11560 // Replace the existing compare operand with the properly zero-extended one.
11561 SmallVector<SDValue, 4> Ops;
11562 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11563 Ops.push_back(Elt: AtomicNode->getOperand(Num: i));
11564 Ops[2] = NewCmpOp;
11565 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11566 SDVTList Tys = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other);
11567 auto NodeTy =
11568 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11569 return DAG.getMemIntrinsicNode(Opcode: NodeTy, dl, VTList: Tys, Ops, MemVT, MMO);
11570}
11571
11572SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11573 SelectionDAG &DAG) const {
11574 AtomicSDNode *N = cast<AtomicSDNode>(Val: Op.getNode());
11575 EVT MemVT = N->getMemoryVT();
11576 assert(MemVT.getSimpleVT() == MVT::i128 &&
11577 "Expect quadword atomic operations");
11578 SDLoc dl(N);
11579 unsigned Opc = N->getOpcode();
11580 switch (Opc) {
11581 case ISD::ATOMIC_LOAD: {
11582 // Lower the quadword atomic load to int_ppc_atomic_load_i128, which is then
11583 // lowered to PPC instructions by the pattern-matching instruction selector.
11584 SDVTList Tys = DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::Other);
11585 SmallVector<SDValue, 4> Ops{
11586 N->getOperand(Num: 0),
11587 DAG.getConstant(Val: Intrinsic::ppc_atomic_load_i128, DL: dl, VT: MVT::i32)};
11588 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11589 Ops.push_back(Elt: N->getOperand(Num: I));
11590 SDValue LoadedVal = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl, VTList: Tys,
11591 Ops, MemVT, MMO: N->getMemOperand());
11592 SDValue ValLo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal);
11593 SDValue ValHi =
11594 DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal.getValue(R: 1));
11595 ValHi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ValHi,
11596 N2: DAG.getConstant(Val: 64, DL: dl, VT: MVT::i32));
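    // Reassemble the 128-bit result as ValLo | (ValHi << 64).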
11597 SDValue Val =
11598 DAG.getNode(Opcode: ISD::OR, DL: dl, ResultTys: {MVT::i128, MVT::Other}, Ops: {ValLo, ValHi});
11599 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, ResultTys: {MVT::i128, MVT::Other},
11600 Ops: {Val, LoadedVal.getValue(R: 2)});
11601 }
11602 case ISD::ATOMIC_STORE: {
    // Lower a quadword atomic store to int_ppc_atomic_store_i128, which will be
    // lowered to PPC instructions by the pattern-matching instruction selector.
11605 SDVTList Tys = DAG.getVTList(VT: MVT::Other);
11606 SmallVector<SDValue, 4> Ops{
11607 N->getOperand(Num: 0),
11608 DAG.getConstant(Val: Intrinsic::ppc_atomic_store_i128, DL: dl, VT: MVT::i32)};
11609 SDValue Val = N->getOperand(Num: 1);
11610 SDValue ValLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: Val);
11611 SDValue ValHi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: Val,
11612 N2: DAG.getConstant(Val: 64, DL: dl, VT: MVT::i32));
11613 ValHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: ValHi);
11614 Ops.push_back(Elt: ValLo);
11615 Ops.push_back(Elt: ValHi);
11616 Ops.push_back(Elt: N->getOperand(Num: 2));
11617 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops, MemVT,
11618 MMO: N->getMemOperand());
11619 }
11620 default:
11621 llvm_unreachable("Unexpected atomic opcode");
11622 }
11623}
11624
11625static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11626 SelectionDAG &DAG,
11627 const PPCSubtarget &Subtarget) {
11628 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11629
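  // Bits of the DCMX (data-class mask) operand understood by the
  // xststdc[sp|dp|qp] test-data-class instructions selected below.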
11630 enum DataClassMask {
11631 DC_NAN = 1 << 6,
11632 DC_NEG_INF = 1 << 4,
11633 DC_POS_INF = 1 << 5,
11634 DC_NEG_ZERO = 1 << 2,
11635 DC_POS_ZERO = 1 << 3,
11636 DC_NEG_SUBNORM = 1,
11637 DC_POS_SUBNORM = 1 << 1,
11638 };
11639
11640 EVT VT = Op.getValueType();
11641
11642 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11643 : VT == MVT::f64 ? PPC::XSTSTDCDP
11644 : PPC::XSTSTDCSP;
11645
11646 if (Mask == fcAllFlags)
11647 return DAG.getBoolConstant(V: true, DL: Dl, VT: MVT::i1, OpVT: VT);
11648 if (Mask == 0)
11649 return DAG.getBoolConstant(V: false, DL: Dl, VT: MVT::i1, OpVT: VT);
11650
  // In some cases it is cheaper (or necessary) to test the reverse set of
  // flags and negate the result.
11652 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11653 SDValue Rev = getDataClassTest(Op, Mask: ~Mask, Dl, DAG, Subtarget);
11654 return DAG.getNOT(DL: Dl, Val: Rev, VT: MVT::i1);
11655 }
11656
  // Power doesn't support testing whether a value is 'normal'. Test the rest
  // first, then check that it is 'not not-normal' with the expected sign.
11659 if (Mask & fcNormal) {
11660 SDValue Rev(DAG.getMachineNode(
11661 Opcode: TestOp, dl: Dl, VT: MVT::i32,
11662 Op1: DAG.getTargetConstant(Val: DC_NAN | DC_NEG_INF | DC_POS_INF |
11663 DC_NEG_ZERO | DC_POS_ZERO |
11664 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11665 DL: Dl, VT: MVT::i32),
11666 Op2: Op),
11667 0);
    // The sign is stored in CR bit 0, the result in CR bit 2.
11669 SDValue Sign(
11670 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
11671 Op2: DAG.getTargetConstant(Val: PPC::sub_lt, DL: Dl, VT: MVT::i32)),
11672 0);
11673 SDValue Normal(DAG.getNOT(
11674 DL: Dl,
11675 Val: SDValue(DAG.getMachineNode(
11676 Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
11677 Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
11678 0),
11679 VT: MVT::i1));
11680 if (Mask & fcPosNormal)
11681 Sign = DAG.getNOT(DL: Dl, Val: Sign, VT: MVT::i1);
11682 SDValue Result = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: Sign, N2: Normal);
11683 if (Mask == fcPosNormal || Mask == fcNegNormal)
11684 return Result;
11685
11686 return DAG.getNode(
11687 Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
11688 N1: getDataClassTest(Op, Mask: Mask & ~fcNormal, Dl, DAG, Subtarget), N2: Result);
11689 }
11690
  // The instruction doesn't differentiate between signaling and quiet NaNs.
  // Test the rest first, then test whether it 'is NaN and is signaling/quiet'.
11693 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11694 bool IsQuiet = Mask & fcQNan;
11695 SDValue NanCheck = getDataClassTest(Op, Mask: fcNan, Dl, DAG, Subtarget);
11696
    // Quietness is determined by the first bit of the fraction field.
11698 uint64_t QuietMask = 0;
11699 SDValue HighWord;
11700 if (VT == MVT::f128) {
11701 HighWord = DAG.getNode(
11702 Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: DAG.getBitcast(VT: MVT::v4i32, V: Op),
11703 N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? 3 : 0, DL: Dl));
11704 QuietMask = 0x8000;
11705 } else if (VT == MVT::f64) {
11706 if (Subtarget.isPPC64()) {
11707 HighWord = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::i32,
11708 N1: DAG.getBitcast(VT: MVT::i64, V: Op),
11709 N2: DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32));
11710 } else {
11711 SDValue Vec = DAG.getBitcast(
11712 VT: MVT::v4i32, V: DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: Dl, VT: MVT::v2f64, Operand: Op));
11713 HighWord = DAG.getNode(
11714 Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: Vec,
11715 N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? 1 : 0, DL: Dl));
11716 }
11717 QuietMask = 0x80000;
11718 } else if (VT == MVT::f32) {
11719 HighWord = DAG.getBitcast(VT: MVT::i32, V: Op);
11720 QuietMask = 0x400000;
11721 }
11722 SDValue NanRes = DAG.getSetCC(
11723 DL: Dl, VT: MVT::i1,
11724 LHS: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: HighWord,
11725 N2: DAG.getConstant(Val: QuietMask, DL: Dl, VT: MVT::i32)),
11726 RHS: DAG.getConstant(Val: 0, DL: Dl, VT: MVT::i32), Cond: IsQuiet ? ISD::SETNE : ISD::SETEQ);
11727 NanRes = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: NanCheck, N2: NanRes);
11728 if (Mask == fcQNan || Mask == fcSNan)
11729 return NanRes;
11730
11731 return DAG.getNode(Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
11732 N1: getDataClassTest(Op, Mask: Mask & ~fcNan, Dl, DAG, Subtarget),
11733 N2: NanRes);
11734 }
11735
11736 unsigned NativeMask = 0;
11737 if ((Mask & fcNan) == fcNan)
11738 NativeMask |= DC_NAN;
11739 if (Mask & fcNegInf)
11740 NativeMask |= DC_NEG_INF;
11741 if (Mask & fcPosInf)
11742 NativeMask |= DC_POS_INF;
11743 if (Mask & fcNegZero)
11744 NativeMask |= DC_NEG_ZERO;
11745 if (Mask & fcPosZero)
11746 NativeMask |= DC_POS_ZERO;
11747 if (Mask & fcNegSubnormal)
11748 NativeMask |= DC_NEG_SUBNORM;
11749 if (Mask & fcPosSubnormal)
11750 NativeMask |= DC_POS_SUBNORM;
11751 return SDValue(
11752 DAG.getMachineNode(
11753 Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1,
11754 Op1: SDValue(DAG.getMachineNode(
11755 Opcode: TestOp, dl: Dl, VT: MVT::i32,
11756 Op1: DAG.getTargetConstant(Val: NativeMask, DL: Dl, VT: MVT::i32), Op2: Op),
11757 0),
11758 Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
11759 0);
11760}
11761
11762SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11763 SelectionDAG &DAG) const {
11764 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11765 SDValue LHS = Op.getOperand(i: 0);
11766 uint64_t RHSC = Op.getConstantOperandVal(i: 1);
11767 SDLoc Dl(Op);
11768 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11769 if (LHS.getValueType() == MVT::ppcf128) {
11770 // The higher part determines the value class.
11771 LHS = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::f64, N1: LHS,
11772 N2: DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32));
11773 }
11774
11775 return getDataClassTest(Op: LHS, Mask: Category, Dl, DAG, Subtarget);
11776}
11777
11778SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11779 SelectionDAG &DAG) const {
11780 SDLoc dl(Op);
11781
11782 MachineFunction &MF = DAG.getMachineFunction();
11783 SDValue Op0 = Op.getOperand(i: 0);
11784 EVT ValVT = Op0.getValueType();
11785 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11786 if (isa<ConstantSDNode>(Val: Op0) && EltSize <= 32) {
11787 int64_t IntVal = Op.getConstantOperandVal(i: 0);
11788 if (IntVal >= -16 && IntVal <= 15)
11789 return getCanonicalConstSplat(Val: IntVal, SplatSize: EltSize / 8, VT: Op.getValueType(), DAG,
11790 dl);
11791 }
11792
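  // If the operand is a 32-bit integer load, reuse the load's address and emit
  // a load-and-splat (PPCISD::LD_SPLAT) instead of going through a stack slot.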
11793 ReuseLoadInfo RLI;
11794 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11795 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11796 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11797 canReuseLoadAddress(Op: Op0, MemVT: MVT::i32, RLI, DAG, ET: ISD::NON_EXTLOAD)) {
11798
11799 MachineMemOperand *MMO =
11800 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
11801 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
11802 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11803 SDValue Bits = DAG.getMemIntrinsicNode(
11804 Opcode: PPCISD::LD_SPLAT, dl, VTList: DAG.getVTList(VT1: MVT::v4i32, VT2: MVT::Other), Ops,
11805 MemVT: MVT::i32, MMO);
11806 if (RLI.ResChain)
11807 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
11808 return Bits.getValue(R: 0);
11809 }
11810
11811 // Create a stack slot that is 16-byte aligned.
11812 MachineFrameInfo &MFI = MF.getFrameInfo();
11813 int FrameIdx = MFI.CreateStackObject(Size: 16, Alignment: Align(16), isSpillSlot: false);
11814 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11815 SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
11816
11817 SDValue Val = Op0;
  // P10 hardware store forwarding requires that a single store contains all
  // the data for the load, but P10 is able to merge a pair of adjacent stores.
  // Try to avoid load-hit-store hazards on P10 when running binaries compiled
  // for older processors by generating two mergeable scalar stores that can be
  // forwarded to the vector load.
11823 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11824 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11825 ValVT.getSizeInBits() <= 64) {
11826 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: Val);
11827 EVT ShiftAmountTy = getShiftAmountTy(LHSTy: MVT::i64, DL: DAG.getDataLayout());
11828 SDValue ShiftBy = DAG.getConstant(
11829 Val: 64 - Op.getValueType().getScalarSizeInBits(), DL: dl, VT: ShiftAmountTy);
11830 Val = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: Val, N2: ShiftBy);
11831 SDValue Plus8 =
11832 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FIdx, N2: DAG.getConstant(Val: 8, DL: dl, VT: PtrVT));
11833 SDValue Store2 =
11834 DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: Plus8, PtrInfo: MachinePointerInfo());
11835 SDValue Store = DAG.getStore(Chain: Store2, dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo());
11836 return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx,
11837 PtrInfo: MachinePointerInfo());
11838 }
11839
  // Store the input value into element 0 of the stack slot.
11841 SDValue Store =
11842 DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo());
11843 // Load it out.
11844 return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx, PtrInfo: MachinePointerInfo());
11845}
11846
11847SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11848 SelectionDAG &DAG) const {
11849 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11850 "Should only be called for ISD::INSERT_VECTOR_ELT");
11851
11852 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11853
11854 EVT VT = Op.getValueType();
11855 SDLoc dl(Op);
11856 SDValue V1 = Op.getOperand(i: 0);
11857 SDValue V2 = Op.getOperand(i: 1);
11858
11859 if (VT == MVT::v2f64 && C)
11860 return Op;
11861
11862 if (Subtarget.hasP9Vector()) {
    // An f32 load feeding into a v4f32 insert_vector_elt is handled this way
    // because on P10 it allows this specific insert_vector_elt load pattern to
    // utilize the refactored load and store infrastructure in order to exploit
    // prefixed loads.
    // On targets with inexpensive direct moves (Power9 and up), an
    // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
    // load, since a single-precision load would involve conversion to double
    // precision followed by another conversion back to single precision.
11871 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11872 (isa<LoadSDNode>(Val: V2))) {
11873 SDValue BitcastVector = DAG.getBitcast(VT: MVT::v4i32, V: V1);
11874 SDValue BitcastLoad = DAG.getBitcast(VT: MVT::i32, V: V2);
11875 SDValue InsVecElt =
11876 DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl, VT: MVT::v4i32, N1: BitcastVector,
11877 N2: BitcastLoad, N3: Op.getOperand(i: 2));
11878 return DAG.getBitcast(VT: MVT::v4f32, V: InsVecElt);
11879 }
11880 }
11881
11882 if (Subtarget.isISA3_1()) {
11883 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11884 return SDValue();
11885 // On P10, we have legal lowering for constant and variable indices for
11886 // all vectors.
11887 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11888 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11889 return Op;
11890 }
11891
11892 // Before P10, we have legal lowering for constant indices but not for
11893 // variable ones.
11894 if (!C)
11895 return SDValue();
11896
11897 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11898 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11899 SDValue Mtvsrz = DAG.getNode(Opcode: PPCISD::MTVSRZ, DL: dl, VT, Operand: V2);
11900 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11901 unsigned InsertAtElement = C->getZExtValue();
11902 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11903 if (Subtarget.isLittleEndian()) {
11904 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11905 }
11906 return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT, N1: V1, N2: Mtvsrz,
11907 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
11908 }
11909 return Op;
11910}
11911
11912SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11913 SelectionDAG &DAG) const {
11914 SDLoc dl(Op);
11915 LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
11916 SDValue LoadChain = LN->getChain();
11917 SDValue BasePtr = LN->getBasePtr();
11918 EVT VT = Op.getValueType();
11919 bool IsV1024i1 = VT == MVT::v1024i1;
11920 bool IsV2048i1 = VT == MVT::v2048i1;
11921
11922 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
11923 // Dense Math dmr pair registers, respectively.
11924 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
11925 (void)IsV2048i1;
11926 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11927 "Dense Math support required.");
11928 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11929
11930 SmallVector<SDValue, 8> Loads;
11931 SmallVector<SDValue, 8> LoadChains;
11932
11933 SDValue IntrinID = DAG.getConstant(Val: Intrinsic::ppc_vsx_lxvp, DL: dl, VT: MVT::i32);
11934 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
11935 MachineMemOperand *MMO = LN->getMemOperand();
11936 unsigned NumVecs = VT.getSizeInBits() / 256;
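  // Load the dmr (or dmr pair) value as a sequence of 256-bit vector pairs
  // using the lxvp intrinsic, 32 bytes at a time.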
11937 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11938 MachineMemOperand *NewMMO =
11939 DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * 32, Size: 32);
11940 if (Idx > 0) {
11941 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
11942 N2: DAG.getConstant(Val: 32, DL: dl, VT: BasePtr.getValueType()));
11943 LoadOps[2] = BasePtr;
11944 }
11945 SDValue Ld = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
11946 VTList: DAG.getVTList(VT1: MVT::v256i1, VT2: MVT::Other),
11947 Ops: LoadOps, MemVT: MVT::v256i1, MMO: NewMMO);
11948 LoadChains.push_back(Elt: Ld.getValue(R: 1));
11949 Loads.push_back(Elt: Ld);
11950 }
11951
11952 if (Subtarget.isLittleEndian()) {
11953 std::reverse(first: Loads.begin(), last: Loads.end());
11954 std::reverse(first: LoadChains.begin(), last: LoadChains.end());
11955 }
11956
11957 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
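  // DMXXINSTDMR512 and DMXXINSTDMR512_HI each assemble two 256-bit pairs into
  // a 512-bit half; REG_SEQUENCE then combines the halves into a v1024i1 dmr.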
11958 SDValue Lo(DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR512, dl, VT: MVT::v512i1, Op1: Loads[0],
11959 Op2: Loads[1]),
11960 0);
11961 SDValue LoSub = DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32);
11962 SDValue Hi(DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR512_HI, dl, VT: MVT::v512i1,
11963 Op1: Loads[2], Op2: Loads[3]),
11964 0);
11965 SDValue HiSub = DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32);
11966 SDValue RC = DAG.getTargetConstant(Val: PPC::DMRRCRegClassID, DL: dl, VT: MVT::i32);
11967 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11968
11969 SDValue Value =
11970 SDValue(DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v1024i1, Ops), 0);
11971
11972 if (IsV1024i1) {
11973 return DAG.getMergeValues(Ops: {Value, TF}, dl);
11974 }
11975
11976 // Handle Loads for V2048i1 which represents a dmr pair.
11977 SDValue DmrPValue;
11978 SDValue Dmr1Lo(DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR512, dl, VT: MVT::v512i1,
11979 Op1: Loads[4], Op2: Loads[5]),
11980 0);
11981 SDValue Dmr1Hi(DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR512_HI, dl, VT: MVT::v512i1,
11982 Op1: Loads[6], Op2: Loads[7]),
11983 0);
11984 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
11985 SDValue Dmr1Value = SDValue(
11986 DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v1024i1, Ops: Dmr1Ops), 0);
11987
11988 SDValue Dmr0Sub = DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32);
11989 SDValue Dmr1Sub = DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32);
11990
11991 SDValue DmrPRC = DAG.getTargetConstant(Val: PPC::DMRpRCRegClassID, DL: dl, VT: MVT::i32);
11992 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
11993
11994 DmrPValue = SDValue(
11995 DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v2048i1, Ops: DmrPOps), 0);
11996
11997 return DAG.getMergeValues(Ops: {DmrPValue, TF}, dl);
11998}
11999
12000SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12001 SelectionDAG &DAG) const {
12002 SDLoc dl(Op);
12003 LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
12004 SDValue LoadChain = LN->getChain();
12005 SDValue BasePtr = LN->getBasePtr();
12006 EVT VT = Op.getValueType();
12007
12008 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12009 return LowerDMFVectorLoad(Op, DAG);
12010
12011 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12012 return Op;
12013
  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value
  // into 2 or 4 VSX registers.
12017 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12018 "Type unsupported without MMA");
12019 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12020 "Type unsupported without paired vector support");
12021 Align Alignment = LN->getAlign();
12022 SmallVector<SDValue, 4> Loads;
12023 SmallVector<SDValue, 4> LoadChains;
12024 unsigned NumVecs = VT.getSizeInBits() / 128;
12025 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12026 SDValue Load =
12027 DAG.getLoad(VT: MVT::v16i8, dl, Chain: LoadChain, Ptr: BasePtr,
12028 PtrInfo: LN->getPointerInfo().getWithOffset(O: Idx * 16),
12029 Alignment: commonAlignment(A: Alignment, Offset: Idx * 16),
12030 MMOFlags: LN->getMemOperand()->getFlags(), AAInfo: LN->getAAInfo());
12031 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12032 N2: DAG.getConstant(Val: 16, DL: dl, VT: BasePtr.getValueType()));
12033 Loads.push_back(Elt: Load);
12034 LoadChains.push_back(Elt: Load.getValue(R: 1));
12035 }
12036 if (Subtarget.isLittleEndian()) {
12037 std::reverse(first: Loads.begin(), last: Loads.end());
12038 std::reverse(first: LoadChains.begin(), last: LoadChains.end());
12039 }
12040 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
12041 SDValue Value =
12042 DAG.getNode(Opcode: VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12043 DL: dl, VT, Ops: Loads);
12044 SDValue RetOps[] = {Value, TF};
12045 return DAG.getMergeValues(Ops: RetOps, dl);
12046}
12047
12048SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12049 SelectionDAG &DAG) const {
12050
12051 SDLoc dl(Op);
12052 StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
12053 SDValue StoreChain = SN->getChain();
12054 SDValue BasePtr = SN->getBasePtr();
12055 SmallVector<SDValue, 8> Values;
12056 SmallVector<SDValue, 8> Stores;
12057 EVT VT = SN->getValue().getValueType();
12058 bool IsV1024i1 = VT == MVT::v1024i1;
12059 bool IsV2048i1 = VT == MVT::v2048i1;
12060
12061 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12062 // Dense Math dmr pair registers, respectively.
12063 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12064 (void)IsV2048i1;
12065 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12066 "Dense Math support required.");
12067 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12068
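  // Split the dmr (or dmr pair) value back into 256-bit vector pairs with
  // DMXXEXTFDMR512[_HI]; each pair is stored with the stxvp intrinsic below.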
12069 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12070 if (IsV1024i1) {
12071 SDValue Lo(DAG.getMachineNode(
12072 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
12073 Op1: Op.getOperand(i: 1),
12074 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
12075 0);
12076 SDValue Hi(DAG.getMachineNode(
12077 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
12078 Op1: Op.getOperand(i: 1),
12079 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
12080 0);
12081 MachineSDNode *ExtNode =
12082 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Lo);
12083 Values.push_back(Elt: SDValue(ExtNode, 0));
12084 Values.push_back(Elt: SDValue(ExtNode, 1));
12085 ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Hi);
12086 Values.push_back(Elt: SDValue(ExtNode, 0));
12087 Values.push_back(Elt: SDValue(ExtNode, 1));
12088 } else {
12089 // This corresponds to v2048i1 which represents a dmr pair.
12090 SDValue Dmr0(
12091 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
12092 Op1: Op.getOperand(i: 1),
12093 Op2: DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32)),
12094 0);
12095
12096 SDValue Dmr1(
12097 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
12098 Op1: Op.getOperand(i: 1),
12099 Op2: DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32)),
12100 0);
12101
12102 SDValue Dmr0Lo(DAG.getMachineNode(
12103 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
12104 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
12105 0);
12106
12107 SDValue Dmr0Hi(DAG.getMachineNode(
12108 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
12109 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
12110 0);
12111
12112 SDValue Dmr1Lo(DAG.getMachineNode(
12113 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
12114 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
12115 0);
12116
12117 SDValue Dmr1Hi(DAG.getMachineNode(
12118 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
12119 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
12120 0);
12121
12122 MachineSDNode *ExtNode =
12123 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr0Lo);
12124 Values.push_back(Elt: SDValue(ExtNode, 0));
12125 Values.push_back(Elt: SDValue(ExtNode, 1));
12126 ExtNode =
12127 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr0Hi);
12128 Values.push_back(Elt: SDValue(ExtNode, 0));
12129 Values.push_back(Elt: SDValue(ExtNode, 1));
12130 ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr1Lo);
12131 Values.push_back(Elt: SDValue(ExtNode, 0));
12132 Values.push_back(Elt: SDValue(ExtNode, 1));
12133 ExtNode =
12134 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr1Hi);
12135 Values.push_back(Elt: SDValue(ExtNode, 0));
12136 Values.push_back(Elt: SDValue(ExtNode, 1));
12137 }
12138
12139 if (Subtarget.isLittleEndian())
12140 std::reverse(first: Values.begin(), last: Values.end());
12141
12142 SDVTList Tys = DAG.getVTList(VT: MVT::Other);
12143 SmallVector<SDValue, 4> Ops{
12144 StoreChain, DAG.getConstant(Val: Intrinsic::ppc_vsx_stxvp, DL: dl, VT: MVT::i32),
12145 Values[0], BasePtr};
12146 MachineMemOperand *MMO = SN->getMemOperand();
12147 unsigned NumVecs = VT.getSizeInBits() / 256;
12148 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12149 MachineMemOperand *NewMMO =
12150 DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * 32, Size: 32);
12151 if (Idx > 0) {
12152 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12153 N2: DAG.getConstant(Val: 32, DL: dl, VT: BasePtr.getValueType()));
12154 Ops[3] = BasePtr;
12155 }
12156 Ops[2] = Values[Idx];
12157 SDValue St = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops,
12158 MemVT: MVT::v256i1, MMO: NewMMO);
12159 Stores.push_back(Elt: St);
12160 }
12161
12162 SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
12163 return TF;
12164}
12165
12166SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12167 SelectionDAG &DAG) const {
12168 SDLoc dl(Op);
12169 StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
12170 SDValue StoreChain = SN->getChain();
12171 SDValue BasePtr = SN->getBasePtr();
12172 SDValue Value = SN->getValue();
12173 SDValue Value2 = SN->getValue();
12174 EVT StoreVT = Value.getValueType();
12175
12176 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12177 return LowerDMFVectorStore(Op, DAG);
12178
12179 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12180 return Op;
12181
  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 stores to store the pair's or accumulator's
  // underlying registers individually.
12185 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12186 "Type unsupported without MMA");
12187 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12188 "Type unsupported without paired vector support");
12189 Align Alignment = SN->getAlign();
12190 SmallVector<SDValue, 4> Stores;
12191 unsigned NumVecs = 2;
12192 if (StoreVT == MVT::v512i1) {
12193 if (Subtarget.isISAFuture()) {
12194 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12195 MachineSDNode *ExtNode = DAG.getMachineNode(
12196 Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Op.getOperand(i: 1));
12197
12198 Value = SDValue(ExtNode, 0);
12199 Value2 = SDValue(ExtNode, 1);
12200 } else
12201 Value = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: Value);
12202 NumVecs = 4;
12203 }
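  // Extract and store the underlying 128-bit VSX registers one at a time. On
  // ISA-Future accumulators, Idx picks one of the two extracted 256-bit pairs
  // (Value/Value2) and VecNum the register within it; the order is reversed on
  // little-endian targets.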
12204 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12205 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12206 SDValue Elt;
12207 if (Subtarget.isISAFuture()) {
12208 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12209 Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
12210 N1: Idx > 1 ? Value2 : Value,
12211 N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
12212 } else
12213 Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: Value,
12214 N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
12215
12216 SDValue Store =
12217 DAG.getStore(Chain: StoreChain, dl, Val: Elt, Ptr: BasePtr,
12218 PtrInfo: SN->getPointerInfo().getWithOffset(O: Idx * 16),
12219 Alignment: commonAlignment(A: Alignment, Offset: Idx * 16),
12220 MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());
12221 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12222 N2: DAG.getConstant(Val: 16, DL: dl, VT: BasePtr.getValueType()));
12223 Stores.push_back(Elt: Store);
12224 }
12225 SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
12226 return TF;
12227}
12228
12229SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12230 SDLoc dl(Op);
12231 if (Op.getValueType() == MVT::v4i32) {
12232 SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
12233
12234 SDValue Zero = getCanonicalConstSplat(Val: 0, SplatSize: 1, VT: MVT::v4i32, DAG, dl);
    // Splat of -16; vrlw/vslw only use the low 5 bits, so this acts as a +16
    // rotate/shift amount.
12236 SDValue Neg16 = getCanonicalConstSplat(Val: -16, SplatSize: 4, VT: MVT::v4i32, DAG, dl);
12237 SDValue RHSSwap = // = vrlw RHS, 16
12238 BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vrlw, LHS: RHS, RHS: Neg16, DAG, dl);
12239
12240 // Shrinkify inputs to v8i16.
12241 LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: LHS);
12242 RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHS);
12243 RHSSwap = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHSSwap);
12244
12245 // Low parts multiplied together, generating 32-bit results (we ignore the
12246 // top parts).
12247 SDValue LoProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmulouh,
12248 LHS, RHS, DAG, dl, DestVT: MVT::v4i32);
12249
12250 SDValue HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmsumuhm,
12251 Op0: LHS, Op1: RHSSwap, Op2: Zero, DAG, dl, DestVT: MVT::v4i32);
12252 // Shift the high parts up 16 bits.
12253 HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: HiProd,
12254 RHS: Neg16, DAG, dl);
12255 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::v4i32, N1: LoProd, N2: HiProd);
12256 } else if (Op.getValueType() == MVT::v16i8) {
12257 SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
12258 bool isLittleEndian = Subtarget.isLittleEndian();
12259
    // Multiply the even 8-bit parts, producing 16-bit products.
12261 SDValue EvenParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuleub,
12262 LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
12263 EvenParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: EvenParts);
12264
    // Multiply the odd 8-bit parts, producing 16-bit products.
12266 SDValue OddParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuloub,
12267 LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
12268 OddParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OddParts);
12269
12270 // Merge the results together. Because vmuleub and vmuloub are
12271 // instructions with a big-endian bias, we must reverse the
12272 // element numbering and reverse the meaning of "odd" and "even"
12273 // when generating little endian code.
12274 int Ops[16];
12275 for (unsigned i = 0; i != 8; ++i) {
12276 if (isLittleEndian) {
12277 Ops[i*2 ] = 2*i;
12278 Ops[i*2+1] = 2*i+16;
12279 } else {
12280 Ops[i*2 ] = 2*i+1;
12281 Ops[i*2+1] = 2*i+1+16;
12282 }
12283 }
12284 if (isLittleEndian)
12285 return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OddParts, N2: EvenParts, Mask: Ops);
12286 else
12287 return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: EvenParts, N2: OddParts, Mask: Ops);
12288 } else {
12289 llvm_unreachable("Unknown mul to lower!");
12290 }
12291}
12292
12293SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12294 bool IsStrict = Op->isStrictFPOpcode();
12295 if (Op.getOperand(i: IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12296 !Subtarget.hasP9Vector())
12297 return SDValue();
12298
12299 return Op;
12300}
12301
// Custom lowering for fpext v2f32 to v2f64
12303SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12304
12305 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12306 "Should only be called for ISD::FP_EXTEND");
12307
12308 // FIXME: handle extends from half precision float vectors on P9.
12309 // We only want to custom lower an extend from v2f32 to v2f64.
12310 if (Op.getValueType() != MVT::v2f64 ||
12311 Op.getOperand(i: 0).getValueType() != MVT::v2f32)
12312 return SDValue();
12313
12314 SDLoc dl(Op);
12315 SDValue Op0 = Op.getOperand(i: 0);
12316
12317 switch (Op0.getOpcode()) {
12318 default:
12319 return SDValue();
12320 case ISD::EXTRACT_SUBVECTOR: {
12321 assert(Op0.getNumOperands() == 2 &&
12322 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12323 "Node should have 2 operands with second one being a constant!");
12324
12325 if (Op0.getOperand(i: 0).getValueType() != MVT::v4f32)
12326 return SDValue();
12327
    // Custom lowering is only done for the high or low doubleword.
12329 int Idx = Op0.getConstantOperandVal(i: 1);
12330 if (Idx % 2 != 0)
12331 return SDValue();
12332
12333 // Since input is v4f32, at this point Idx is either 0 or 2.
12334 // Shift to get the doubleword position we want.
12335 int DWord = Idx >> 1;
12336
12337 // High and low word positions are different on little endian.
12338 if (Subtarget.isLittleEndian())
12339 DWord ^= 0x1;
12340
12341 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64,
12342 N1: Op0.getOperand(i: 0), N2: DAG.getConstant(Val: DWord, DL: dl, VT: MVT::i32));
12343 }
12344 case ISD::FADD:
12345 case ISD::FMUL:
12346 case ISD::FSUB: {
12347 SDValue NewLoad[2];
12348 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both inputs are loads.
12350 SDValue LdOp = Op0.getOperand(i);
12351 if (LdOp.getOpcode() != ISD::LOAD)
12352 return SDValue();
12353 // Generate new load node.
12354 LoadSDNode *LD = cast<LoadSDNode>(Val&: LdOp);
12355 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12356 NewLoad[i] = DAG.getMemIntrinsicNode(
12357 Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12358 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12359 }
12360 SDValue NewOp =
12361 DAG.getNode(Opcode: Op0.getOpcode(), DL: SDLoc(Op0), VT: MVT::v4f32, N1: NewLoad[0],
12362 N2: NewLoad[1], Flags: Op0.getNode()->getFlags());
12363 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewOp,
12364 N2: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32));
12365 }
12366 case ISD::LOAD: {
12367 LoadSDNode *LD = cast<LoadSDNode>(Val&: Op0);
12368 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12369 SDValue NewLd = DAG.getMemIntrinsicNode(
12370 Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12371 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12372 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewLd,
12373 N2: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32));
12374 }
12375 }
  llvm_unreachable("ERROR: Should return for all cases within switch.");
12377}
12378
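// Materialize an incoming 0/1 carry value into the carry flag consumed by
// ADDE/SUBE: adding the value to all-ones carries out exactly when it is 1.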
12379static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12380 SelectionDAG &DAG,
12381 const PPCSubtarget &STI) {
12382 SDLoc DL(Value);
12383 if (STI.useCRBits())
12384 Value = DAG.getNode(Opcode: ISD::SELECT, DL, VT: SumType, N1: Value,
12385 N2: DAG.getConstant(Val: 1, DL, VT: SumType),
12386 N3: DAG.getConstant(Val: 0, DL, VT: SumType));
12387 else
12388 Value = DAG.getZExtOrTrunc(Op: Value, DL, VT: SumType);
12389 SDValue Sum = DAG.getNode(Opcode: PPCISD::ADDC, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32),
12390 N1: Value, N2: DAG.getAllOnesConstant(DL, VT: SumType));
12391 return Sum.getValue(R: 1);
12392}
12393
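// Recover a 0/1 carry value from the carry flag by computing 0 + 0 + carry
// with ADDE and comparing (or truncating) the result.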
12394static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12395 EVT CarryType, SelectionDAG &DAG,
12396 const PPCSubtarget &STI) {
12397 SDLoc DL(Flag);
12398 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SumType);
12399 SDValue Carry = DAG.getNode(
12400 Opcode: PPCISD::ADDE, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32), N1: Zero, N2: Zero, N3: Flag);
12401 if (STI.useCRBits())
12402 return DAG.getSetCC(DL, VT: CarryType, LHS: Carry, RHS: Zero, Cond: ISD::SETNE);
12403 return DAG.getZExtOrTrunc(Op: Carry, DL, VT: CarryType);
12404}
12405
12406SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12407
12408 SDLoc DL(Op);
12409 SDNode *N = Op.getNode();
12410 EVT VT = N->getValueType(ResNo: 0);
12411 EVT CarryType = N->getValueType(ResNo: 1);
12412 unsigned Opc = N->getOpcode();
12413 bool IsAdd = Opc == ISD::UADDO;
12414 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12415 SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
12416 N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 1));
12417 SDValue Carry = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: 1), CarryType,
12418 DAG, STI: Subtarget);
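  // PPC's carry bit for subtraction is set when *no* borrow occurs, whereas
  // USUBO expects its flag to indicate a borrow, so invert it for subtracts.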
12419 if (!IsAdd)
12420 Carry = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Carry,
12421 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryType));
12422 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: Carry);
12423}
12424
12425SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12426 SelectionDAG &DAG) const {
12427 SDLoc DL(Op);
12428 SDNode *N = Op.getNode();
12429 unsigned Opc = N->getOpcode();
12430 EVT VT = N->getValueType(ResNo: 0);
12431 EVT CarryType = N->getValueType(ResNo: 1);
12432 SDValue CarryOp = N->getOperand(Num: 2);
12433 bool IsAdd = Opc == ISD::UADDO_CARRY;
12434 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12435 if (!IsAdd)
12436 CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
12437 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryOp.getValueType()));
12438 CarryOp = ConvertCarryValueToCarryFlag(SumType: VT, Value: CarryOp, DAG, STI: Subtarget);
12439 SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
12440 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1), N3: CarryOp);
12441 CarryOp = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: 1), CarryType, DAG,
12442 STI: Subtarget);
12443 if (!IsAdd)
12444 CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
12445 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryOp.getValueType()));
12446 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: CarryOp);
12447}
12448
12449SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12450
12451 SDLoc dl(Op);
12452 SDValue LHS = Op.getOperand(i: 0);
12453 SDValue RHS = Op.getOperand(i: 1);
12454 EVT VT = Op.getNode()->getValueType(ResNo: 0);
12455
12456 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
12457
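  // Signed overflow on subtraction occurs when the operands have different
  // signs and the sign of the result differs from the sign of LHS, i.e. when
  // the sign bit of ((RHS ^ LHS) & (Sub ^ LHS)) is set.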
12458 SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: RHS, N2: LHS);
12459 SDValue Xor2 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Sub, N2: LHS);
12460
12461 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Xor1, N2: Xor2);
12462
12463 SDValue Overflow =
12464 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: And,
12465 N2: DAG.getConstant(Val: VT.getSizeInBits() - 1, DL: dl, VT: MVT::i32));
12466
12467 SDValue OverflowTrunc =
12468 DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: Op.getNode()->getValueType(ResNo: 1), Operand: Overflow);
12469
12470 return DAG.getMergeValues(Ops: {Sub, OverflowTrunc}, dl);
12471}
12472
12473/// LowerOperation - Provide custom lowering hooks for some operations.
12474///
12475SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12476 switch (Op.getOpcode()) {
12477 default:
12478 llvm_unreachable("Wasn't expecting to be able to lower this!");
12479 case ISD::FPOW: return lowerPow(Op, DAG);
12480 case ISD::FSIN: return lowerSin(Op, DAG);
12481 case ISD::FCOS: return lowerCos(Op, DAG);
12482 case ISD::FLOG: return lowerLog(Op, DAG);
12483 case ISD::FLOG10: return lowerLog10(Op, DAG);
12484 case ISD::FEXP: return lowerExp(Op, DAG);
12485 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12486 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12487 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12488 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12489 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12490 case ISD::STRICT_FSETCC:
12491 case ISD::STRICT_FSETCCS:
12492 case ISD::SETCC: return LowerSETCC(Op, DAG);
12493 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12494 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12495 case ISD::SSUBO:
12496 return LowerSSUBO(Op, DAG);
12497
12498 case ISD::INLINEASM:
12499 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12500 // Variable argument lowering.
12501 case ISD::VASTART: return LowerVASTART(Op, DAG);
12502 case ISD::VAARG: return LowerVAARG(Op, DAG);
12503 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12504
12505 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12506 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12507 case ISD::GET_DYNAMIC_AREA_OFFSET:
12508 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12509
12510 // Exception handling lowering.
12511 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12512 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12513 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12514
12515 case ISD::LOAD: return LowerLOAD(Op, DAG);
12516 case ISD::STORE: return LowerSTORE(Op, DAG);
12517 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12518 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12519 case ISD::STRICT_FP_TO_UINT:
12520 case ISD::STRICT_FP_TO_SINT:
12521 case ISD::FP_TO_UINT:
12522 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, dl: SDLoc(Op));
12523 case ISD::STRICT_UINT_TO_FP:
12524 case ISD::STRICT_SINT_TO_FP:
12525 case ISD::UINT_TO_FP:
12526 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12527 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12528 case ISD::SET_ROUNDING:
12529 return LowerSET_ROUNDING(Op, DAG);
12530
12531 // Lower 64-bit shifts.
12532 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12533 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12534 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12535
12536 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12537 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12538
12539 // Vector-related lowering.
12540 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12541 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12542 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12543 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12544 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12545 case ISD::MUL: return LowerMUL(Op, DAG);
12546 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12547 case ISD::STRICT_FP_ROUND:
12548 case ISD::FP_ROUND:
12549 return LowerFP_ROUND(Op, DAG);
12550 case ISD::ROTL: return LowerROTL(Op, DAG);
12551
12552 // For counter-based loop handling.
12553 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12554
12555 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12556
12557 // Frame & Return address.
12558 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12559 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12560
12561 case ISD::INTRINSIC_VOID:
12562 return LowerINTRINSIC_VOID(Op, DAG);
12563 case ISD::BSWAP:
12564 return LowerBSWAP(Op, DAG);
12565 case ISD::ATOMIC_CMP_SWAP:
12566 return LowerATOMIC_CMP_SWAP(Op, DAG);
12567 case ISD::ATOMIC_STORE:
12568 return LowerATOMIC_LOAD_STORE(Op, DAG);
12569 case ISD::IS_FPCLASS:
12570 return LowerIS_FPCLASS(Op, DAG);
12571 case ISD::UADDO:
12572 case ISD::USUBO:
12573 return LowerADDSUBO(Op, DAG);
12574 case ISD::UADDO_CARRY:
12575 case ISD::USUBO_CARRY:
12576 return LowerADDSUBO_CARRY(Op, DAG);
12577 }
12578}
12579
12580void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12581 SmallVectorImpl<SDValue>&Results,
12582 SelectionDAG &DAG) const {
12583 SDLoc dl(N);
12584 switch (N->getOpcode()) {
12585 default:
12586 llvm_unreachable("Do not know how to custom type legalize this operation!");
12587 case ISD::ATOMIC_LOAD: {
12588 SDValue Res = LowerATOMIC_LOAD_STORE(Op: SDValue(N, 0), DAG);
12589 Results.push_back(Elt: Res);
12590 Results.push_back(Elt: Res.getValue(R: 1));
12591 break;
12592 }
12593 case ISD::READCYCLECOUNTER: {
12594 SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other);
12595 SDValue RTB = DAG.getNode(Opcode: PPCISD::READ_TIME_BASE, DL: dl, VTList: VTs, N: N->getOperand(Num: 0));
12596
12597 Results.push_back(
12598 Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: RTB, N2: RTB.getValue(R: 1)));
12599 Results.push_back(Elt: RTB.getValue(R: 2));
12600 break;
12601 }
12602 case ISD::INTRINSIC_W_CHAIN: {
12603 if (N->getConstantOperandVal(Num: 1) != Intrinsic::loop_decrement)
12604 break;
12605
12606 assert(N->getValueType(0) == MVT::i1 &&
12607 "Unexpected result type for CTR decrement intrinsic");
12608 EVT SVT = getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(),
12609 VT: N->getValueType(ResNo: 0));
12610 SDVTList VTs = DAG.getVTList(VT1: SVT, VT2: MVT::Other);
12611 SDValue NewInt = DAG.getNode(Opcode: N->getOpcode(), DL: dl, VTList: VTs, N1: N->getOperand(Num: 0),
12612 N2: N->getOperand(Num: 1));
12613
12614 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewInt));
12615 Results.push_back(Elt: NewInt.getValue(R: 1));
12616 break;
12617 }
12618 case ISD::INTRINSIC_WO_CHAIN: {
12619 switch (N->getConstantOperandVal(Num: 0)) {
12620 case Intrinsic::ppc_pack_longdouble:
12621 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::ppcf128,
12622 N1: N->getOperand(Num: 2), N2: N->getOperand(Num: 1)));
12623 break;
12624 case Intrinsic::ppc_maxfe:
12625 case Intrinsic::ppc_minfe:
12626 case Intrinsic::ppc_fnmsub:
12627 case Intrinsic::ppc_convert_f128_to_ppcf128:
12628 Results.push_back(Elt: LowerINTRINSIC_WO_CHAIN(Op: SDValue(N, 0), DAG));
12629 break;
12630 }
12631 break;
12632 }
12633 case ISD::VAARG: {
12634 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12635 return;
12636
12637 EVT VT = N->getValueType(ResNo: 0);
12638
12639 if (VT == MVT::i64) {
12640 SDValue NewNode = LowerVAARG(Op: SDValue(N, 1), DAG);
12641
12642 Results.push_back(Elt: NewNode);
12643 Results.push_back(Elt: NewNode.getValue(R: 1));
12644 }
12645 return;
12646 }
12647 case ISD::STRICT_FP_TO_SINT:
12648 case ISD::STRICT_FP_TO_UINT:
12649 case ISD::FP_TO_SINT:
12650 case ISD::FP_TO_UINT: {
12651 // LowerFP_TO_INT() can only handle f32 and f64.
12652 if (N->getOperand(Num: N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12653 MVT::ppcf128)
12654 return;
12655 SDValue LoweredValue = LowerFP_TO_INT(Op: SDValue(N, 0), DAG, dl);
12656 Results.push_back(Elt: LoweredValue);
12657 if (N->isStrictFPOpcode())
12658 Results.push_back(Elt: LoweredValue.getValue(R: 1));
12659 return;
12660 }
12661 case ISD::TRUNCATE: {
12662 if (!N->getValueType(ResNo: 0).isVector())
12663 return;
12664 SDValue Lowered = LowerTRUNCATEVector(Op: SDValue(N, 0), DAG);
12665 if (Lowered)
12666 Results.push_back(Elt: Lowered);
12667 return;
12668 }
12669 case ISD::SCALAR_TO_VECTOR: {
12670 SDValue Lowered = LowerSCALAR_TO_VECTOR(Op: SDValue(N, 0), DAG);
12671 if (Lowered)
12672 Results.push_back(Elt: Lowered);
12673 return;
12674 }
12675 case ISD::FSHL:
12676 case ISD::FSHR:
12677 // Don't handle funnel shifts here.
12678 return;
12679 case ISD::BITCAST:
12680 // Don't handle bitcast here.
12681 return;
12682 case ISD::FP_EXTEND:
12683 SDValue Lowered = LowerFP_EXTEND(Op: SDValue(N, 0), DAG);
12684 if (Lowered)
12685 Results.push_back(Elt: Lowered);
12686 return;
12687 }
12688}
12689
12690//===----------------------------------------------------------------------===//
12691// Other Lowering Code
12692//===----------------------------------------------------------------------===//
12693
12694static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12695 return Builder.CreateIntrinsic(ID: Id, Args: {});
12696}
12697
12698Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12699 Value *Addr,
12700 AtomicOrdering Ord) const {
12701 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12702
12703 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12704 "Only 8/16/32/64-bit atomic loads supported");
12705 Intrinsic::ID IntID;
12706 switch (SZ) {
12707 default:
12708 llvm_unreachable("Unexpected PrimitiveSize");
12709 case 8:
12710 IntID = Intrinsic::ppc_lbarx;
    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12712 break;
12713 case 16:
12714 IntID = Intrinsic::ppc_lharx;
    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12716 break;
12717 case 32:
12718 IntID = Intrinsic::ppc_lwarx;
12719 break;
12720 case 64:
12721 IntID = Intrinsic::ppc_ldarx;
12722 break;
12723 }
12724 Value *Call =
12725 Builder.CreateIntrinsic(ID: IntID, Args: Addr, /*FMFSource=*/nullptr, Name: "larx");
12726
12727 return Builder.CreateTruncOrBitCast(V: Call, DestTy: ValueTy);
12728}
12729
12730// Perform a store-conditional operation to Addr. Return the status of the
12731// store. This should be 0 if the store succeeded, non-zero otherwise.
12732Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12733 Value *Val, Value *Addr,
12734 AtomicOrdering Ord) const {
12735 Type *Ty = Val->getType();
12736 unsigned SZ = Ty->getPrimitiveSizeInBits();
12737
12738 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic stores supported");
12740 Intrinsic::ID IntID;
12741 switch (SZ) {
12742 default:
12743 llvm_unreachable("Unexpected PrimitiveSize");
12744 case 8:
12745 IntID = Intrinsic::ppc_stbcx;
    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12747 break;
12748 case 16:
12749 IntID = Intrinsic::ppc_sthcx;
    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12751 break;
12752 case 32:
12753 IntID = Intrinsic::ppc_stwcx;
12754 break;
12755 case 64:
12756 IntID = Intrinsic::ppc_stdcx;
12757 break;
12758 }
12759
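  // The sub-word store-conditional intrinsics take the value as an i32.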
12760 if (SZ == 8 || SZ == 16)
12761 Val = Builder.CreateZExt(V: Val, DestTy: Builder.getInt32Ty());
12762
12763 Value *Call = Builder.CreateIntrinsic(ID: IntID, Args: {Addr, Val},
12764 /*FMFSource=*/nullptr, Name: "stcx");
12765 return Builder.CreateXor(LHS: Call, RHS: Builder.getInt32(C: 1));
12766}
12767
// The mappings for emitLeading/TrailingFence are taken from
12769// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12770Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12771 Instruction *Inst,
12772 AtomicOrdering Ord) const {
12773 if (Ord == AtomicOrdering::SequentiallyConsistent)
12774 return callIntrinsic(Builder, Id: Intrinsic::ppc_sync);
12775 if (isReleaseOrStronger(AO: Ord))
12776 return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
12777 return nullptr;
12778}
12779
12780Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12781 Instruction *Inst,
12782 AtomicOrdering Ord) const {
12783 if (Inst->hasAtomicLoad() && isAcquireOrStronger(AO: Ord)) {
12784 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12785 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12786 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12787 if (isa<LoadInst>(Val: Inst))
12788 return Builder.CreateIntrinsic(ID: Intrinsic::ppc_cfence, Types: {Inst->getType()},
12789 Args: {Inst});
12790 // FIXME: Can use isync for rmw operation.
12791 return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
12792 }
12793 return nullptr;
12794}
12795
12796MachineBasicBlock *
12797PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12798 unsigned AtomicSize,
12799 unsigned BinOpcode,
12800 unsigned CmpOpcode,
12801 unsigned CmpPred) const {
12802 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12803 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12804
12805 auto LoadMnemonic = PPC::LDARX;
12806 auto StoreMnemonic = PPC::STDCX;
12807 switch (AtomicSize) {
12808 default:
12809 llvm_unreachable("Unexpected size of atomic entity");
12810 case 1:
12811 LoadMnemonic = PPC::LBARX;
12812 StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "Partword atomics required for sizes smaller than 4 bytes");
12814 break;
12815 case 2:
12816 LoadMnemonic = PPC::LHARX;
12817 StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "Partword atomics required for sizes smaller than 4 bytes");
12819 break;
12820 case 4:
12821 LoadMnemonic = PPC::LWARX;
12822 StoreMnemonic = PPC::STWCX;
12823 break;
12824 case 8:
12825 LoadMnemonic = PPC::LDARX;
12826 StoreMnemonic = PPC::STDCX;
12827 break;
12828 }
12829
12830 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12831 MachineFunction *F = BB->getParent();
12832 MachineFunction::iterator It = ++BB->getIterator();
12833
12834 Register dest = MI.getOperand(i: 0).getReg();
12835 Register ptrA = MI.getOperand(i: 1).getReg();
12836 Register ptrB = MI.getOperand(i: 2).getReg();
12837 Register incr = MI.getOperand(i: 3).getReg();
12838 DebugLoc dl = MI.getDebugLoc();
12839
12840 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
12841 MachineBasicBlock *loop2MBB =
12842 CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
12843 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
12844 F->insert(MBBI: It, MBB: loopMBB);
12845 if (CmpOpcode)
12846 F->insert(MBBI: It, MBB: loop2MBB);
12847 F->insert(MBBI: It, MBB: exitMBB);
12848 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
12849 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
12850 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
12851
12852 MachineRegisterInfo &RegInfo = F->getRegInfo();
12853 Register TmpReg = (!BinOpcode) ? incr :
12854 RegInfo.createVirtualRegister( RegClass: AtomicSize == 8 ? &PPC::G8RCRegClass
12855 : &PPC::GPRCRegClass);
12856
12857 // thisMBB:
12858 // ...
12859 // fallthrough --> loopMBB
12860 BB->addSuccessor(Succ: loopMBB);
12861
12862 // loopMBB:
12863 // l[wd]arx dest, ptr
12864 // add r0, dest, incr
12865 // st[wd]cx. r0, ptr
12866 // bne- loopMBB
12867 // fallthrough --> exitMBB
12868
12869 // For max/min...
12870 // loopMBB:
12871 // l[wd]arx dest, ptr
12872 // cmpl?[wd] dest, incr
12873 // bgt exitMBB
12874 // loop2MBB:
12875 // st[wd]cx. dest, ptr
12876 // bne- loopMBB
12877 // fallthrough --> exitMBB
12878
12879 BB = loopMBB;
12880 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest)
12881 .addReg(RegNo: ptrA).addReg(RegNo: ptrB);
12882 if (BinOpcode)
12883 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg).addReg(RegNo: incr).addReg(RegNo: dest);
12884 if (CmpOpcode) {
12885 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
12886 // Signed comparisons of byte or halfword values must be sign-extended.
12887 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12888 Register ExtReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
12889 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12890 DestReg: ExtReg).addReg(RegNo: dest);
12891 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ExtReg).addReg(RegNo: incr);
12892 } else
12893 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: dest).addReg(RegNo: incr);
12894
12895 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
12896 .addImm(Val: CmpPred)
12897 .addReg(RegNo: CrReg)
12898 .addMBB(MBB: exitMBB);
12899 BB->addSuccessor(Succ: loop2MBB);
12900 BB->addSuccessor(Succ: exitMBB);
12901 BB = loop2MBB;
12902 }
12903 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
12904 .addReg(RegNo: TmpReg).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
12905 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
12906 .addImm(Val: PPC::PRED_NE).addReg(RegNo: PPC::CR0).addMBB(MBB: loopMBB);
12907 BB->addSuccessor(Succ: loopMBB);
12908 BB->addSuccessor(Succ: exitMBB);
12909
12910 // exitMBB:
12911 // ...
12912 BB = exitMBB;
12913 return BB;
12914}
12915
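// Return true if the instruction defining a value is known to produce a
// sign-extended result: sign-extending loads, exts[bhw] and friends, algebraic
// shifts, or a copy from a register already known to be sign-extended.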
12916static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12917 switch(MI.getOpcode()) {
12918 default:
12919 return false;
12920 case PPC::COPY:
12921 return TII->isSignExtended(Reg: MI.getOperand(i: 1).getReg(),
12922 MRI: &MI.getMF()->getRegInfo());
12923 case PPC::LHA:
12924 case PPC::LHA8:
12925 case PPC::LHAU:
12926 case PPC::LHAU8:
12927 case PPC::LHAUX:
12928 case PPC::LHAUX8:
12929 case PPC::LHAX:
12930 case PPC::LHAX8:
12931 case PPC::LWA:
12932 case PPC::LWAUX:
12933 case PPC::LWAX:
12934 case PPC::LWAX_32:
12935 case PPC::LWA_32:
12936 case PPC::PLHA:
12937 case PPC::PLHA8:
12938 case PPC::PLHA8pc:
12939 case PPC::PLHApc:
12940 case PPC::PLWA:
12941 case PPC::PLWA8:
12942 case PPC::PLWA8pc:
12943 case PPC::PLWApc:
12944 case PPC::EXTSB:
12945 case PPC::EXTSB8:
12946 case PPC::EXTSB8_32_64:
12947 case PPC::EXTSB8_rec:
12948 case PPC::EXTSB_rec:
12949 case PPC::EXTSH:
12950 case PPC::EXTSH8:
12951 case PPC::EXTSH8_32_64:
12952 case PPC::EXTSH8_rec:
12953 case PPC::EXTSH_rec:
12954 case PPC::EXTSW:
12955 case PPC::EXTSWSLI:
12956 case PPC::EXTSWSLI_32_64:
12957 case PPC::EXTSWSLI_32_64_rec:
12958 case PPC::EXTSWSLI_rec:
12959 case PPC::EXTSW_32:
12960 case PPC::EXTSW_32_64:
12961 case PPC::EXTSW_32_64_rec:
12962 case PPC::EXTSW_rec:
12963 case PPC::SRAW:
12964 case PPC::SRAWI:
12965 case PPC::SRAWI_rec:
12966 case PPC::SRAW_rec:
12967 return true;
12968 }
12969 return false;
12970}
12971
12972MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12973 MachineInstr &MI, MachineBasicBlock *BB,
12974 bool is8bit, // operation
12975 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12976 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12977 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12978
12979 // If this is a signed comparison and the value being compared is not known
12980 // to be sign extended, sign extend it here.
12981 DebugLoc dl = MI.getDebugLoc();
12982 MachineFunction *F = BB->getParent();
12983 MachineRegisterInfo &RegInfo = F->getRegInfo();
12984 Register incr = MI.getOperand(i: 3).getReg();
12985 bool IsSignExtended =
12986 incr.isVirtual() && isSignExtended(MI&: *RegInfo.getVRegDef(Reg: incr), TII);
12987
12988 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12989 Register ValueReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
12990 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueReg)
12991 .addReg(RegNo: MI.getOperand(i: 3).getReg());
12992 MI.getOperand(i: 3).setReg(ValueReg);
12993 incr = ValueReg;
12994 }
12995 // If we support part-word atomic mnemonics, just use them
12996 if (Subtarget.hasPartwordAtomics())
12997 return EmitAtomicBinary(MI, BB, AtomicSize: is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12998 CmpPred);
12999
  // In 64-bit mode we have to use 64-bit registers for addresses, even though
  // lwarx/stwcx. operate on 32-bit data. With the 32-bit (word-sized) atomics
  // we can use the address registers without caring whether they are 32- or
  // 64-bit, but here we are doing actual arithmetic on the addresses.
13004 bool is64bit = Subtarget.isPPC64();
13005 bool isLittleEndian = Subtarget.isLittleEndian();
13006 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13007
13008 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13009 MachineFunction::iterator It = ++BB->getIterator();
13010
13011 Register dest = MI.getOperand(i: 0).getReg();
13012 Register ptrA = MI.getOperand(i: 1).getReg();
13013 Register ptrB = MI.getOperand(i: 2).getReg();
13014
13015 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13016 MachineBasicBlock *loop2MBB =
13017 CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
13018 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13019 F->insert(MBBI: It, MBB: loopMBB);
13020 if (CmpOpcode)
13021 F->insert(MBBI: It, MBB: loop2MBB);
13022 F->insert(MBBI: It, MBB: exitMBB);
13023 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
13024 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13025 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13026
13027 const TargetRegisterClass *RC =
13028 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13029 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13030
13031 Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
13032 Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13033 Register ShiftReg =
13034 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
13035 Register Incr2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13036 Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13037 Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13038 Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13039 Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13040 Register Tmp3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13041 Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13042 Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13043 Register SrwDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13044 Register Ptr1Reg;
13045 Register TmpReg =
13046 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
13047
13048 // thisMBB:
13049 // ...
13050 // fallthrough --> loopMBB
13051 BB->addSuccessor(Succ: loopMBB);
13052
13053 // The 4-byte load must be aligned, while a char or short may be
13054 // anywhere in the word. Hence all this nasty bookkeeping code.
13055 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13056 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13057 // xori shift, shift1, 24 [16]
13058 // rlwinm ptr, ptr1, 0, 0, 29
13059 // slw incr2, incr, shift
13060 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13061 // slw mask, mask2, shift
13062 // loopMBB:
13063 // lwarx tmpDest, ptr
13064 // add tmp, tmpDest, incr2
13065 // andc tmp2, tmpDest, mask
13066 // and tmp3, tmp, mask
13067 // or tmp4, tmp3, tmp2
13068 // stwcx. tmp4, ptr
13069 // bne- loopMBB
13070 // fallthrough --> exitMBB
13071 // srw SrwDest, tmpDest, shift
13072 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
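  // Worked example (little-endian, is8bit, ptr1 & 3 == 2):
  //   shift1 = (ptr1 & 3) << 3 = 16, shift = shift1 = 16,
  //   incr2  = incr << 16, mask = 0xFF << 16, ptr = ptr1 & ~3,
  // so the loop below updates only bits 16..23 of the aligned word.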
13073 if (ptrA != ZeroReg) {
13074 Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
13075 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
13076 .addReg(RegNo: ptrA)
13077 .addReg(RegNo: ptrB);
13078 } else {
13079 Ptr1Reg = ptrB;
13080 }
  // We need to use the 32-bit subregister to avoid a register-class mismatch
  // in 64-bit mode.
13083 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
13084 .addReg(RegNo: Ptr1Reg, flags: 0, SubReg: is64bit ? PPC::sub_32 : 0)
13085 .addImm(Val: 3)
13086 .addImm(Val: 27)
13087 .addImm(Val: is8bit ? 28 : 27);
13088 if (!isLittleEndian)
13089 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
13090 .addReg(RegNo: Shift1Reg)
13091 .addImm(Val: is8bit ? 24 : 16);
13092 if (is64bit)
13093 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
13094 .addReg(RegNo: Ptr1Reg)
13095 .addImm(Val: 0)
13096 .addImm(Val: 61);
13097 else
13098 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
13099 .addReg(RegNo: Ptr1Reg)
13100 .addImm(Val: 0)
13101 .addImm(Val: 0)
13102 .addImm(Val: 29);
13103 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: Incr2Reg).addReg(RegNo: incr).addReg(RegNo: ShiftReg);
13104 if (is8bit)
13105 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: 255);
13106 else {
13107 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: 0);
13108 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
13109 .addReg(RegNo: Mask3Reg)
13110 .addImm(Val: 65535);
13111 }
13112 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
13113 .addReg(RegNo: Mask2Reg)
13114 .addReg(RegNo: ShiftReg);
13115
13116 BB = loopMBB;
13117 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
13118 .addReg(RegNo: ZeroReg)
13119 .addReg(RegNo: PtrReg);
13120 if (BinOpcode)
13121 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg)
13122 .addReg(RegNo: Incr2Reg)
13123 .addReg(RegNo: TmpDestReg);
13124 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
13125 .addReg(RegNo: TmpDestReg)
13126 .addReg(RegNo: MaskReg);
13127 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: Tmp3Reg).addReg(RegNo: TmpReg).addReg(RegNo: MaskReg);
13128 if (CmpOpcode) {
13129 // For unsigned comparisons, we can directly compare the shifted values.
13130 // For signed comparisons we shift and sign extend.
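    // Unsigned compares work on the shifted values because both operands
    // occupy the same bit lane with zeroes elsewhere, so their full-word
    // ordering matches the ordering of the original narrow values.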
13131 Register SReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13132 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13133 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: SReg)
13134 .addReg(RegNo: TmpDestReg)
13135 .addReg(RegNo: MaskReg);
13136 unsigned ValueReg = SReg;
13137 unsigned CmpReg = Incr2Reg;
13138 if (CmpOpcode == PPC::CMPW) {
13139 ValueReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13140 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: ValueReg)
13141 .addReg(RegNo: SReg)
13142 .addReg(RegNo: ShiftReg);
13143 Register ValueSReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13144 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueSReg)
13145 .addReg(RegNo: ValueReg);
13146 ValueReg = ValueSReg;
13147 CmpReg = incr;
13148 }
13149 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ValueReg).addReg(RegNo: CmpReg);
13150 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13151 .addImm(Val: CmpPred)
13152 .addReg(RegNo: CrReg)
13153 .addMBB(MBB: exitMBB);
13154 BB->addSuccessor(Succ: loop2MBB);
13155 BB->addSuccessor(Succ: exitMBB);
13156 BB = loop2MBB;
13157 }
13158 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg).addReg(RegNo: Tmp3Reg).addReg(RegNo: Tmp2Reg);
13159 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
13160 .addReg(RegNo: Tmp4Reg)
13161 .addReg(RegNo: ZeroReg)
13162 .addReg(RegNo: PtrReg);
13163 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13164 .addImm(Val: PPC::PRED_NE)
13165 .addReg(RegNo: PPC::CR0)
13166 .addMBB(MBB: loopMBB);
13167 BB->addSuccessor(Succ: loopMBB);
13168 BB->addSuccessor(Succ: exitMBB);
13169
13170 // exitMBB:
13171 // ...
13172 BB = exitMBB;
13173 // Since the shift amount is not a constant, we need to clear
13174 // the upper bits with a separate RLWINM.
13175 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: dest)
13176 .addReg(RegNo: SrwDestReg)
13177 .addImm(Val: 0)
13178 .addImm(Val: is8bit ? 24 : 16)
13179 .addImm(Val: 31);
13180 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: SrwDestReg)
13181 .addReg(RegNo: TmpDestReg)
13182 .addReg(RegNo: ShiftReg);
13183 return BB;
13184}
13185
13186llvm::MachineBasicBlock *
13187PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13188 MachineBasicBlock *MBB) const {
13189 DebugLoc DL = MI.getDebugLoc();
13190 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13191 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13192
13193 MachineFunction *MF = MBB->getParent();
13194 MachineRegisterInfo &MRI = MF->getRegInfo();
13195
13196 const BasicBlock *BB = MBB->getBasicBlock();
13197 MachineFunction::iterator I = ++MBB->getIterator();
13198
13199 Register DstReg = MI.getOperand(i: 0).getReg();
13200 const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
13201 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13202 Register mainDstReg = MRI.createVirtualRegister(RegClass: RC);
13203 Register restoreDstReg = MRI.createVirtualRegister(RegClass: RC);
13204
13205 MVT PVT = getPointerTy(DL: MF->getDataLayout());
13206 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13207 "Invalid Pointer Size!");
13208 // For v = setjmp(buf), we generate
13209 //
13210 // thisMBB:
13211 // SjLjSetup mainMBB
13212 // bl mainMBB
13213 // v_restore = 1
13214 // b sinkMBB
13215 //
13216 // mainMBB:
13217 // buf[LabelOffset] = LR
13218 // v_main = 0
13219 //
13220 // sinkMBB:
13221 // v = phi(main, restore)
13222 //
13223
13224 MachineBasicBlock *thisMBB = MBB;
13225 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13226 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13227 MF->insert(MBBI: I, MBB: mainMBB);
13228 MF->insert(MBBI: I, MBB: sinkMBB);
13229
13230 MachineInstrBuilder MIB;
13231
13232 // Transfer the remainder of BB and its successor edges to sinkMBB.
13233 sinkMBB->splice(Where: sinkMBB->begin(), Other: MBB,
13234 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
13235 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
13236
13237 // Note that the structure of the jmp_buf used here is not compatible
13238 // with that used by libc, and is not designed to be. Specifically, it
13239 // stores only those 'reserved' registers that LLVM does not otherwise
13240 // understand how to spill. Also, by convention, by the time this
13241 // intrinsic is called, Clang has already stored the frame address in the
13242 // first slot of the buffer and stack address in the third. Following the
13243 // X86 target code, we'll store the jump address in the second slot. We also
13244 // need to save the TOC pointer (R2) to handle jumps between shared
13245 // libraries, and that will be stored in the fourth slot. The thread
13246 // identifier (R13) is not affected.
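  // In pointer-sized slots, the resulting layout is:
  //   buf[0] = frame address (stored by Clang)
  //   buf[1] = IP (LR)              -- LabelOffset
  //   buf[2] = SP (stored by Clang) -- matches SPOffset in emitEHSjLjLongJmp
  //   buf[3] = TOC pointer (R2)     -- TOCOffset
  //   buf[4] = base pointer         -- BPOffset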
13247
13248 // thisMBB:
13249 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13250 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13251 const int64_t BPOffset = 4 * PVT.getStoreSize();
13252
  // Prepare the IP in a register.
13254 const TargetRegisterClass *PtrRC = getRegClassFor(VT: PVT);
13255 Register LabelReg = MRI.createVirtualRegister(RegClass: PtrRC);
13256 Register BufReg = MI.getOperand(i: 1).getReg();
13257
13258 if (Subtarget.is64BitELFABI()) {
13259 setUsesTOCBasePtr(*MBB->getParent());
13260 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
13261 .addReg(RegNo: PPC::X2)
13262 .addImm(Val: TOCOffset)
13263 .addReg(RegNo: BufReg)
13264 .cloneMemRefs(OtherMI: MI);
13265 }
13266
13267 // Naked functions never have a base pointer, and so we use r1. For all
13268 // other functions, this decision must be delayed until during PEI.
13269 unsigned BaseReg;
13270 if (MF->getFunction().hasFnAttribute(Kind: Attribute::Naked))
13271 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13272 else
13273 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13274
13275 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL,
13276 MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13277 .addReg(RegNo: BaseReg)
13278 .addImm(Val: BPOffset)
13279 .addReg(RegNo: BufReg)
13280 .cloneMemRefs(OtherMI: MI);
13281
13282 // Setup
13283 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::BCLalways)).addMBB(MBB: mainMBB);
13284 MIB.addRegMask(Mask: TRI->getNoPreservedMask());
13285
13286 BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: restoreDstReg).addImm(Val: 1);
13287
13288 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::EH_SjLj_Setup))
13289 .addMBB(MBB: mainMBB);
13290 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: sinkMBB);
13291
13292 thisMBB->addSuccessor(Succ: mainMBB, Prob: BranchProbability::getZero());
13293 thisMBB->addSuccessor(Succ: sinkMBB, Prob: BranchProbability::getOne());
13294
13295 // mainMBB:
13296 // mainDstReg = 0
13297 MIB =
13298 BuildMI(BB: mainMBB, MIMD: DL,
13299 MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), DestReg: LabelReg);
13300
13301 // Store IP
13302 if (Subtarget.isPPC64()) {
13303 MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
13304 .addReg(RegNo: LabelReg)
13305 .addImm(Val: LabelOffset)
13306 .addReg(RegNo: BufReg);
13307 } else {
13308 MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STW))
13309 .addReg(RegNo: LabelReg)
13310 .addImm(Val: LabelOffset)
13311 .addReg(RegNo: BufReg);
13312 }
13313 MIB.cloneMemRefs(OtherMI: MI);
13314
13315 BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: mainDstReg).addImm(Val: 0);
13316 mainMBB->addSuccessor(Succ: sinkMBB);
13317
13318 // sinkMBB:
13319 BuildMI(BB&: *sinkMBB, I: sinkMBB->begin(), MIMD: DL,
13320 MCID: TII->get(Opcode: PPC::PHI), DestReg: DstReg)
13321 .addReg(RegNo: mainDstReg).addMBB(MBB: mainMBB)
13322 .addReg(RegNo: restoreDstReg).addMBB(MBB: thisMBB);
13323
13324 MI.eraseFromParent();
13325 return sinkMBB;
13326}
13327
13328MachineBasicBlock *
13329PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13330 MachineBasicBlock *MBB) const {
13331 DebugLoc DL = MI.getDebugLoc();
13332 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13333
13334 MachineFunction *MF = MBB->getParent();
13335 MachineRegisterInfo &MRI = MF->getRegInfo();
13336
13337 MVT PVT = getPointerTy(DL: MF->getDataLayout());
13338 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13339 "Invalid Pointer Size!");
13340
13341 const TargetRegisterClass *RC =
13342 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13343 Register Tmp = MRI.createVirtualRegister(RegClass: RC);
13344 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13345 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13346 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13347 unsigned BP =
13348 (PVT == MVT::i64)
13349 ? PPC::X30
13350 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13351 : PPC::R30);
13352
13353 MachineInstrBuilder MIB;
13354
13355 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13356 const int64_t SPOffset = 2 * PVT.getStoreSize();
13357 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13358 const int64_t BPOffset = 4 * PVT.getStoreSize();
13359
13360 Register BufReg = MI.getOperand(i: 0).getReg();
13361
13362 // Reload FP (the jumped-to function may not have had a
13363 // frame pointer, and if so, then its r31 will be restored
13364 // as necessary).
13365 if (PVT == MVT::i64) {
13366 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: FP)
13367 .addImm(Val: 0)
13368 .addReg(RegNo: BufReg);
13369 } else {
13370 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: FP)
13371 .addImm(Val: 0)
13372 .addReg(RegNo: BufReg);
13373 }
13374 MIB.cloneMemRefs(OtherMI: MI);
13375
13376 // Reload IP
13377 if (PVT == MVT::i64) {
13378 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: Tmp)
13379 .addImm(Val: LabelOffset)
13380 .addReg(RegNo: BufReg);
13381 } else {
13382 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: Tmp)
13383 .addImm(Val: LabelOffset)
13384 .addReg(RegNo: BufReg);
13385 }
13386 MIB.cloneMemRefs(OtherMI: MI);
13387
13388 // Reload SP
13389 if (PVT == MVT::i64) {
13390 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: SP)
13391 .addImm(Val: SPOffset)
13392 .addReg(RegNo: BufReg);
13393 } else {
13394 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: SP)
13395 .addImm(Val: SPOffset)
13396 .addReg(RegNo: BufReg);
13397 }
13398 MIB.cloneMemRefs(OtherMI: MI);
13399
13400 // Reload BP
13401 if (PVT == MVT::i64) {
13402 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: BP)
13403 .addImm(Val: BPOffset)
13404 .addReg(RegNo: BufReg);
13405 } else {
13406 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: BP)
13407 .addImm(Val: BPOffset)
13408 .addReg(RegNo: BufReg);
13409 }
13410 MIB.cloneMemRefs(OtherMI: MI);
13411
13412 // Reload TOC
13413 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13414 setUsesTOCBasePtr(*MBB->getParent());
13415 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: PPC::X2)
13416 .addImm(Val: TOCOffset)
13417 .addReg(RegNo: BufReg)
13418 .cloneMemRefs(OtherMI: MI);
13419 }
13420
13421 // Jump
13422 BuildMI(BB&: *MBB, I&: MI, MIMD: DL,
13423 MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(RegNo: Tmp);
13424 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13425
13426 MI.eraseFromParent();
13427 return MBB;
13428}
13429
13430bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13431 // If the function specifically requests inline stack probes, emit them.
13432 if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
13433 return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
13434 "inline-asm";
13435 return false;
13436}
13437
13438unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13439 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13440 unsigned StackAlign = TFI->getStackAlignment();
13441 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13442 "Unexpected stack alignment");
13443 // The default stack probe size is 4096 if the function has no
13444 // stack-probe-size attribute.
13445 const Function &Fn = MF.getFunction();
13446 unsigned StackProbeSize =
13447 Fn.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: 4096);
13448 // Round down to the stack alignment.
13449 StackProbeSize &= ~(StackAlign - 1);
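  // e.g. StackAlign = 16 and "stack-probe-size"=1000 yields 1000 & ~15 = 992.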
13450 return StackProbeSize ? StackProbeSize : StackAlign;
13451}
13452
// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
// into three phases. In the first phase, it uses the pseudo instruction
// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
// and FinalStackPtr. In the second phase, it generates a loop for probing
// blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
// future result of MaxCallFrameSize so that it can calculate the correct data
// area pointer.
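// For example, with ProbeSize = 4096 and ActualNegSize = -10000, the emitted
// code first probes the 1808-byte residual (10000 mod 4096) and then loops
// twice, probing one 4096-byte block per iteration until SP reaches
// FinalStackPtr.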
13459MachineBasicBlock *
13460PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13461 MachineBasicBlock *MBB) const {
13462 const bool isPPC64 = Subtarget.isPPC64();
13463 MachineFunction *MF = MBB->getParent();
13464 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13465 DebugLoc DL = MI.getDebugLoc();
13466 const unsigned ProbeSize = getStackProbeSize(MF: *MF);
13467 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13468 MachineRegisterInfo &MRI = MF->getRegInfo();
  // The CFG of the stack-probing code looks like:
13470 // +-----+
13471 // | MBB |
13472 // +--+--+
13473 // |
13474 // +----v----+
13475 // +--->+ TestMBB +---+
13476 // | +----+----+ |
13477 // | | |
13478 // | +-----v----+ |
13479 // +---+ BlockMBB | |
13480 // +----------+ |
13481 // |
13482 // +---------+ |
13483 // | TailMBB +<--+
13484 // +---------+
  // In MBB, calculate the previous frame pointer and the final stack pointer.
  // In TestMBB, test whether SP equals the final stack pointer; if so, jump to
  // TailMBB. In BlockMBB, update SP with a probing store and jump back to
  // TestMBB. The instructions following \p MI are spliced into TailMBB.
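  // The probing store in BlockMBB uses the update form (st[dw]ux), so SP is
  // advanced and the newly exposed block is touched by a single instruction.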
13489 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13490 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13491 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13492
13493 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13494 MF->insert(MBBI: MBBIter, MBB: TestMBB);
13495 MF->insert(MBBI: MBBIter, MBB: BlockMBB);
13496 MF->insert(MBBI: MBBIter, MBB: TailMBB);
13497
13498 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13499 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13500
13501 Register DstReg = MI.getOperand(i: 0).getReg();
13502 Register NegSizeReg = MI.getOperand(i: 1).getReg();
13503 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13504 Register FinalStackPtr = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13505 Register FramePointer = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13506 Register ActualNegSizeReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13507
  // Since the value of NegSizeReg might be realigned during prologue/epilogue
  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
  // actual FramePointer and NegSize.
13511 unsigned ProbeOpc;
13512 if (!MRI.hasOneNonDBGUse(RegNo: NegSizeReg))
13513 ProbeOpc =
13514 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13515 else
    // When NegSizeReg has only one use (this MI, which will be replaced by
    // PREPARE_PROBED_ALLOCA), use the NEGSIZE_SAME_REG variant so that
    // ActualNegSizeReg and NegSizeReg are allocated to the same physical
    // register, avoiding a redundant copy.
13520 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13521 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13522 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: ProbeOpc), DestReg: FramePointer)
13523 .addDef(RegNo: ActualNegSizeReg)
13524 .addReg(RegNo: NegSizeReg)
13525 .add(MO: MI.getOperand(i: 2))
13526 .add(MO: MI.getOperand(i: 3));
13527
  // Calculate the final stack pointer, which equals SP + ActualNegSize.
13529 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4),
13530 DestReg: FinalStackPtr)
13531 .addReg(RegNo: SPReg)
13532 .addReg(RegNo: ActualNegSizeReg);
13533
13534 // Materialize a scratch register for update.
13535 int64_t NegProbeSize = -(int64_t)ProbeSize;
13536 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13537 Register ScratchReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13538 if (!isInt<16>(x: NegProbeSize)) {
13539 Register TempReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13540 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LIS8 : PPC::LIS), DestReg: TempReg)
13541 .addImm(Val: NegProbeSize >> 16);
13542 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ORI8 : PPC::ORI),
13543 DestReg: ScratchReg)
13544 .addReg(RegNo: TempReg)
13545 .addImm(Val: NegProbeSize & 0xFFFF);
13546 } else
13547 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LI8 : PPC::LI), DestReg: ScratchReg)
13548 .addImm(Val: NegProbeSize);
13549
13550 {
    // Probe the leading residual part.
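    // NegMod = ActualNegSize - (ActualNegSize / -ProbeSize) * -ProbeSize,
    // i.e. the non-positive remainder. E.g. ActualNegSize = -10000,
    // ProbeSize = 4096: Div = 2, Mul = -8192, NegMod = -1808, so the st[dw]ux
    // below probes the residual 1808 bytes before the main loop runs.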
13552 Register Div = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13553 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::DIVD : PPC::DIVW), DestReg: Div)
13554 .addReg(RegNo: ActualNegSizeReg)
13555 .addReg(RegNo: ScratchReg);
13556 Register Mul = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13557 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::MULLD : PPC::MULLW), DestReg: Mul)
13558 .addReg(RegNo: Div)
13559 .addReg(RegNo: ScratchReg);
13560 Register NegMod = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13561 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::SUBF8 : PPC::SUBF), DestReg: NegMod)
13562 .addReg(RegNo: Mul)
13563 .addReg(RegNo: ActualNegSizeReg);
13564 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13565 .addReg(RegNo: FramePointer)
13566 .addReg(RegNo: SPReg)
13567 .addReg(RegNo: NegMod);
13568 }
13569
13570 {
    // The remaining part is a multiple of ProbeSize.
13572 Register CmpResult = MRI.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13573 BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::CMPD : PPC::CMPW), DestReg: CmpResult)
13574 .addReg(RegNo: SPReg)
13575 .addReg(RegNo: FinalStackPtr);
13576 BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC))
13577 .addImm(Val: PPC::PRED_EQ)
13578 .addReg(RegNo: CmpResult)
13579 .addMBB(MBB: TailMBB);
13580 TestMBB->addSuccessor(Succ: BlockMBB);
13581 TestMBB->addSuccessor(Succ: TailMBB);
13582 }
13583
13584 {
13585 // Touch the block.
13586 // |P...|P...|P...
13587 BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13588 .addReg(RegNo: FramePointer)
13589 .addReg(RegNo: SPReg)
13590 .addReg(RegNo: ScratchReg);
13591 BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: TestMBB);
13592 BlockMBB->addSuccessor(Succ: TestMBB);
13593 }
13594
  // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
  // insertion, so use the DYNAREAOFFSET pseudo instruction to get its future
  // result.
13597 Register MaxCallFrameSizeReg =
13598 MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13599 BuildMI(BB: TailMBB, MIMD: DL,
13600 MCID: TII->get(Opcode: isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13601 DestReg: MaxCallFrameSizeReg)
13602 .add(MO: MI.getOperand(i: 2))
13603 .add(MO: MI.getOperand(i: 3));
13604 BuildMI(BB: TailMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4), DestReg: DstReg)
13605 .addReg(RegNo: SPReg)
13606 .addReg(RegNo: MaxCallFrameSizeReg);
13607
13608 // Splice instructions after MI to TailMBB.
13609 TailMBB->splice(Where: TailMBB->end(), Other: MBB,
13610 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
13611 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
13612 MBB->addSuccessor(Succ: TestMBB);
13613
13614 // Delete the pseudo instruction.
13615 MI.eraseFromParent();
13616
13617 ++NumDynamicAllocaProbed;
13618 return TailMBB;
13619}
13620
13621static bool IsSelectCC(MachineInstr &MI) {
13622 switch (MI.getOpcode()) {
13623 case PPC::SELECT_CC_I4:
13624 case PPC::SELECT_CC_I8:
13625 case PPC::SELECT_CC_F4:
13626 case PPC::SELECT_CC_F8:
13627 case PPC::SELECT_CC_F16:
13628 case PPC::SELECT_CC_VRRC:
13629 case PPC::SELECT_CC_VSFRC:
13630 case PPC::SELECT_CC_VSSRC:
13631 case PPC::SELECT_CC_VSRC:
13632 case PPC::SELECT_CC_SPE4:
13633 case PPC::SELECT_CC_SPE:
13634 return true;
13635 default:
13636 return false;
13637 }
13638}
13639
13640static bool IsSelect(MachineInstr &MI) {
13641 switch (MI.getOpcode()) {
13642 case PPC::SELECT_I4:
13643 case PPC::SELECT_I8:
13644 case PPC::SELECT_F4:
13645 case PPC::SELECT_F8:
13646 case PPC::SELECT_F16:
13647 case PPC::SELECT_SPE:
13648 case PPC::SELECT_SPE4:
13649 case PPC::SELECT_VRRC:
13650 case PPC::SELECT_VSFRC:
13651 case PPC::SELECT_VSSRC:
13652 case PPC::SELECT_VSRC:
13653 return true;
13654 default:
13655 return false;
13656 }
13657}
13658
13659MachineBasicBlock *
13660PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13661 MachineBasicBlock *BB) const {
13662 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13663 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13664 if (Subtarget.is64BitELFABI() &&
13665 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13666 !Subtarget.isUsingPCRelativeCalls()) {
13667 // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't, however, because there is no
13669 // way to mark the dependence as implicit there, and so the stackmap code
13670 // will confuse it with a regular operand. Instead, add the dependence
13671 // here.
13672 MI.addOperand(Op: MachineOperand::CreateReg(Reg: PPC::X2, isDef: false, isImp: true));
13673 }
13674
13675 return emitPatchPoint(MI, MBB: BB);
13676 }
13677
13678 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13679 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13680 return emitEHSjLjSetJmp(MI, MBB: BB);
13681 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13682 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13683 return emitEHSjLjLongJmp(MI, MBB: BB);
13684 }
13685
13686 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13687
13688 // To "insert" these instructions we actually have to insert their
13689 // control-flow patterns.
13690 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13691 MachineFunction::iterator It = ++BB->getIterator();
13692
13693 MachineFunction *F = BB->getParent();
13694 MachineRegisterInfo &MRI = F->getRegInfo();
13695
13696 if (Subtarget.hasISEL() &&
13697 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13698 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13699 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13700 SmallVector<MachineOperand, 2> Cond;
13701 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13702 MI.getOpcode() == PPC::SELECT_CC_I8)
13703 Cond.push_back(Elt: MI.getOperand(i: 4));
13704 else
13705 Cond.push_back(Elt: MachineOperand::CreateImm(Val: PPC::PRED_BIT_SET));
13706 Cond.push_back(Elt: MI.getOperand(i: 1));
13707
13708 DebugLoc dl = MI.getDebugLoc();
13709 TII->insertSelect(MBB&: *BB, I: MI, DL: dl, DstReg: MI.getOperand(i: 0).getReg(), Cond,
13710 TrueReg: MI.getOperand(i: 2).getReg(), FalseReg: MI.getOperand(i: 3).getReg());
13711 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13712 // The incoming instruction knows the destination vreg to set, the
13713 // condition code register to branch on, the true/false values to
13714 // select between, and a branch opcode to use.
13715
13716 // thisMBB:
13717 // ...
13718 // TrueVal = ...
13719 // cmpTY ccX, r1, r2
13720 // bCC sinkMBB
13721 // fallthrough --> copy0MBB
13722 MachineBasicBlock *thisMBB = BB;
13723 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13724 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13725 DebugLoc dl = MI.getDebugLoc();
13726 F->insert(MBBI: It, MBB: copy0MBB);
13727 F->insert(MBBI: It, MBB: sinkMBB);
13728
13729 if (isPhysRegUsedAfter(Reg: PPC::CARRY, MBI: MI.getIterator())) {
13730 copy0MBB->addLiveIn(PhysReg: PPC::CARRY);
13731 sinkMBB->addLiveIn(PhysReg: PPC::CARRY);
13732 }
13733
13734 // Set the call frame size on entry to the new basic blocks.
13735 // See https://reviews.llvm.org/D156113.
13736 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13737 copy0MBB->setCallFrameSize(CallFrameSize);
13738 sinkMBB->setCallFrameSize(CallFrameSize);
13739
13740 // Transfer the remainder of BB and its successor edges to sinkMBB.
13741 sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
13742 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13743 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13744
13745 // Next, add the true and fallthrough blocks as its successors.
13746 BB->addSuccessor(Succ: copy0MBB);
13747 BB->addSuccessor(Succ: sinkMBB);
13748
13749 if (IsSelect(MI)) {
13750 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BC))
13751 .addReg(RegNo: MI.getOperand(i: 1).getReg())
13752 .addMBB(MBB: sinkMBB);
13753 } else {
13754 unsigned SelectPred = MI.getOperand(i: 4).getImm();
13755 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13756 .addImm(Val: SelectPred)
13757 .addReg(RegNo: MI.getOperand(i: 1).getReg())
13758 .addMBB(MBB: sinkMBB);
13759 }
13760
13761 // copy0MBB:
13762 // %FalseValue = ...
13763 // # fallthrough to sinkMBB
13764 BB = copy0MBB;
13765
13766 // Update machine-CFG edges
13767 BB->addSuccessor(Succ: sinkMBB);
13768
13769 // sinkMBB:
13770 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13771 // ...
13772 BB = sinkMBB;
13773 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::PHI), DestReg: MI.getOperand(i: 0).getReg())
13774 .addReg(RegNo: MI.getOperand(i: 3).getReg())
13775 .addMBB(MBB: copy0MBB)
13776 .addReg(RegNo: MI.getOperand(i: 2).getReg())
13777 .addMBB(MBB: thisMBB);
13778 } else if (MI.getOpcode() == PPC::ReadTB) {
13779 // To read the 64-bit time-base register on a 32-bit target, we read the
13780 // two halves. Should the counter have wrapped while it was being read, we
13781 // need to try again.
13782 // ...
13783 // readLoop:
13784 // mfspr Rx,TBU # load from TBU
13785 // mfspr Ry,TB # load from TB
13786 // mfspr Rz,TBU # load from TBU
13787 // cmpw crX,Rx,Rz # check if 'old'='new'
13788 // bne readLoop # branch if they're not equal
13789 // ...
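    // SPR 269 is TBU and SPR 268 is TB (the low word); if the two TBU reads
    // differ, the low word may have wrapped between them, so read again.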
13790
13791 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13792 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13793 DebugLoc dl = MI.getDebugLoc();
13794 F->insert(MBBI: It, MBB: readMBB);
13795 F->insert(MBBI: It, MBB: sinkMBB);
13796
13797 // Transfer the remainder of BB and its successor edges to sinkMBB.
13798 sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
13799 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13800 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13801
13802 BB->addSuccessor(Succ: readMBB);
13803 BB = readMBB;
13804
13805 MachineRegisterInfo &RegInfo = F->getRegInfo();
13806 Register ReadAgainReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
13807 Register LoReg = MI.getOperand(i: 0).getReg();
13808 Register HiReg = MI.getOperand(i: 1).getReg();
13809
13810 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: HiReg).addImm(Val: 269);
13811 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: LoReg).addImm(Val: 268);
13812 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: ReadAgainReg).addImm(Val: 269);
13813
13814 Register CmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13815
13816 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CmpReg)
13817 .addReg(RegNo: HiReg)
13818 .addReg(RegNo: ReadAgainReg);
13819 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13820 .addImm(Val: PPC::PRED_NE)
13821 .addReg(RegNo: CmpReg)
13822 .addMBB(MBB: readMBB);
13823
13824 BB->addSuccessor(Succ: readMBB);
13825 BB->addSuccessor(Succ: sinkMBB);
13826 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13827 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::ADD4);
13828 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13829 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::ADD4);
13830 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13831 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::ADD4);
13832 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13833 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::ADD8);
13834
13835 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13836 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::AND);
13837 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13838 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::AND);
13839 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13840 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::AND);
13841 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13842 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::AND8);
13843
13844 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13845 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::OR);
13846 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13847 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::OR);
13848 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13849 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::OR);
13850 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13851 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::OR8);
13852
13853 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13854 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::XOR);
13855 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13856 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::XOR);
13857 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13858 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::XOR);
13859 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13860 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::XOR8);
13861
13862 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13863 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::NAND);
13864 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13865 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::NAND);
13866 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13867 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::NAND);
13868 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13869 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::NAND8);
13870
13871 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13872 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::SUBF);
13873 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13874 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::SUBF);
13875 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13876 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::SUBF);
13877 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13878 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::SUBF8);
13879
13880 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13881 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
13882 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13883 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
13884 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13885 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
13886 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13887 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_LT);
13888
13889 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13890 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
13891 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13892 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
13893 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13894 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
13895 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13896 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_GT);
13897
13898 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13899 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
13900 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13901 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
13902 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13903 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
13904 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13905 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_LT);
13906
13907 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13908 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
13909 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13910 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
13911 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13912 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
13913 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13914 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_GT);
13915
13916 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13917 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0);
13918 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13919 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0);
13920 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13921 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0);
13922 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13923 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0);
13924 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13925 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13926 (Subtarget.hasPartwordAtomics() &&
13927 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13928 (Subtarget.hasPartwordAtomics() &&
13929 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13930 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13931
13932 auto LoadMnemonic = PPC::LDARX;
13933 auto StoreMnemonic = PPC::STDCX;
13934 switch (MI.getOpcode()) {
13935 default:
13936 llvm_unreachable("Compare and swap of unknown size");
13937 case PPC::ATOMIC_CMP_SWAP_I8:
13938 LoadMnemonic = PPC::LBARX;
13939 StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13941 break;
13942 case PPC::ATOMIC_CMP_SWAP_I16:
13943 LoadMnemonic = PPC::LHARX;
13944 StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13946 break;
13947 case PPC::ATOMIC_CMP_SWAP_I32:
13948 LoadMnemonic = PPC::LWARX;
13949 StoreMnemonic = PPC::STWCX;
13950 break;
13951 case PPC::ATOMIC_CMP_SWAP_I64:
13952 LoadMnemonic = PPC::LDARX;
13953 StoreMnemonic = PPC::STDCX;
13954 break;
13955 }
13956 MachineRegisterInfo &RegInfo = F->getRegInfo();
13957 Register dest = MI.getOperand(i: 0).getReg();
13958 Register ptrA = MI.getOperand(i: 1).getReg();
13959 Register ptrB = MI.getOperand(i: 2).getReg();
13960 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13961 Register oldval = MI.getOperand(i: 3).getReg();
13962 Register newval = MI.getOperand(i: 4).getReg();
13963 DebugLoc dl = MI.getDebugLoc();
13964
13965 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13966 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13967 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13968 F->insert(MBBI: It, MBB: loop1MBB);
13969 F->insert(MBBI: It, MBB: loop2MBB);
13970 F->insert(MBBI: It, MBB: exitMBB);
13971 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
13972 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13973 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13974
13975 // thisMBB:
13976 // ...
13977 // fallthrough --> loopMBB
13978 BB->addSuccessor(Succ: loop1MBB);
13979
13980 // loop1MBB:
13981 // l[bhwd]arx dest, ptr
13982 // cmp[wd] dest, oldval
13983 // bne- exitBB
13984 // loop2MBB:
13985 // st[bhwd]cx. newval, ptr
13986 // bne- loopMBB
13987 // b exitBB
13988 // exitBB:
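    // If the st[bhwd]cx. fails (the reservation was lost), the bne- branches
    // back to loop1MBB to reload the value and recompare it against oldval.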
13989 BB = loop1MBB;
13990 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
13991 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::CMPD : PPC::CMPW), DestReg: CrReg)
13992 .addReg(RegNo: dest)
13993 .addReg(RegNo: oldval);
13994 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13995 .addImm(Val: PPC::PRED_NE)
13996 .addReg(RegNo: CrReg)
13997 .addMBB(MBB: exitMBB);
13998 BB->addSuccessor(Succ: loop2MBB);
13999 BB->addSuccessor(Succ: exitMBB);
14000
14001 BB = loop2MBB;
14002 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
14003 .addReg(RegNo: newval)
14004 .addReg(RegNo: ptrA)
14005 .addReg(RegNo: ptrB);
14006 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14007 .addImm(Val: PPC::PRED_NE)
14008 .addReg(RegNo: PPC::CR0)
14009 .addMBB(MBB: loop1MBB);
14010 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14011 BB->addSuccessor(Succ: loop1MBB);
14012 BB->addSuccessor(Succ: exitMBB);
14013
14014 // exitMBB:
14015 // ...
14016 BB = exitMBB;
14017 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14018 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14019 // We must use 64-bit registers for addresses when targeting 64-bit,
14020 // since we're actually doing arithmetic on them. Other registers
14021 // can be 32-bit.
14022 bool is64bit = Subtarget.isPPC64();
14023 bool isLittleEndian = Subtarget.isLittleEndian();
14024 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14025
14026 Register dest = MI.getOperand(i: 0).getReg();
14027 Register ptrA = MI.getOperand(i: 1).getReg();
14028 Register ptrB = MI.getOperand(i: 2).getReg();
14029 Register oldval = MI.getOperand(i: 3).getReg();
14030 Register newval = MI.getOperand(i: 4).getReg();
14031 DebugLoc dl = MI.getDebugLoc();
14032
14033 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14034 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14035 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14036 F->insert(MBBI: It, MBB: loop1MBB);
14037 F->insert(MBBI: It, MBB: loop2MBB);
14038 F->insert(MBBI: It, MBB: exitMBB);
14039 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
14040 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
14041 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14042
14043 MachineRegisterInfo &RegInfo = F->getRegInfo();
14044 const TargetRegisterClass *RC =
14045 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14046 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14047
14048 Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
14049 Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14050 Register ShiftReg =
14051 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
14052 Register NewVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14053 Register NewVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14054 Register OldVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14055 Register OldVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14056 Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14057 Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14058 Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14059 Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14060 Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14061 Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14062 Register Ptr1Reg;
14063 Register TmpReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14064 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14065 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14066 // thisMBB:
14067 // ...
14068 // fallthrough --> loopMBB
14069 BB->addSuccessor(Succ: loop1MBB);
14070
14071 // The 4-byte load must be aligned, while a char or short may be
14072 // anywhere in the word. Hence all this nasty bookkeeping code.
14073 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14074 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14075 // xori shift, shift1, 24 [16]
14076 // rlwinm ptr, ptr1, 0, 0, 29
14077 // slw newval2, newval, shift
    //   slw oldval2, oldval, shift
14079 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14080 // slw mask, mask2, shift
14081 // and newval3, newval2, mask
14082 // and oldval3, oldval2, mask
14083 // loop1MBB:
14084 // lwarx tmpDest, ptr
14085 // and tmp, tmpDest, mask
14086 // cmpw tmp, oldval3
14087 // bne- exitBB
14088 // loop2MBB:
14089 // andc tmp2, tmpDest, mask
14090 // or tmp4, tmp2, newval3
14091 // stwcx. tmp4, ptr
14092 // bne- loop1MBB
14093 // b exitBB
14094 // exitBB:
14095 // srw dest, tmpDest, shift
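    // Only the selected byte/halfword takes part in the compare: tmp is
    // tmpDest & mask and oldval3 is (oldval << shift) & mask, so the other
    // bytes of the aligned word can never cause a spurious mismatch.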
14096 if (ptrA != ZeroReg) {
14097 Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
14098 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
14099 .addReg(RegNo: ptrA)
14100 .addReg(RegNo: ptrB);
14101 } else {
14102 Ptr1Reg = ptrB;
14103 }
14104
    // We need to use the 32-bit subregister to avoid a register-class mismatch
    // in 64-bit mode.
14107 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
14108 .addReg(RegNo: Ptr1Reg, flags: 0, SubReg: is64bit ? PPC::sub_32 : 0)
14109 .addImm(Val: 3)
14110 .addImm(Val: 27)
14111 .addImm(Val: is8bit ? 28 : 27);
14112 if (!isLittleEndian)
14113 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
14114 .addReg(RegNo: Shift1Reg)
14115 .addImm(Val: is8bit ? 24 : 16);
14116 if (is64bit)
14117 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
14118 .addReg(RegNo: Ptr1Reg)
14119 .addImm(Val: 0)
14120 .addImm(Val: 61);
14121 else
14122 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
14123 .addReg(RegNo: Ptr1Reg)
14124 .addImm(Val: 0)
14125 .addImm(Val: 0)
14126 .addImm(Val: 29);
14127 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: NewVal2Reg)
14128 .addReg(RegNo: newval)
14129 .addReg(RegNo: ShiftReg);
14130 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: OldVal2Reg)
14131 .addReg(RegNo: oldval)
14132 .addReg(RegNo: ShiftReg);
14133 if (is8bit)
14134 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: 255);
14135 else {
14136 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: 0);
14137 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
14138 .addReg(RegNo: Mask3Reg)
14139 .addImm(Val: 65535);
14140 }
14141 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
14142 .addReg(RegNo: Mask2Reg)
14143 .addReg(RegNo: ShiftReg);
14144 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: NewVal3Reg)
14145 .addReg(RegNo: NewVal2Reg)
14146 .addReg(RegNo: MaskReg);
14147 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: OldVal3Reg)
14148 .addReg(RegNo: OldVal2Reg)
14149 .addReg(RegNo: MaskReg);
14150
14151 BB = loop1MBB;
14152 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
14153 .addReg(RegNo: ZeroReg)
14154 .addReg(RegNo: PtrReg);
14155 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: TmpReg)
14156 .addReg(RegNo: TmpDestReg)
14157 .addReg(RegNo: MaskReg);
14158 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CrReg)
14159 .addReg(RegNo: TmpReg)
14160 .addReg(RegNo: OldVal3Reg);
14161 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14162 .addImm(Val: PPC::PRED_NE)
14163 .addReg(RegNo: CrReg)
14164 .addMBB(MBB: exitMBB);
14165 BB->addSuccessor(Succ: loop2MBB);
14166 BB->addSuccessor(Succ: exitMBB);
14167
14168 BB = loop2MBB;
14169 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
14170 .addReg(RegNo: TmpDestReg)
14171 .addReg(RegNo: MaskReg);
14172 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg)
14173 .addReg(RegNo: Tmp2Reg)
14174 .addReg(RegNo: NewVal3Reg);
14175 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
14176 .addReg(RegNo: Tmp4Reg)
14177 .addReg(RegNo: ZeroReg)
14178 .addReg(RegNo: PtrReg);
14179 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14180 .addImm(Val: PPC::PRED_NE)
14181 .addReg(RegNo: PPC::CR0)
14182 .addMBB(MBB: loop1MBB);
14183 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14184 BB->addSuccessor(Succ: loop1MBB);
14185 BB->addSuccessor(Succ: exitMBB);
14186
14187 // exitMBB:
14188 // ...
14189 BB = exitMBB;
14190 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: dest)
14191 .addReg(RegNo: TmpReg)
14192 .addReg(RegNo: ShiftReg);
14193 } else if (MI.getOpcode() == PPC::FADDrtz) {
14194 // This pseudo performs an FADD with rounding mode temporarily forced
14195 // to round-to-zero. We emit this via custom inserter since the FPSCR
14196 // is not modeled at the SelectionDAG level.
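    // MTFSB1 31 / MTFSB0 30 below set FPSCR[62:63] (RN) to 0b01, i.e.
    // round-toward-zero, and the saved FPSCR value is restored afterwards.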
14197 Register Dest = MI.getOperand(i: 0).getReg();
14198 Register Src1 = MI.getOperand(i: 1).getReg();
14199 Register Src2 = MI.getOperand(i: 2).getReg();
14200 DebugLoc dl = MI.getDebugLoc();
14201
14202 MachineRegisterInfo &RegInfo = F->getRegInfo();
14203 Register MFFSReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14204
14205 // Save FPSCR value.
14206 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: MFFSReg);
14207
14208 // Set rounding mode to round-to-zero.
14209 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB1))
14210 .addImm(Val: 31)
14211 .addReg(RegNo: PPC::RM, flags: RegState::ImplicitDefine);
14212
14213 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB0))
14214 .addImm(Val: 30)
14215 .addReg(RegNo: PPC::RM, flags: RegState::ImplicitDefine);
14216
14217 // Perform addition.
14218 auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::FADD), DestReg: Dest)
14219 .addReg(RegNo: Src1)
14220 .addReg(RegNo: Src2);
14221 if (MI.getFlag(Flag: MachineInstr::NoFPExcept))
14222 MIB.setMIFlag(MachineInstr::NoFPExcept);
14223
14224 // Restore FPSCR value.
14225 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSFb)).addImm(Val: 1).addReg(RegNo: MFFSReg);
14226 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14227 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14228 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14229 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14230 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14231 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14232 ? PPC::ANDI8_rec
14233 : PPC::ANDI_rec;
14234 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14235 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14236
14237 MachineRegisterInfo &RegInfo = F->getRegInfo();
14238 Register Dest = RegInfo.createVirtualRegister(
14239 RegClass: Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14240
14241 DebugLoc Dl = MI.getDebugLoc();
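    // AND the input with 1 using the record form so that CR0 is set, then
    // copy the requested CR0 bit (EQ or GT) into the result register.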
14242 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode), DestReg: Dest)
14243 .addReg(RegNo: MI.getOperand(i: 1).getReg())
14244 .addImm(Val: 1);
14245 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14246 DestReg: MI.getOperand(i: 0).getReg())
14247 .addReg(RegNo: IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14248 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14249 DebugLoc Dl = MI.getDebugLoc();
14250 MachineRegisterInfo &RegInfo = F->getRegInfo();
14251 Register CRReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14252 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TCHECK), DestReg: CRReg);
14253 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14254 DestReg: MI.getOperand(i: 0).getReg())
14255 .addReg(RegNo: CRReg);
14256 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14257 DebugLoc Dl = MI.getDebugLoc();
14258 unsigned Imm = MI.getOperand(i: 1).getImm();
14259 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TBEGIN)).addImm(Val: Imm);
14260 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14261 DestReg: MI.getOperand(i: 0).getReg())
14262 .addReg(RegNo: PPC::CR0EQ);
14263 } else if (MI.getOpcode() == PPC::SETRNDi) {
14264 DebugLoc dl = MI.getDebugLoc();
14265 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14266
14267 // Save FPSCR value.
14268 if (MRI.use_empty(RegNo: OldFPSCRReg))
14269 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14270 else
14271 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14272
14273  // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
14274 // the following settings:
14275 // 00 Round to nearest
14276 // 01 Round to 0
14277 // 10 Round to +inf
14278 // 11 Round to -inf
14279
14280  // When the operand is an immediate, use its two least significant bits to
14281  // set bits 62:63 of the FPSCR.
14282 unsigned Mode = MI.getOperand(i: 1).getImm();
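    // The low bit of the immediate maps to FPSCR bit 63 (mtfsb field 31) and
    // the next bit to FPSCR bit 62 (field 30).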
14283 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14284 .addImm(Val: 31)
14285 .addReg(RegNo: PPC::RM, flags: RegState::ImplicitDefine);
14286
14287 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14288 .addImm(Val: 30)
14289 .addReg(RegNo: PPC::RM, flags: RegState::ImplicitDefine);
14290 } else if (MI.getOpcode() == PPC::SETRND) {
14291 DebugLoc dl = MI.getDebugLoc();
14292
14293 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14294 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14295    // If the target doesn't have DirectMove, we go through the stack instead,
14296    // because the target lacks instructions like mtvsrd or mfvsrd that could
14297    // do this conversion directly.
14298 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14299 if (Subtarget.hasDirectMove()) {
14300 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg)
14301 .addReg(RegNo: SrcReg);
14302 } else {
14303 // Use stack to do the register copy.
14304 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14305 MachineRegisterInfo &RegInfo = F->getRegInfo();
14306 const TargetRegisterClass *RC = RegInfo.getRegClass(Reg: SrcReg);
14307 if (RC == &PPC::F8RCRegClass) {
14308        // Copy register from F8RCRegClass to G8RCRegClass.
14309 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14310 "Unsupported RegClass.");
14311
14312 StoreOp = PPC::STFD;
14313 LoadOp = PPC::LD;
14314 } else {
14315        // Copy register from G8RCRegClass to F8RCRegClass.
14316 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14317 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14318 "Unsupported RegClass.");
14319 }
14320
14321 MachineFrameInfo &MFI = F->getFrameInfo();
14322 int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
14323
14324 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14325 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: 0),
14326 F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14327 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14328
14329 // Store the SrcReg into the stack.
14330 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: StoreOp))
14331 .addReg(RegNo: SrcReg)
14332 .addImm(Val: 0)
14333 .addFrameIndex(Idx: FrameIdx)
14334 .addMemOperand(MMO: MMOStore);
14335
14336 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14337 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: 0),
14338 F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14339 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14340
14341      // Load the value back from that slot into DestReg, which completes the
14342      // register-class conversion from SrcReg's register class to DestReg's
14343      // register class.
14344 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: LoadOp), DestReg)
14345 .addImm(Val: 0)
14346 .addFrameIndex(Idx: FrameIdx)
14347 .addMemOperand(MMO: MMOLoad);
14348 }
14349 };
14350
14351 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14352
14353 // Save FPSCR value.
14354 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14355
14356    // When the operand is a GPRC register, use its two least significant bits
14357    // and the mtfsf instruction to set bits 62:63 of the FPSCR.
14358 //
14359 // copy OldFPSCRTmpReg, OldFPSCRReg
14360 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14361 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14362 // copy NewFPSCRReg, NewFPSCRTmpReg
14363 // mtfsf 255, NewFPSCRReg
14364 MachineOperand SrcOp = MI.getOperand(i: 1);
14365 MachineRegisterInfo &RegInfo = F->getRegInfo();
14366 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14367
14368 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14369
14370 Register ImDefReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14371 Register ExtSrcReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14372
14373    // The first operand of INSERT_SUBREG must be a register that has
14374    // subregisters; since we only care about its register class, an
14375    // IMPLICIT_DEF register is sufficient.
14376 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: ImDefReg);
14377 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::INSERT_SUBREG), DestReg: ExtSrcReg)
14378 .addReg(RegNo: ImDefReg)
14379 .add(MO: SrcOp)
14380 .addImm(Val: 1);
14381
14382 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
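    // rldimi with SH=0 and MB=62 inserts the low two bits of ExtSrcReg into
    // bits 62:63 of the old FPSCR value.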
14383 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDIMI), DestReg: NewFPSCRTmpReg)
14384 .addReg(RegNo: OldFPSCRTmpReg)
14385 .addReg(RegNo: ExtSrcReg)
14386 .addImm(Val: 0)
14387 .addImm(Val: 62);
14388
14389 Register NewFPSCRReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14390 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14391
14392    // The mask 255 means that bits 32:63 of NewFPSCRReg are copied into bits
14393    // 32:63 of the FPSCR.
14394 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSF))
14395 .addImm(Val: 255)
14396 .addReg(RegNo: NewFPSCRReg)
14397 .addImm(Val: 0)
14398 .addImm(Val: 0);
14399 } else if (MI.getOpcode() == PPC::SETFLM) {
14400 DebugLoc Dl = MI.getDebugLoc();
14401
14402    // The result of setflm is the previous FPSCR content, so save it first.
14403 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14404 if (MRI.use_empty(RegNo: OldFPSCRReg))
14405 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14406 else
14407 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14408
14409    // Put bits 32:63 of the new value into the FPSCR.
14410 Register NewFPSCRReg = MI.getOperand(i: 1).getReg();
14411 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MTFSF))
14412 .addImm(Val: 255)
14413 .addReg(RegNo: NewFPSCRReg)
14414 .addImm(Val: 0)
14415 .addImm(Val: 0);
14416 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14417 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14418 return emitProbedAlloca(MI, MBB: BB);
14419 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14420 DebugLoc DL = MI.getDebugLoc();
14421 Register Src = MI.getOperand(i: 2).getReg();
14422 Register Lo = MI.getOperand(i: 0).getReg();
14423 Register Hi = MI.getOperand(i: 1).getReg();
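    // Extract the two 64-bit halves of the quadword register pair with
    // subregister copies.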
14424 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14425 .addDef(RegNo: Lo)
14426 .addUse(RegNo: Src, Flags: 0, SubReg: PPC::sub_gp8_x1);
14427 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14428 .addDef(RegNo: Hi)
14429 .addUse(RegNo: Src, Flags: 0, SubReg: PPC::sub_gp8_x0);
14430 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14431 MI.getOpcode() == PPC::STQX_PSEUDO) {
14432 DebugLoc DL = MI.getDebugLoc();
14433    // Ptr holds the sum of RA and RB and is used as the ptr_rc_no_r0 part of
14434    // LQ/STQ's memory operand, so it has to be in the g8rc_and_g8rc_nox0
14435    // register class.
14436 Register Ptr =
14437 F->getRegInfo().createVirtualRegister(RegClass: &PPC::G8RC_and_G8RC_NOX0RegClass);
14438 Register Val = MI.getOperand(i: 0).getReg();
14439 Register RA = MI.getOperand(i: 1).getReg();
14440 Register RB = MI.getOperand(i: 2).getReg();
14441 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::ADD8), DestReg: Ptr).addReg(RegNo: RA).addReg(RegNo: RB);
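    // lq/stq only have displacement forms, so the indexed address is
    // materialized into Ptr and used with a zero displacement.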
14442 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
14443 MCID: MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(Opcode: PPC::LQ)
14444 : TII->get(Opcode: PPC::STQ))
14445 .addReg(RegNo: Val, flags: MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14446 .addImm(Val: 0)
14447 .addReg(RegNo: Ptr);
14448 } else {
14449 llvm_unreachable("Unexpected instr type to insert");
14450 }
14451
14452 MI.eraseFromParent(); // The pseudo instruction is gone now.
14453 return BB;
14454}
14455
14456//===----------------------------------------------------------------------===//
14457// Target Optimization Hooks
14458//===----------------------------------------------------------------------===//
14459
14460static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14461 // For the estimates, convergence is quadratic, so we essentially double the
14462 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14463 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14464 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
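  // For example, starting from 2^-14 accuracy, one refinement step gives
  // roughly 28 correct bits (enough for f32) and two steps roughly 56 bits
  // (enough for f64).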
14465 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14466 if (VT.getScalarType() == MVT::f64)
14467 RefinementSteps++;
14468 return RefinementSteps;
14469}
14470
14471SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14472 const DenormalMode &Mode) const {
14473 // We only have VSX Vector Test for software Square Root.
14474 EVT VT = Op.getValueType();
14475 if (!isTypeLegal(VT: MVT::i1) ||
14476 (VT != MVT::f64 &&
14477 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14478 return TargetLowering::getSqrtInputTest(Operand: Op, DAG, Mode);
14479
14480 SDLoc DL(Op);
14481 // The output register of FTSQRT is CR field.
14482 SDValue FTSQRT = DAG.getNode(Opcode: PPCISD::FTSQRT, DL, VT: MVT::i32, Operand: Op);
14483 // ftsqrt BF,FRB
14484 // Let e_b be the unbiased exponent of the double-precision
14485 // floating-point operand in register FRB.
14486 // fe_flag is set to 1 if either of the following conditions occurs.
14487 // - The double-precision floating-point operand in register FRB is a zero,
14488  //  a NaN, an infinity, or a negative value.
14489 // - e_b is less than or equal to -970.
14490 // Otherwise fe_flag is set to 0.
14491 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14492 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14493 // exponent is less than -970)
14494 SDValue SRIdxVal = DAG.getTargetConstant(Val: PPC::sub_eq, DL, VT: MVT::i32);
14495 return SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i1,
14496 Op1: FTSQRT, Op2: SRIdxVal),
14497 0);
14498}
14499
14500SDValue
14501PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14502 SelectionDAG &DAG) const {
14503 // We only have VSX Vector Square Root.
14504 EVT VT = Op.getValueType();
14505 if (VT != MVT::f64 &&
14506 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14507 return TargetLowering::getSqrtResultForDenormInput(Operand: Op, DAG);
14508
14509 return DAG.getNode(Opcode: PPCISD::FSQRT, DL: SDLoc(Op), VT, Operand: Op);
14510}
14511
14512SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14513 int Enabled, int &RefinementSteps,
14514 bool &UseOneConstNR,
14515 bool Reciprocal) const {
14516 EVT VT = Operand.getValueType();
14517 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14518 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14519 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14520 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14521 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14522 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14523
14524 // The Newton-Raphson computation with a single constant does not provide
14525 // enough accuracy on some CPUs.
14526 UseOneConstNR = !Subtarget.needsTwoConstNR();
14527 return DAG.getNode(Opcode: PPCISD::FRSQRTE, DL: SDLoc(Operand), VT, Operand);
14528 }
14529 return SDValue();
14530}
14531
14532SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14533 int Enabled,
14534 int &RefinementSteps) const {
14535 EVT VT = Operand.getValueType();
14536 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14537 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14538 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14539 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14540 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14541 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14542 return DAG.getNode(Opcode: PPCISD::FRE, DL: SDLoc(Operand), VT, Operand);
14543 }
14544 return SDValue();
14545}
14546
14547unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14548 // Note: This functionality is used only when unsafe-fp-math is enabled, and
14549 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
14550 // enabled for division), this functionality is redundant with the default
14551 // combiner logic (once the division -> reciprocal/multiply transformation
14552 // has taken place). As a result, this matters more for older cores than for
14553 // newer ones.
14554
14555 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14556 // reciprocal if there are two or more FDIVs (for embedded cores with only
14557  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
14558 switch (Subtarget.getCPUDirective()) {
14559 default:
14560 return 3;
14561 case PPC::DIR_440:
14562 case PPC::DIR_A2:
14563 case PPC::DIR_E500:
14564 case PPC::DIR_E500mc:
14565 case PPC::DIR_E5500:
14566 return 2;
14567 }
14568}
14569
14570// isConsecutiveLSLoc needs to work even if all adds have not yet been
14571// collapsed, and so we need to look through chains of them.
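// For example, (add (add X, 8), 16) yields Base = X and adds 24 to Offset.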
14572static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14573 int64_t& Offset, SelectionDAG &DAG) {
14574 if (DAG.isBaseWithConstantOffset(Op: Loc)) {
14575 Base = Loc.getOperand(i: 0);
14576 Offset += cast<ConstantSDNode>(Val: Loc.getOperand(i: 1))->getSExtValue();
14577
14578 // The base might itself be a base plus an offset, and if so, accumulate
14579 // that as well.
14580 getBaseWithConstantOffset(Loc: Loc.getOperand(i: 0), Base, Offset, DAG);
14581 }
14582}
14583
14584static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14585 unsigned Bytes, int Dist,
14586 SelectionDAG &DAG) {
14587 if (VT.getSizeInBits() / 8 != Bytes)
14588 return false;
14589
14590 SDValue BaseLoc = Base->getBasePtr();
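  // If both addresses are frame indices, compare the frame objects' sizes and
  // offsets directly.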
14591 if (Loc.getOpcode() == ISD::FrameIndex) {
14592 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14593 return false;
14594 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14595 int FI = cast<FrameIndexSDNode>(Val&: Loc)->getIndex();
14596 int BFI = cast<FrameIndexSDNode>(Val&: BaseLoc)->getIndex();
14597 int FS = MFI.getObjectSize(ObjectIdx: FI);
14598 int BFS = MFI.getObjectSize(ObjectIdx: BFI);
14599 if (FS != BFS || FS != (int)Bytes) return false;
14600 return MFI.getObjectOffset(ObjectIdx: FI) == (MFI.getObjectOffset(ObjectIdx: BFI) + Dist*Bytes);
14601 }
14602
14603 SDValue Base1 = Loc, Base2 = BaseLoc;
14604 int64_t Offset1 = 0, Offset2 = 0;
14605 getBaseWithConstantOffset(Loc, Base&: Base1, Offset&: Offset1, DAG);
14606 getBaseWithConstantOffset(Loc: BaseLoc, Base&: Base2, Offset&: Offset2, DAG);
14607 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14608 return true;
14609
14610 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14611 const GlobalValue *GV1 = nullptr;
14612 const GlobalValue *GV2 = nullptr;
14613 Offset1 = 0;
14614 Offset2 = 0;
14615 bool isGA1 = TLI.isGAPlusOffset(N: Loc.getNode(), GA&: GV1, Offset&: Offset1);
14616 bool isGA2 = TLI.isGAPlusOffset(N: BaseLoc.getNode(), GA&: GV2, Offset&: Offset2);
14617 if (isGA1 && isGA2 && GV1 == GV2)
14618 return Offset1 == (Offset2 + Dist*Bytes);
14619 return false;
14620}
14621
14622// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14623// not enforce equality of the chain operands.
14624static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14625 unsigned Bytes, int Dist,
14626 SelectionDAG &DAG) {
14627 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Val: N)) {
14628 EVT VT = LS->getMemoryVT();
14629 SDValue Loc = LS->getBasePtr();
14630 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14631 }
14632
14633 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14634 EVT VT;
14635 switch (N->getConstantOperandVal(Num: 1)) {
14636 default: return false;
14637 case Intrinsic::ppc_altivec_lvx:
14638 case Intrinsic::ppc_altivec_lvxl:
14639 case Intrinsic::ppc_vsx_lxvw4x:
14640 case Intrinsic::ppc_vsx_lxvw4x_be:
14641 VT = MVT::v4i32;
14642 break;
14643 case Intrinsic::ppc_vsx_lxvd2x:
14644 case Intrinsic::ppc_vsx_lxvd2x_be:
14645 VT = MVT::v2f64;
14646 break;
14647 case Intrinsic::ppc_altivec_lvebx:
14648 VT = MVT::i8;
14649 break;
14650 case Intrinsic::ppc_altivec_lvehx:
14651 VT = MVT::i16;
14652 break;
14653 case Intrinsic::ppc_altivec_lvewx:
14654 VT = MVT::i32;
14655 break;
14656 }
14657
14658 return isConsecutiveLSLoc(Loc: N->getOperand(Num: 2), VT, Base, Bytes, Dist, DAG);
14659 }
14660
14661 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14662 EVT VT;
14663 switch (N->getConstantOperandVal(Num: 1)) {
14664 default: return false;
14665 case Intrinsic::ppc_altivec_stvx:
14666 case Intrinsic::ppc_altivec_stvxl:
14667 case Intrinsic::ppc_vsx_stxvw4x:
14668 VT = MVT::v4i32;
14669 break;
14670 case Intrinsic::ppc_vsx_stxvd2x:
14671 VT = MVT::v2f64;
14672 break;
14673 case Intrinsic::ppc_vsx_stxvw4x_be:
14674 VT = MVT::v4i32;
14675 break;
14676 case Intrinsic::ppc_vsx_stxvd2x_be:
14677 VT = MVT::v2f64;
14678 break;
14679 case Intrinsic::ppc_altivec_stvebx:
14680 VT = MVT::i8;
14681 break;
14682 case Intrinsic::ppc_altivec_stvehx:
14683 VT = MVT::i16;
14684 break;
14685 case Intrinsic::ppc_altivec_stvewx:
14686 VT = MVT::i32;
14687 break;
14688 }
14689
14690 return isConsecutiveLSLoc(Loc: N->getOperand(Num: 3), VT, Base, Bytes, Dist, DAG);
14691 }
14692
14693 return false;
14694}
14695
14696// Return true if there is a nearby consecutive load to the one provided
14697// (regardless of alignment). We search up and down the chain, looking through
14698// token factors and other loads (but nothing else). A true result therefore
14699// indicates that it is safe to create a new consecutive load adjacent to the
14700// load provided.
14701static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14702 SDValue Chain = LD->getChain();
14703 EVT VT = LD->getMemoryVT();
14704
14705 SmallSet<SDNode *, 16> LoadRoots;
14706 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14707 SmallSet<SDNode *, 16> Visited;
14708
14709 // First, search up the chain, branching to follow all token-factor operands.
14710 // If we find a consecutive load, then we're done, otherwise, record all
14711 // nodes just above the top-level loads and token factors.
14712 while (!Queue.empty()) {
14713 SDNode *ChainNext = Queue.pop_back_val();
14714 if (!Visited.insert(Ptr: ChainNext).second)
14715 continue;
14716
14717 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: ChainNext)) {
14718 if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: 1, DAG))
14719 return true;
14720
14721 if (!Visited.count(Ptr: ChainLD->getChain().getNode()))
14722 Queue.push_back(Elt: ChainLD->getChain().getNode());
14723 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14724 for (const SDUse &O : ChainNext->ops())
14725 if (!Visited.count(Ptr: O.getNode()))
14726 Queue.push_back(Elt: O.getNode());
14727 } else
14728 LoadRoots.insert(Ptr: ChainNext);
14729 }
14730
14731 // Second, search down the chain, starting from the top-level nodes recorded
14732 // in the first phase. These top-level nodes are the nodes just above all
14733// loads and token factors. Starting with their uses, recursively look through
14734 // all loads (just the chain uses) and token factors to find a consecutive
14735 // load.
14736 Visited.clear();
14737 Queue.clear();
14738
14739 for (SDNode *I : LoadRoots) {
14740 Queue.push_back(Elt: I);
14741
14742 while (!Queue.empty()) {
14743 SDNode *LoadRoot = Queue.pop_back_val();
14744 if (!Visited.insert(Ptr: LoadRoot).second)
14745 continue;
14746
14747 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: LoadRoot))
14748 if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: 1, DAG))
14749 return true;
14750
14751 for (SDNode *U : LoadRoot->users())
14752 if (((isa<MemSDNode>(Val: U) &&
14753 cast<MemSDNode>(Val: U)->getChain().getNode() == LoadRoot) ||
14754 U->getOpcode() == ISD::TokenFactor) &&
14755 !Visited.count(Ptr: U))
14756 Queue.push_back(Elt: U);
14757 }
14758 }
14759
14760 return false;
14761}
14762
14763/// This function is called when we have proved that a SETCC node can be replaced
14764/// by subtraction (and other supporting instructions) so that the result of
14765/// the comparison is kept in a GPR instead of a CR field. This is purely for
14766/// codegen purposes and has some flags to guide the codegen process.
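/// For example, on a 64-bit target (setult x, y) with 32-bit operands becomes
/// truncate(((zext x) - (zext y)) >> 63) to i1; the other unsigned predicates
/// swap the operands and/or complement the extracted bit as needed.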
14767static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14768 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14769 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14770
14771 // Zero extend the operands to the largest legal integer. Originally, they
14772 // must be of a strictly smaller size.
14773 auto Op0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: 0),
14774 N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));
14775 auto Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: 1),
14776 N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));
14777
14778 // Swap if needed. Depends on the condition code.
14779 if (Swap)
14780 std::swap(a&: Op0, b&: Op1);
14781
14782 // Subtract extended integers.
14783 auto SubNode = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: Op0, N2: Op1);
14784
14785 // Move the sign bit to the least significant position and zero out the rest.
14786  // Now the least significant bit carries the result of the original comparison.
14787 auto Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SubNode,
14788 N2: DAG.getConstant(Val: Size - 1, DL, VT: MVT::i32));
14789 auto Final = Shifted;
14790
14791  // Complement the result if needed, based on the condition code.
14792 if (Complement)
14793 Final = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, N1: Shifted,
14794 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i64));
14795
14796 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: Final);
14797}
14798
14799SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14800 DAGCombinerInfo &DCI) const {
14801 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14802
14803 SelectionDAG &DAG = DCI.DAG;
14804 SDLoc DL(N);
14805
14806  // The size of the integers being compared plays a critical role in the
14807  // following analysis, so we prefer to do this when all types are legal.
14808 if (!DCI.isAfterLegalizeDAG())
14809 return SDValue();
14810
14811  // If all users of the SETCC extend its value to a legal integer type,
14812  // then we replace the SETCC with a subtraction.
14813 for (const SDNode *U : N->users())
14814 if (U->getOpcode() != ISD::ZERO_EXTEND)
14815 return SDValue();
14816
14817 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
14818 auto OpSize = N->getOperand(Num: 0).getValueSizeInBits();
14819
14820 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14821
14822 if (OpSize < Size) {
14823 switch (CC) {
14824 default: break;
14825 case ISD::SETULT:
14826 return generateEquivalentSub(N, Size, Complement: false, Swap: false, DL, DAG);
14827 case ISD::SETULE:
14828 return generateEquivalentSub(N, Size, Complement: true, Swap: true, DL, DAG);
14829 case ISD::SETUGT:
14830 return generateEquivalentSub(N, Size, Complement: false, Swap: true, DL, DAG);
14831 case ISD::SETUGE:
14832 return generateEquivalentSub(N, Size, Complement: true, Swap: false, DL, DAG);
14833 }
14834 }
14835
14836 return SDValue();
14837}
14838
14839SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14840 DAGCombinerInfo &DCI) const {
14841 SelectionDAG &DAG = DCI.DAG;
14842 SDLoc dl(N);
14843
14844 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14845 // If we're tracking CR bits, we need to be careful that we don't have:
14846 // trunc(binary-ops(zext(x), zext(y)))
14847 // or
14848 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14849 // such that we're unnecessarily moving things into GPRs when it would be
14850 // better to keep them in CR bits.
14851
14852 // Note that trunc here can be an actual i1 trunc, or can be the effective
14853 // truncation that comes from a setcc or select_cc.
14854 if (N->getOpcode() == ISD::TRUNCATE &&
14855 N->getValueType(ResNo: 0) != MVT::i1)
14856 return SDValue();
14857
14858 if (N->getOperand(Num: 0).getValueType() != MVT::i32 &&
14859 N->getOperand(Num: 0).getValueType() != MVT::i64)
14860 return SDValue();
14861
14862 if (N->getOpcode() == ISD::SETCC ||
14863 N->getOpcode() == ISD::SELECT_CC) {
14864 // If we're looking at a comparison, then we need to make sure that the
14865    // high bits (all except for the first) don't affect the result.
14866 ISD::CondCode CC =
14867 cast<CondCodeSDNode>(Val: N->getOperand(
14868 Num: N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14869 unsigned OpBits = N->getOperand(Num: 0).getValueSizeInBits();
14870
14871 if (ISD::isSignedIntSetCC(Code: CC)) {
14872 if (DAG.ComputeNumSignBits(Op: N->getOperand(Num: 0)) != OpBits ||
14873 DAG.ComputeNumSignBits(Op: N->getOperand(Num: 1)) != OpBits)
14874 return SDValue();
14875 } else if (ISD::isUnsignedIntSetCC(Code: CC)) {
14876 if (!DAG.MaskedValueIsZero(Op: N->getOperand(Num: 0),
14877 Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-1)) ||
14878 !DAG.MaskedValueIsZero(Op: N->getOperand(Num: 1),
14879 Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-1)))
14880 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14881 : SDValue());
14882 } else {
14883 // This is neither a signed nor an unsigned comparison, just make sure
14884 // that the high bits are equal.
14885 KnownBits Op1Known = DAG.computeKnownBits(Op: N->getOperand(Num: 0));
14886 KnownBits Op2Known = DAG.computeKnownBits(Op: N->getOperand(Num: 1));
14887
14888 // We don't really care about what is known about the first bit (if
14889 // anything), so pretend that it is known zero for both to ensure they can
14890 // be compared as constants.
14891 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(BitPosition: 0);
14892 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(BitPosition: 0);
14893
14894 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14895 Op1Known.getConstant() != Op2Known.getConstant())
14896 return SDValue();
14897 }
14898 }
14899
14900 // We now know that the higher-order bits are irrelevant, we just need to
14901 // make sure that all of the intermediate operations are bit operations, and
14902 // all inputs are extensions.
14903 if (N->getOperand(Num: 0).getOpcode() != ISD::AND &&
14904 N->getOperand(Num: 0).getOpcode() != ISD::OR &&
14905 N->getOperand(Num: 0).getOpcode() != ISD::XOR &&
14906 N->getOperand(Num: 0).getOpcode() != ISD::SELECT &&
14907 N->getOperand(Num: 0).getOpcode() != ISD::SELECT_CC &&
14908 N->getOperand(Num: 0).getOpcode() != ISD::TRUNCATE &&
14909 N->getOperand(Num: 0).getOpcode() != ISD::SIGN_EXTEND &&
14910 N->getOperand(Num: 0).getOpcode() != ISD::ZERO_EXTEND &&
14911 N->getOperand(Num: 0).getOpcode() != ISD::ANY_EXTEND)
14912 return SDValue();
14913
14914 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14915 N->getOperand(Num: 1).getOpcode() != ISD::AND &&
14916 N->getOperand(Num: 1).getOpcode() != ISD::OR &&
14917 N->getOperand(Num: 1).getOpcode() != ISD::XOR &&
14918 N->getOperand(Num: 1).getOpcode() != ISD::SELECT &&
14919 N->getOperand(Num: 1).getOpcode() != ISD::SELECT_CC &&
14920 N->getOperand(Num: 1).getOpcode() != ISD::TRUNCATE &&
14921 N->getOperand(Num: 1).getOpcode() != ISD::SIGN_EXTEND &&
14922 N->getOperand(Num: 1).getOpcode() != ISD::ZERO_EXTEND &&
14923 N->getOperand(Num: 1).getOpcode() != ISD::ANY_EXTEND)
14924 return SDValue();
14925
14926 SmallVector<SDValue, 4> Inputs;
14927 SmallVector<SDValue, 8> BinOps, PromOps;
14928 SmallPtrSet<SDNode *, 16> Visited;
14929
14930 for (unsigned i = 0; i < 2; ++i) {
14931 if (((N->getOperand(Num: i).getOpcode() == ISD::SIGN_EXTEND ||
14932 N->getOperand(Num: i).getOpcode() == ISD::ZERO_EXTEND ||
14933 N->getOperand(Num: i).getOpcode() == ISD::ANY_EXTEND) &&
14934 N->getOperand(Num: i).getOperand(i: 0).getValueType() == MVT::i1) ||
14935 isa<ConstantSDNode>(Val: N->getOperand(Num: i)))
14936 Inputs.push_back(Elt: N->getOperand(Num: i));
14937 else
14938 BinOps.push_back(Elt: N->getOperand(Num: i));
14939
14940 if (N->getOpcode() == ISD::TRUNCATE)
14941 break;
14942 }
14943
14944 // Visit all inputs, collect all binary operations (and, or, xor and
14945 // select) that are all fed by extensions.
14946 while (!BinOps.empty()) {
14947 SDValue BinOp = BinOps.pop_back_val();
14948
14949 if (!Visited.insert(Ptr: BinOp.getNode()).second)
14950 continue;
14951
14952 PromOps.push_back(Elt: BinOp);
14953
14954 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14955 // The condition of the select is not promoted.
14956 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14957 continue;
14958 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14959 continue;
14960
14961 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14962 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14963 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14964 BinOp.getOperand(i).getOperand(i: 0).getValueType() == MVT::i1) ||
14965 isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
14966 Inputs.push_back(Elt: BinOp.getOperand(i));
14967 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14968 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14969 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14970 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14971 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14972 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14973 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14974 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14975 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14976 BinOps.push_back(Elt: BinOp.getOperand(i));
14977 } else {
14978 // We have an input that is not an extension or another binary
14979 // operation; we'll abort this transformation.
14980 return SDValue();
14981 }
14982 }
14983 }
14984
14985 // Make sure that this is a self-contained cluster of operations (which
14986 // is not quite the same thing as saying that everything has only one
14987 // use).
14988 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14989 if (isa<ConstantSDNode>(Val: Inputs[i]))
14990 continue;
14991
14992 for (const SDNode *User : Inputs[i].getNode()->users()) {
14993 if (User != N && !Visited.count(Ptr: User))
14994 return SDValue();
14995
14996 // Make sure that we're not going to promote the non-output-value
14997 // operand(s) or SELECT or SELECT_CC.
14998 // FIXME: Although we could sometimes handle this, and it does occur in
14999 // practice that one of the condition inputs to the select is also one of
15000 // the outputs, we currently can't deal with this.
15001 if (User->getOpcode() == ISD::SELECT) {
15002 if (User->getOperand(Num: 0) == Inputs[i])
15003 return SDValue();
15004 } else if (User->getOpcode() == ISD::SELECT_CC) {
15005 if (User->getOperand(Num: 0) == Inputs[i] ||
15006 User->getOperand(Num: 1) == Inputs[i])
15007 return SDValue();
15008 }
15009 }
15010 }
15011
15012 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15013 for (const SDNode *User : PromOps[i].getNode()->users()) {
15014 if (User != N && !Visited.count(Ptr: User))
15015 return SDValue();
15016
15017 // Make sure that we're not going to promote the non-output-value
15018 // operand(s) or SELECT or SELECT_CC.
15019 // FIXME: Although we could sometimes handle this, and it does occur in
15020 // practice that one of the condition inputs to the select is also one of
15021 // the outputs, we currently can't deal with this.
15022 if (User->getOpcode() == ISD::SELECT) {
15023 if (User->getOperand(Num: 0) == PromOps[i])
15024 return SDValue();
15025 } else if (User->getOpcode() == ISD::SELECT_CC) {
15026 if (User->getOperand(Num: 0) == PromOps[i] ||
15027 User->getOperand(Num: 1) == PromOps[i])
15028 return SDValue();
15029 }
15030 }
15031 }
15032
15033 // Replace all inputs with the extension operand.
15034 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15035 // Constants may have users outside the cluster of to-be-promoted nodes,
15036 // and so we need to replace those as we do the promotions.
15037 if (isa<ConstantSDNode>(Val: Inputs[i]))
15038 continue;
15039 else
15040 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i], To: Inputs[i].getOperand(i: 0));
15041 }
15042
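  // Wrap the to-be-promoted nodes in HandleSDNodes so the worklist entries
  // remain valid across the ReplaceAllUsesOfValueWith calls below.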
15043 std::list<HandleSDNode> PromOpHandles;
15044 for (auto &PromOp : PromOps)
15045 PromOpHandles.emplace_back(args&: PromOp);
15046
15047 // Replace all operations (these are all the same, but have a different
15048 // (i1) return type). DAG.getNode will validate that the types of
15049 // a binary operator match, so go through the list in reverse so that
15050 // we've likely promoted both operands first. Any intermediate truncations or
15051 // extensions disappear.
15052 while (!PromOpHandles.empty()) {
15053 SDValue PromOp = PromOpHandles.back().getValue();
15054 PromOpHandles.pop_back();
15055
15056 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15057 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15058 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15059 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15060 if (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: 0)) &&
15061 PromOp.getOperand(i: 0).getValueType() != MVT::i1) {
15062 // The operand is not yet ready (see comment below).
15063 PromOpHandles.emplace_front(args&: PromOp);
15064 continue;
15065 }
15066
15067 SDValue RepValue = PromOp.getOperand(i: 0);
15068 if (isa<ConstantSDNode>(Val: RepValue))
15069 RepValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: RepValue);
15070
15071 DAG.ReplaceAllUsesOfValueWith(From: PromOp, To: RepValue);
15072 continue;
15073 }
15074
15075 unsigned C;
15076 switch (PromOp.getOpcode()) {
15077 default: C = 0; break;
15078 case ISD::SELECT: C = 1; break;
15079 case ISD::SELECT_CC: C = 2; break;
15080 }
15081
15082 if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
15083 PromOp.getOperand(i: C).getValueType() != MVT::i1) ||
15084 (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+1)) &&
15085 PromOp.getOperand(i: C+1).getValueType() != MVT::i1)) {
15086 // The to-be-promoted operands of this node have not yet been
15087 // promoted (this should be rare because we're going through the
15088 // list backward, but if one of the operands has several users in
15089 // this cluster of to-be-promoted nodes, it is possible).
15090 PromOpHandles.emplace_front(args&: PromOp);
15091 continue;
15092 }
15093
15094 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15095
15096 // If there are any constant inputs, make sure they're replaced now.
15097 for (unsigned i = 0; i < 2; ++i)
15098 if (isa<ConstantSDNode>(Val: Ops[C+i]))
15099 Ops[C+i] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: Ops[C+i]);
15100
15101 DAG.ReplaceAllUsesOfValueWith(From: PromOp,
15102 To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: MVT::i1, Ops));
15103 }
15104
15105 // Now we're left with the initial truncation itself.
15106 if (N->getOpcode() == ISD::TRUNCATE)
15107 return N->getOperand(Num: 0);
15108
15109 // Otherwise, this is a comparison. The operands to be compared have just
15110 // changed type (to i1), but everything else is the same.
15111 return SDValue(N, 0);
15112}
15113
15114SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15115 DAGCombinerInfo &DCI) const {
15116 SelectionDAG &DAG = DCI.DAG;
15117 SDLoc dl(N);
15118
15119 // If we're tracking CR bits, we need to be careful that we don't have:
15120 // zext(binary-ops(trunc(x), trunc(y)))
15121 // or
15122 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15123 // such that we're unnecessarily moving things into CR bits that can more
15124 // efficiently stay in GPRs. Note that if we're not certain that the high
15125 // bits are set as required by the final extension, we still may need to do
15126 // some masking to get the proper behavior.
15127
15128 // This same functionality is important on PPC64 when dealing with
15129 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15130 // the return values of functions. Because it is so similar, it is handled
15131 // here as well.
15132
15133 if (N->getValueType(ResNo: 0) != MVT::i32 &&
15134 N->getValueType(ResNo: 0) != MVT::i64)
15135 return SDValue();
15136
15137 if (!((N->getOperand(Num: 0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15138 (N->getOperand(Num: 0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15139 return SDValue();
15140
15141 if (N->getOperand(Num: 0).getOpcode() != ISD::AND &&
15142 N->getOperand(Num: 0).getOpcode() != ISD::OR &&
15143 N->getOperand(Num: 0).getOpcode() != ISD::XOR &&
15144 N->getOperand(Num: 0).getOpcode() != ISD::SELECT &&
15145 N->getOperand(Num: 0).getOpcode() != ISD::SELECT_CC)
15146 return SDValue();
15147
15148 SmallVector<SDValue, 4> Inputs;
15149 SmallVector<SDValue, 8> BinOps(1, N->getOperand(Num: 0)), PromOps;
15150 SmallPtrSet<SDNode *, 16> Visited;
15151
15152 // Visit all inputs, collect all binary operations (and, or, xor and
15153 // select) that are all fed by truncations.
15154 while (!BinOps.empty()) {
15155 SDValue BinOp = BinOps.pop_back_val();
15156
15157 if (!Visited.insert(Ptr: BinOp.getNode()).second)
15158 continue;
15159
15160 PromOps.push_back(Elt: BinOp);
15161
15162 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15163 // The condition of the select is not promoted.
15164 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15165 continue;
15166 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15167 continue;
15168
15169 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15170 isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
15171 Inputs.push_back(Elt: BinOp.getOperand(i));
15172 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15173 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15174 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15175 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15176 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15177 BinOps.push_back(Elt: BinOp.getOperand(i));
15178 } else {
15179 // We have an input that is not a truncation or another binary
15180 // operation; we'll abort this transformation.
15181 return SDValue();
15182 }
15183 }
15184 }
15185
15186  // Records the operands of a select that must be truncated when the select is
15187  // promoted, because the operand is actually part of the to-be-promoted set.
15188 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15189
15190 // Make sure that this is a self-contained cluster of operations (which
15191 // is not quite the same thing as saying that everything has only one
15192 // use).
15193 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15194 if (isa<ConstantSDNode>(Val: Inputs[i]))
15195 continue;
15196
15197 for (SDNode *User : Inputs[i].getNode()->users()) {
15198 if (User != N && !Visited.count(Ptr: User))
15199 return SDValue();
15200
15201 // If we're going to promote the non-output-value operand(s) or SELECT or
15202 // SELECT_CC, record them for truncation.
15203 if (User->getOpcode() == ISD::SELECT) {
15204 if (User->getOperand(Num: 0) == Inputs[i])
15205 SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
15206 y: User->getOperand(Num: 0).getValueType()));
15207 } else if (User->getOpcode() == ISD::SELECT_CC) {
15208 if (User->getOperand(Num: 0) == Inputs[i])
15209 SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
15210 y: User->getOperand(Num: 0).getValueType()));
15211 if (User->getOperand(Num: 1) == Inputs[i])
15212 SelectTruncOp[1].insert(KV: std::make_pair(x&: User,
15213 y: User->getOperand(Num: 1).getValueType()));
15214 }
15215 }
15216 }
15217
15218 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15219 for (SDNode *User : PromOps[i].getNode()->users()) {
15220 if (User != N && !Visited.count(Ptr: User))
15221 return SDValue();
15222
15223 // If we're going to promote the non-output-value operand(s) or SELECT or
15224 // SELECT_CC, record them for truncation.
15225 if (User->getOpcode() == ISD::SELECT) {
15226 if (User->getOperand(Num: 0) == PromOps[i])
15227 SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
15228 y: User->getOperand(Num: 0).getValueType()));
15229 } else if (User->getOpcode() == ISD::SELECT_CC) {
15230 if (User->getOperand(Num: 0) == PromOps[i])
15231 SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
15232 y: User->getOperand(Num: 0).getValueType()));
15233 if (User->getOperand(Num: 1) == PromOps[i])
15234 SelectTruncOp[1].insert(KV: std::make_pair(x&: User,
15235 y: User->getOperand(Num: 1).getValueType()));
15236 }
15237 }
15238 }
15239
15240 unsigned PromBits = N->getOperand(Num: 0).getValueSizeInBits();
15241 bool ReallyNeedsExt = false;
15242 if (N->getOpcode() != ISD::ANY_EXTEND) {
15243    // If not all of the inputs are already sign/zero extended, then
15244    // we'll still need to do that at the end.
15245 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15246 if (isa<ConstantSDNode>(Val: Inputs[i]))
15247 continue;
15248
15249 unsigned OpBits =
15250 Inputs[i].getOperand(i: 0).getValueSizeInBits();
15251 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15252
15253 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15254 !DAG.MaskedValueIsZero(Op: Inputs[i].getOperand(i: 0),
15255 Mask: APInt::getHighBitsSet(numBits: OpBits,
15256 hiBitsSet: OpBits-PromBits))) ||
15257 (N->getOpcode() == ISD::SIGN_EXTEND &&
15258 DAG.ComputeNumSignBits(Op: Inputs[i].getOperand(i: 0)) <
15259 (OpBits-(PromBits-1)))) {
15260 ReallyNeedsExt = true;
15261 break;
15262 }
15263 }
15264 }
15265
15266 // Replace all inputs, either with the truncation operand, or a
15267 // truncation or extension to the final output type.
15268 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15269    // Constant inputs are replaced within the to-be-promoted nodes that use
15270    // them, rather than globally, because they might have users outside of
15271    // the cluster of promoted nodes.
15272 if (isa<ConstantSDNode>(Val: Inputs[i]))
15273 continue;
15274
15275 SDValue InSrc = Inputs[i].getOperand(i: 0);
15276 if (Inputs[i].getValueType() == N->getValueType(ResNo: 0))
15277 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i], To: InSrc);
15278 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15279 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
15280 To: DAG.getSExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
15281 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15282 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
15283 To: DAG.getZExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
15284 else
15285 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
15286 To: DAG.getAnyExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
15287 }
15288
15289 std::list<HandleSDNode> PromOpHandles;
15290 for (auto &PromOp : PromOps)
15291 PromOpHandles.emplace_back(args&: PromOp);
15292
15293 // Replace all operations (these are all the same, but have a different
15294 // (promoted) return type). DAG.getNode will validate that the types of
15295 // a binary operator match, so go through the list in reverse so that
15296 // we've likely promoted both operands first.
15297 while (!PromOpHandles.empty()) {
15298 SDValue PromOp = PromOpHandles.back().getValue();
15299 PromOpHandles.pop_back();
15300
15301 unsigned C;
15302 switch (PromOp.getOpcode()) {
15303 default: C = 0; break;
15304 case ISD::SELECT: C = 1; break;
15305 case ISD::SELECT_CC: C = 2; break;
15306 }
15307
15308 if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
15309 PromOp.getOperand(i: C).getValueType() != N->getValueType(ResNo: 0)) ||
15310 (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+1)) &&
15311 PromOp.getOperand(i: C+1).getValueType() != N->getValueType(ResNo: 0))) {
15312 // The to-be-promoted operands of this node have not yet been
15313 // promoted (this should be rare because we're going through the
15314 // list backward, but if one of the operands has several users in
15315 // this cluster of to-be-promoted nodes, it is possible).
15316 PromOpHandles.emplace_front(args&: PromOp);
15317 continue;
15318 }
15319
15320 // For SELECT and SELECT_CC nodes, we do a similar check for any
15321 // to-be-promoted comparison inputs.
15322 if (PromOp.getOpcode() == ISD::SELECT ||
15323 PromOp.getOpcode() == ISD::SELECT_CC) {
15324 if ((SelectTruncOp[0].count(Val: PromOp.getNode()) &&
15325 PromOp.getOperand(i: 0).getValueType() != N->getValueType(ResNo: 0)) ||
15326 (SelectTruncOp[1].count(Val: PromOp.getNode()) &&
15327 PromOp.getOperand(i: 1).getValueType() != N->getValueType(ResNo: 0))) {
15328 PromOpHandles.emplace_front(args&: PromOp);
15329 continue;
15330 }
15331 }
15332
15333 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15334
15335 // If this node has constant inputs, then they'll need to be promoted here.
15336 for (unsigned i = 0; i < 2; ++i) {
15337 if (!isa<ConstantSDNode>(Val: Ops[C+i]))
15338 continue;
15339 if (Ops[C+i].getValueType() == N->getValueType(ResNo: 0))
15340 continue;
15341
15342 if (N->getOpcode() == ISD::SIGN_EXTEND)
15343 Ops[C+i] = DAG.getSExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
15344 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15345 Ops[C+i] = DAG.getZExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
15346 else
15347 Ops[C+i] = DAG.getAnyExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
15348 }
15349
15350 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15351 // truncate them again to the original value type.
15352 if (PromOp.getOpcode() == ISD::SELECT ||
15353 PromOp.getOpcode() == ISD::SELECT_CC) {
15354 auto SI0 = SelectTruncOp[0].find(Val: PromOp.getNode());
15355 if (SI0 != SelectTruncOp[0].end())
15356 Ops[0] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI0->second, Operand: Ops[0]);
15357 auto SI1 = SelectTruncOp[1].find(Val: PromOp.getNode());
15358 if (SI1 != SelectTruncOp[1].end())
15359 Ops[1] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI1->second, Operand: Ops[1]);
15360 }
15361
15362 DAG.ReplaceAllUsesOfValueWith(From: PromOp,
15363 To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: N->getValueType(ResNo: 0), Ops));
15364 }
15365
15366 // Now we're left with the initial extension itself.
15367 if (!ReallyNeedsExt)
15368 return N->getOperand(Num: 0);
15369
15370 // To zero extend, just mask off everything except for the first bit (in the
15371 // i1 case).
15372 if (N->getOpcode() == ISD::ZERO_EXTEND)
15373 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 0),
15374 N2: DAG.getConstant(Val: APInt::getLowBitsSet(
15375 numBits: N->getValueSizeInBits(ResNo: 0), loBitsSet: PromBits),
15376 DL: dl, VT: N->getValueType(ResNo: 0)));
15377
15378 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15379 "Invalid extension type");
15380 EVT ShiftAmountTy = getShiftAmountTy(LHSTy: N->getValueType(ResNo: 0), DL: DAG.getDataLayout());
15381 SDValue ShiftCst =
15382 DAG.getConstant(Val: N->getValueSizeInBits(ResNo: 0) - PromBits, DL: dl, VT: ShiftAmountTy);
15383 return DAG.getNode(
15384 Opcode: ISD::SRA, DL: dl, VT: N->getValueType(ResNo: 0),
15385 N1: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 0), N2: ShiftCst),
15386 N2: ShiftCst);
15387}
15388
15389SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15390 DAGCombinerInfo &DCI) const {
15391 assert(N->getOpcode() == ISD::SETCC &&
15392 "Should be called with a SETCC node");
15393
15394 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15395 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15396 SDValue LHS = N->getOperand(Num: 0);
15397 SDValue RHS = N->getOperand(Num: 1);
15398
15399 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15400 if (LHS.getOpcode() == ISD::SUB && isNullConstant(V: LHS.getOperand(i: 0)) &&
15401 LHS.hasOneUse())
15402 std::swap(a&: LHS, b&: RHS);
15403
15404 // x == 0-y --> x+y == 0
15405 // x != 0-y --> x+y != 0
15406 if (RHS.getOpcode() == ISD::SUB && isNullConstant(V: RHS.getOperand(i: 0)) &&
15407 RHS.hasOneUse()) {
15408 SDLoc DL(N);
15409 SelectionDAG &DAG = DCI.DAG;
15410 EVT VT = N->getValueType(ResNo: 0);
15411 EVT OpVT = LHS.getValueType();
15412 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: OpVT, N1: LHS, N2: RHS.getOperand(i: 1));
15413 return DAG.getSetCC(DL, VT, LHS: Add, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond: CC);
15414 }
15415 }
15416
15417 return DAGCombineTruncBoolExt(N, DCI);
15418}
15419
15420// Is this an extending load from an f32 to an f64?
15421static bool isFPExtLoad(SDValue Op) {
15422 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: Op.getNode()))
15423 return LD->getExtensionType() == ISD::EXTLOAD &&
15424 Op.getValueType() == MVT::f64;
15425 return false;
15426}
15427
15428/// Reduces the number of fp-to-int conversions when building a vector.
15429///
15430/// If this vector is built out of floating to integer conversions,
15431/// transform it to a vector built out of floating point values followed by a
15432/// single floating to integer conversion of the vector.
15433/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15434/// becomes (fptosi (build_vector ($A, $B, ...)))
15435SDValue PPCTargetLowering::
15436combineElementTruncationToVectorTruncation(SDNode *N,
15437 DAGCombinerInfo &DCI) const {
15438 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15439 "Should be called with a BUILD_VECTOR node");
15440
15441 SelectionDAG &DAG = DCI.DAG;
15442 SDLoc dl(N);
15443
15444 SDValue FirstInput = N->getOperand(Num: 0);
15445 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15446 "The input operand must be an fp-to-int conversion.");
15447
15448 // This combine happens after legalization so the fp_to_[su]i nodes are
15449  // already converted to PPCISD nodes.
15450 unsigned FirstConversion = FirstInput.getOperand(i: 0).getOpcode();
15451 if (FirstConversion == PPCISD::FCTIDZ ||
15452 FirstConversion == PPCISD::FCTIDUZ ||
15453 FirstConversion == PPCISD::FCTIWZ ||
15454 FirstConversion == PPCISD::FCTIWUZ) {
15455 bool IsSplat = true;
15456 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15457 FirstConversion == PPCISD::FCTIWUZ;
15458 EVT SrcVT = FirstInput.getOperand(i: 0).getValueType();
15459 SmallVector<SDValue, 4> Ops;
15460 EVT TargetVT = N->getValueType(ResNo: 0);
15461 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15462 SDValue NextOp = N->getOperand(Num: i);
15463 if (NextOp.getOpcode() != PPCISD::MFVSR)
15464 return SDValue();
15465 unsigned NextConversion = NextOp.getOperand(i: 0).getOpcode();
15466 if (NextConversion != FirstConversion)
15467 return SDValue();
15468 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15469 // This is not valid if the input was originally double precision. It is
15470      // also not profitable to do unless this is an extending load, in which
15471      // case doing this combine will allow us to combine consecutive loads.
15472 if (Is32Bit && !isFPExtLoad(Op: NextOp.getOperand(i: 0).getOperand(i: 0)))
15473 return SDValue();
15474 if (N->getOperand(Num: i) != FirstInput)
15475 IsSplat = false;
15476 }
15477
15478 // If this is a splat, we leave it as-is since there will be only a single
15479 // fp-to-int conversion followed by a splat of the integer. This is better
15480 // for 32-bit and smaller ints and neutral for 64-bit ints.
15481 if (IsSplat)
15482 return SDValue();
15483
15484 // Now that we know we have the right type of node, get its operands
15485 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15486 SDValue In = N->getOperand(Num: i).getOperand(i: 0);
15487 if (Is32Bit) {
15488 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15489 // here, we know that all inputs are extending loads so this is safe).
15490 if (In.isUndef())
15491 Ops.push_back(Elt: DAG.getUNDEF(VT: SrcVT));
15492 else {
15493 SDValue Trunc =
15494 DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: In.getOperand(i: 0),
15495 N2: DAG.getIntPtrConstant(Val: 1, DL: dl, /*isTarget=*/true));
15496 Ops.push_back(Elt: Trunc);
15497 }
15498 } else
15499 Ops.push_back(Elt: In.isUndef() ? DAG.getUNDEF(VT: SrcVT) : In.getOperand(i: 0));
15500 }
15501
15502 unsigned Opcode;
15503 if (FirstConversion == PPCISD::FCTIDZ ||
15504 FirstConversion == PPCISD::FCTIWZ)
15505 Opcode = ISD::FP_TO_SINT;
15506 else
15507 Opcode = ISD::FP_TO_UINT;
15508
15509 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15510 SDValue BV = DAG.getBuildVector(VT: NewVT, DL: dl, Ops);
15511 return DAG.getNode(Opcode, DL: dl, VT: TargetVT, Operand: BV);
15512 }
15513 return SDValue();
15514}
15515
15516/// Reduce the number of loads when building a vector.
15517///
15518/// Building a vector out of multiple loads can be converted to a load
15519/// of the vector type if the loads are consecutive. If the loads are
15520/// consecutive but in descending order, a shuffle is added at the end
15521/// to reorder the vector.
15522static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15523 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15524 "Should be called with a BUILD_VECTOR node");
15525
15526 SDLoc dl(N);
15527
15528  // Return early for non-byte-sized types, as they can't be consecutive.
15529 if (!N->getValueType(ResNo: 0).getVectorElementType().isByteSized())
15530 return SDValue();
15531
15532 bool InputsAreConsecutiveLoads = true;
15533 bool InputsAreReverseConsecutive = true;
15534 unsigned ElemSize = N->getValueType(ResNo: 0).getScalarType().getStoreSize();
15535 SDValue FirstInput = N->getOperand(Num: 0);
15536 bool IsRoundOfExtLoad = false;
15537 LoadSDNode *FirstLoad = nullptr;
15538
15539 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15540 FirstInput.getOperand(i: 0).getOpcode() == ISD::LOAD) {
15541 FirstLoad = cast<LoadSDNode>(Val: FirstInput.getOperand(i: 0));
15542 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15543 }
15544 // Not a build vector of (possibly fp_rounded) loads.
15545 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15546 N->getNumOperands() == 1)
15547 return SDValue();
15548
15549 if (!IsRoundOfExtLoad)
15550 FirstLoad = cast<LoadSDNode>(Val&: FirstInput);
15551
15552 SmallVector<LoadSDNode *, 4> InputLoads;
15553 InputLoads.push_back(Elt: FirstLoad);
15554 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15555 // If any inputs are fp_round(extload), they all must be.
15556 if (IsRoundOfExtLoad && N->getOperand(Num: i).getOpcode() != ISD::FP_ROUND)
15557 return SDValue();
15558
15559 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(Num: i).getOperand(i: 0) :
15560 N->getOperand(Num: i);
15561 if (NextInput.getOpcode() != ISD::LOAD)
15562 return SDValue();
15563
15564 SDValue PreviousInput =
15565 IsRoundOfExtLoad ? N->getOperand(Num: i-1).getOperand(i: 0) : N->getOperand(Num: i-1);
15566 LoadSDNode *LD1 = cast<LoadSDNode>(Val&: PreviousInput);
15567 LoadSDNode *LD2 = cast<LoadSDNode>(Val&: NextInput);
15568
15569 // If any inputs are fp_round(extload), they all must be.
15570 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15571 return SDValue();
15572
15573 // We only care about regular loads. The PPC-specific load intrinsics
15574 // will not lead to a merge opportunity.
15575 if (!DAG.areNonVolatileConsecutiveLoads(LD: LD2, Base: LD1, Bytes: ElemSize, Dist: 1))
15576 InputsAreConsecutiveLoads = false;
15577 if (!DAG.areNonVolatileConsecutiveLoads(LD: LD1, Base: LD2, Bytes: ElemSize, Dist: 1))
15578 InputsAreReverseConsecutive = false;
15579
15580 // Exit early if the loads are neither consecutive nor reverse consecutive.
15581 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15582 return SDValue();
15583 InputLoads.push_back(Elt: LD2);
15584 }
15585
15586 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15587 "The loads cannot be both consecutive and reverse consecutive.");
15588
15589 SDValue WideLoad;
15590 SDValue ReturnSDVal;
15591 if (InputsAreConsecutiveLoads) {
15592 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15593 WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: 0), dl, Chain: FirstLoad->getChain(),
15594 Ptr: FirstLoad->getBasePtr(), PtrInfo: FirstLoad->getPointerInfo(),
15595 Alignment: FirstLoad->getAlign());
15596 ReturnSDVal = WideLoad;
15597 } else if (InputsAreReverseConsecutive) {
15598 LoadSDNode *LastLoad = InputLoads.back();
15599 assert(LastLoad && "Input needs to be a LoadSDNode.");
15600 WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: 0), dl, Chain: LastLoad->getChain(),
15601 Ptr: LastLoad->getBasePtr(), PtrInfo: LastLoad->getPointerInfo(),
15602 Alignment: LastLoad->getAlign());
15603 SmallVector<int, 16> Ops;
15604 for (int i = N->getNumOperands() - 1; i >= 0; i--)
15605 Ops.push_back(Elt: i);
15606
15607 ReturnSDVal = DAG.getVectorShuffle(VT: N->getValueType(ResNo: 0), dl, N1: WideLoad,
15608 N2: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)), Mask: Ops);
15609 } else
15610 return SDValue();
15611
15612 for (auto *LD : InputLoads)
15613 DAG.makeEquivalentMemoryOrdering(OldLoad: LD, NewMemOp: WideLoad);
15614 return ReturnSDVal;
15615}
15616
15617 // This function adds the vector_shuffle needed to get the elements of the
15618 // vector extracts into the positions specified by the CorrectElems
15619 // encoding.
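// For example (illustrative): when extending bytes to words on little endian,
// the vector sign-extend instruction reads bytes 0, 4, 8 and 12, so extracts
// of any other byte indices are first shuffled into those lanes.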
15620static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15621 SDValue Input, uint64_t Elems,
15622 uint64_t CorrectElems) {
15623 SDLoc dl(N);
15624
15625 unsigned NumElems = Input.getValueType().getVectorNumElements();
15626 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15627
15628 // Knowing the element indices being extracted from the original
15629 // vector and the order in which they're being inserted, just put
15630 // them at element indices required for the instruction.
15631 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15632 if (DAG.getDataLayout().isLittleEndian())
15633 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15634 else
15635 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15636 CorrectElems = CorrectElems >> 8;
15637 Elems = Elems >> 8;
15638 }
15639
15640 SDValue Shuffle =
15641 DAG.getVectorShuffle(VT: Input.getValueType(), dl, N1: Input,
15642 N2: DAG.getUNDEF(VT: Input.getValueType()), Mask: ShuffleMask);
15643
15644 EVT VT = N->getValueType(ResNo: 0);
15645 SDValue Conv = DAG.getBitcast(VT, V: Shuffle);
15646
15647 EVT ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(),
15648 VT: Input.getValueType().getVectorElementType(),
15649 NumElements: VT.getVectorNumElements());
15650 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT, N1: Conv,
15651 N2: DAG.getValueType(ExtVT));
15652}
15653
15654 // Look for build vector patterns where the input operands come from sign-
15655 // extended vector_extract elements of specific indices. If the correct indices
15656 // aren't used, add a vector shuffle to fix up the indices and create a
15657 // SIGN_EXTEND_INREG node which selects the vector sign-extend instructions
15658 // during instruction selection.
15659static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15660 // This array encodes the indices that the vector sign extend instructions
15661 // extract from when extending from one type to another for both BE and LE.
15662 // The right nibble of each byte corresponds to the LE indices,
15663 // and the left nibble of each byte corresponds to the BE indices.
15664 // For example: 0x3074B8FC byte->word
15665 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15666 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15667 // For example: 0x000070F8 byte->double word
15668 // For LE: the allowed indices are: 0x0,0x8
15669 // For BE: the allowed indices are: 0x7,0xF
15670 uint64_t TargetElems[] = {
15671 0x3074B8FC, // b->w
15672 0x000070F8, // b->d
15673 0x10325476, // h->w
15674 0x00003074, // h->d
15675 0x00001032, // w->d
15676 };
15677
15678 uint64_t Elems = 0;
15679 int Index;
15680 SDValue Input;
15681
15682 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15683 if (!Op)
15684 return false;
15685 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15686 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15687 return false;
15688
15689 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15690 // of the right width.
15691 SDValue Extract = Op.getOperand(i: 0);
15692 if (Extract.getOpcode() == ISD::ANY_EXTEND)
15693 Extract = Extract.getOperand(i: 0);
15694 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15695 return false;
15696
15697 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: 1));
15698 if (!ExtOp)
15699 return false;
15700
15701 Index = ExtOp->getZExtValue();
15702 if (Input && Input != Extract.getOperand(i: 0))
15703 return false;
15704
15705 if (!Input)
15706 Input = Extract.getOperand(i: 0);
15707
15708 Elems = Elems << 8;
15709 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15710 Elems |= Index;
15711
15712 return true;
15713 };
15714
15715 // If the build vector operands aren't sign-extended vector extracts
15716 // of the same input vector, then return.
15717 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15718 if (!isSExtOfVecExtract(N->getOperand(Num: i))) {
15719 return SDValue();
15720 }
15721 }
15722
15723 // If the vector extract indices are not correct, add the appropriate
15724 // vector_shuffle.
15725 int TgtElemArrayIdx;
15726 int InputSize = Input.getValueType().getScalarSizeInBits();
15727 int OutputSize = N->getValueType(ResNo: 0).getScalarSizeInBits();
15728 if (InputSize + OutputSize == 40)
15729 TgtElemArrayIdx = 0;
15730 else if (InputSize + OutputSize == 72)
15731 TgtElemArrayIdx = 1;
15732 else if (InputSize + OutputSize == 48)
15733 TgtElemArrayIdx = 2;
15734 else if (InputSize + OutputSize == 80)
15735 TgtElemArrayIdx = 3;
15736 else if (InputSize + OutputSize == 96)
15737 TgtElemArrayIdx = 4;
15738 else
15739 return SDValue();
15740
15741 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15742 CorrectElems = DAG.getDataLayout().isLittleEndian()
15743 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15744 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15745 if (Elems != CorrectElems) {
15746 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15747 }
15748
15749 // Regular lowering will catch cases where a shuffle is not needed.
15750 return SDValue();
15751}
15752
15753// Look for the pattern of a load from a narrow width to i128, feeding
15754// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15755// (LXVRZX). This node represents a zero extending load that will be matched
15756// to the Load VSX Vector Rightmost instructions.
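// For example (illustrative, %p is a placeholder pointer):
//   (v1i128 (build_vector (i128 (zextload i32 %p))))
//     -> (v1i128 (LXVRZX %p, 32))
// where the constant operand is the width of the memory access in bits.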
15757static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15758 SDLoc DL(N);
15759
15760 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15761 if (N->getValueType(ResNo: 0) != MVT::v1i128)
15762 return SDValue();
15763
15764 SDValue Operand = N->getOperand(Num: 0);
15765 // Proceed with the transformation if the operand to the BUILD_VECTOR
15766 // is a load instruction.
15767 if (Operand.getOpcode() != ISD::LOAD)
15768 return SDValue();
15769
15770 auto *LD = cast<LoadSDNode>(Val&: Operand);
15771 EVT MemoryType = LD->getMemoryVT();
15772
15773 // This transformation is only valid if we are loading either a byte,
15774 // halfword, word, or doubleword.
15775 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15776 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15777
15778 // Ensure that the load from the narrow width is being zero extended to i128.
15779 if (!ValidLDType ||
15780 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15781 LD->getExtensionType() != ISD::EXTLOAD))
15782 return SDValue();
15783
15784 SDValue LoadOps[] = {
15785 LD->getChain(), LD->getBasePtr(),
15786 DAG.getIntPtrConstant(Val: MemoryType.getScalarSizeInBits(), DL)};
15787
15788 return DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVRZX, dl: DL,
15789 VTList: DAG.getVTList(VT1: MVT::v1i128, VT2: MVT::Other),
15790 Ops: LoadOps, MemVT: MemoryType, MMO: LD->getMemOperand());
15791}
15792
15793SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15794 DAGCombinerInfo &DCI) const {
15795 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15796 "Should be called with a BUILD_VECTOR node");
15797
15798 SelectionDAG &DAG = DCI.DAG;
15799 SDLoc dl(N);
15800
15801 if (!Subtarget.hasVSX())
15802 return SDValue();
15803
15804 // The target independent DAG combiner will leave a build_vector of
15805 // float-to-int conversions intact. We can generate MUCH better code for
15806 // a float-to-int conversion of a vector of floats.
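// For example (illustrative, %a..%d are placeholder FP values):
//   (v4i32 (build_vector (MFVSR (FCTIWZ %a)), ..., (MFVSR (FCTIWZ %d))))
//     -> (v4i32 (fp_to_sint (v4f32 (build_vector %a', ..., %d'))))
// where %a'..%d' are the original FP inputs, FP_ROUNDed to f32 if needed.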
15807 SDValue FirstInput = N->getOperand(Num: 0);
15808 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15809 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15810 if (Reduced)
15811 return Reduced;
15812 }
15813
15814 // If we're building a vector out of consecutive loads, just load that
15815 // vector type.
15816 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15817 if (Reduced)
15818 return Reduced;
15819
15820 // If we're building a vector out of extended elements from another vector,
15821 // we can use the P9 vector integer extend instructions. The code assumes
15822 // legal input types (i.e. it can't handle things like v4i16), so do not run
15823 // before legalization.
15824 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15825 Reduced = combineBVOfVecSExt(N, DAG);
15826 if (Reduced)
15827 return Reduced;
15828 }
15829
15830 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15831 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15832 // is a load from <valid narrow width> to i128.
15833 if (Subtarget.isISA3_1()) {
15834 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15835 if (BVOfZLoad)
15836 return BVOfZLoad;
15837 }
15838
15839 if (N->getValueType(ResNo: 0) != MVT::v2f64)
15840 return SDValue();
15841
15842 // Looking for:
15843 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15844 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15845 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15846 return SDValue();
15847 if (N->getOperand(Num: 1).getOpcode() != ISD::SINT_TO_FP &&
15848 N->getOperand(Num: 1).getOpcode() != ISD::UINT_TO_FP)
15849 return SDValue();
15850 if (FirstInput.getOpcode() != N->getOperand(Num: 1).getOpcode())
15851 return SDValue();
15852
15853 SDValue Ext1 = FirstInput.getOperand(i: 0);
15854 SDValue Ext2 = N->getOperand(Num: 1).getOperand(i: 0);
15855 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15856 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15857 return SDValue();
15858
15859 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Val: Ext1.getOperand(i: 1));
15860 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Val: Ext2.getOperand(i: 1));
15861 if (!Ext1Op || !Ext2Op)
15862 return SDValue();
15863 if (Ext1.getOperand(i: 0).getValueType() != MVT::v4i32 ||
15864 Ext1.getOperand(i: 0) != Ext2.getOperand(i: 0))
15865 return SDValue();
15866
15867 int FirstElem = Ext1Op->getZExtValue();
15868 int SecondElem = Ext2Op->getZExtValue();
15869 int SubvecIdx;
15870 if (FirstElem == 0 && SecondElem == 1)
15871 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15872 else if (FirstElem == 2 && SecondElem == 3)
15873 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15874 else
15875 return SDValue();
15876
15877 SDValue SrcVec = Ext1.getOperand(i: 0);
15878 auto NodeType = (N->getOperand(Num: 1).getOpcode() == ISD::SINT_TO_FP) ?
15879 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15880 return DAG.getNode(Opcode: NodeType, DL: dl, VT: MVT::v2f64,
15881 N1: SrcVec, N2: DAG.getIntPtrConstant(Val: SubvecIdx, DL: dl));
15882}
15883
15884SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15885 DAGCombinerInfo &DCI) const {
15886 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15887 N->getOpcode() == ISD::UINT_TO_FP) &&
15888 "Need an int -> FP conversion node here");
15889
15890 if (useSoftFloat() || !Subtarget.has64BitSupport())
15891 return SDValue();
15892
15893 SelectionDAG &DAG = DCI.DAG;
15894 SDLoc dl(N);
15895 SDValue Op(N, 0);
15896
15897 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15898 // from the hardware.
15899 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15900 return SDValue();
15901 if (!Op.getOperand(i: 0).getValueType().isSimple())
15902 return SDValue();
15903 if (Op.getOperand(i: 0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15904 Op.getOperand(i: 0).getValueType().getSimpleVT() > MVT(MVT::i64))
15905 return SDValue();
15906
15907 SDValue FirstOperand(Op.getOperand(i: 0));
15908 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15909 (FirstOperand.getValueType() == MVT::i8 ||
15910 FirstOperand.getValueType() == MVT::i16);
15911 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15912 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15913 bool DstDouble = Op.getValueType() == MVT::f64;
15914 unsigned ConvOp = Signed ?
15915 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15916 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15917 SDValue WidthConst =
15918 DAG.getIntPtrConstant(Val: FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15919 DL: dl, isTarget: false);
15920 LoadSDNode *LDN = cast<LoadSDNode>(Val: FirstOperand.getNode());
15921 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15922 SDValue Ld = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXSIZX, dl,
15923 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
15924 Ops, MemVT: MVT::i8, MMO: LDN->getMemOperand());
15925 DAG.makeEquivalentMemoryOrdering(OldLoad: LDN, NewMemOp: Ld);
15926
15927 // For signed conversion, we need to sign-extend the value in the VSR
15928 if (Signed) {
15929 SDValue ExtOps[] = { Ld, WidthConst };
15930 SDValue Ext = DAG.getNode(Opcode: PPCISD::VEXTS, DL: dl, VT: MVT::f64, Ops: ExtOps);
15931 return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ext);
15932 } else
15933 return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ld);
15934 }
15935
15936
15937 // For i32 intermediate values, unfortunately, the conversion functions
15938 // leave the upper 32 bits of the value undefined. Within the set of
15939 // scalar instructions, we have no method for zero- or sign-extending the
15940 // value. Thus, we cannot handle i32 intermediate values here.
15941 if (Op.getOperand(i: 0).getValueType() == MVT::i32)
15942 return SDValue();
15943
15944 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15945 "UINT_TO_FP is supported only with FPCVT");
15946
15947 // If we have FCFIDS, then use it when converting to single-precision.
15948 // Otherwise, convert to double-precision and then round.
15949 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15950 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15951 : PPCISD::FCFIDS)
15952 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15953 : PPCISD::FCFID);
15954 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15955 ? MVT::f32
15956 : MVT::f64;
15957
15958 // If we're converting from a float to an int and back to a float again,
15959 // then we don't need the store/load pair at all.
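// For example (illustrative, %x is a placeholder f64 value):
//   (f64 (sint_to_fp (i64 (fp_to_sint %x)))) -> (f64 (FCFID (FCTIDZ %x)))
// with an FP_ROUND appended if the result is f32 and FPCVT is unavailable.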
15960 if ((Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_UINT &&
15961 Subtarget.hasFPCVT()) ||
15962 (Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT)) {
15963 SDValue Src = Op.getOperand(i: 0).getOperand(i: 0);
15964 if (Src.getValueType() == MVT::f32) {
15965 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
15966 DCI.AddToWorklist(N: Src.getNode());
15967 } else if (Src.getValueType() != MVT::f64) {
15968 // Make sure that we don't pick up a ppc_fp128 source value.
15969 return SDValue();
15970 }
15971
15972 unsigned FCTOp =
15973 Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15974 PPCISD::FCTIDUZ;
15975
15976 SDValue Tmp = DAG.getNode(Opcode: FCTOp, DL: dl, VT: MVT::f64, Operand: Src);
15977 SDValue FP = DAG.getNode(Opcode: FCFOp, DL: dl, VT: FCFTy, Operand: Tmp);
15978
15979 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15980 FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
15981 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
15982 DCI.AddToWorklist(N: FP.getNode());
15983 }
15984
15985 return FP;
15986 }
15987
15988 return SDValue();
15989}
15990
15991// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15992// builtins) into loads with swaps.
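// For example (illustrative, %p is a placeholder pointer):
//   (v4i32 (load %p)) -> (v4i32 (bitcast (XXSWAPD (LXVD2X %p))))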
15993SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15994 DAGCombinerInfo &DCI) const {
15995 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15996 // load combines.
15997 if (DCI.isBeforeLegalizeOps())
15998 return SDValue();
15999
16000 SelectionDAG &DAG = DCI.DAG;
16001 SDLoc dl(N);
16002 SDValue Chain;
16003 SDValue Base;
16004 MachineMemOperand *MMO;
16005
16006 switch (N->getOpcode()) {
16007 default:
16008 llvm_unreachable("Unexpected opcode for little endian VSX load");
16009 case ISD::LOAD: {
16010 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
16011 Chain = LD->getChain();
16012 Base = LD->getBasePtr();
16013 MMO = LD->getMemOperand();
16014 // If the MMO suggests this isn't a load of a full vector, leave
16015 // things alone. For a built-in, we have to make the change for
16016 // correctness, so if there is a size problem that will be a bug.
16017 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16018 return SDValue();
16019 break;
16020 }
16021 case ISD::INTRINSIC_W_CHAIN: {
16022 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
16023 Chain = Intrin->getChain();
16024 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16025 // us what we want. Get operand 2 instead.
16026 Base = Intrin->getOperand(Num: 2);
16027 MMO = Intrin->getMemOperand();
16028 break;
16029 }
16030 }
16031
16032 MVT VecTy = N->getValueType(ResNo: 0).getSimpleVT();
16033
16034 SDValue LoadOps[] = { Chain, Base };
16035 SDValue Load = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVD2X, dl,
16036 VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other),
16037 Ops: LoadOps, MemVT: MVT::v2f64, MMO);
16038
16039 DCI.AddToWorklist(N: Load.getNode());
16040 Chain = Load.getValue(R: 1);
16041 SDValue Swap = DAG.getNode(
16042 Opcode: PPCISD::XXSWAPD, DL: dl, VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Load);
16043 DCI.AddToWorklist(N: Swap.getNode());
16044
16045 // Add a bitcast if the resulting load type doesn't match v2f64.
16046 if (VecTy != MVT::v2f64) {
16047 SDValue N = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: VecTy, Operand: Swap);
16048 DCI.AddToWorklist(N: N.getNode());
16049 // Package {bitcast value, swap's chain} to match Load's shape.
16050 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, VTList: DAG.getVTList(VT1: VecTy, VT2: MVT::Other),
16051 N1: N, N2: Swap.getValue(R: 1));
16052 }
16053
16054 return Swap;
16055}
16056
16057// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16058// builtins) into stores with swaps.
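// For example (illustrative, %v and %p are placeholders):
//   (store v4i32:%v, %p) -> (STXVD2X (XXSWAPD (v2f64 (bitcast %v))), %p)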
16059SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16060 DAGCombinerInfo &DCI) const {
16061 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16062 // store combines.
16063 if (DCI.isBeforeLegalizeOps())
16064 return SDValue();
16065
16066 SelectionDAG &DAG = DCI.DAG;
16067 SDLoc dl(N);
16068 SDValue Chain;
16069 SDValue Base;
16070 unsigned SrcOpnd;
16071 MachineMemOperand *MMO;
16072
16073 switch (N->getOpcode()) {
16074 default:
16075 llvm_unreachable("Unexpected opcode for little endian VSX store");
16076 case ISD::STORE: {
16077 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
16078 Chain = ST->getChain();
16079 Base = ST->getBasePtr();
16080 MMO = ST->getMemOperand();
16081 SrcOpnd = 1;
16082 // If the MMO suggests this isn't a store of a full vector, leave
16083 // things alone. For a built-in, we have to make the change for
16084 // correctness, so if there is a size problem that will be a bug.
16085 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16086 return SDValue();
16087 break;
16088 }
16089 case ISD::INTRINSIC_VOID: {
16090 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
16091 Chain = Intrin->getChain();
16092 // Intrin->getBasePtr() oddly does not get what we want.
16093 Base = Intrin->getOperand(Num: 3);
16094 MMO = Intrin->getMemOperand();
16095 SrcOpnd = 2;
16096 break;
16097 }
16098 }
16099
16100 SDValue Src = N->getOperand(Num: SrcOpnd);
16101 MVT VecTy = Src.getValueType().getSimpleVT();
16102
16103 // All stores are done as v2f64 with a possible bitcast.
16104 if (VecTy != MVT::v2f64) {
16105 Src = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: Src);
16106 DCI.AddToWorklist(N: Src.getNode());
16107 }
16108
16109 SDValue Swap = DAG.getNode(Opcode: PPCISD::XXSWAPD, DL: dl,
16110 VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Src);
16111 DCI.AddToWorklist(N: Swap.getNode());
16112 Chain = Swap.getValue(R: 1);
16113 SDValue StoreOps[] = { Chain, Swap, Base };
16114 SDValue Store = DAG.getMemIntrinsicNode(Opcode: PPCISD::STXVD2X, dl,
16115 VTList: DAG.getVTList(VT: MVT::Other),
16116 Ops: StoreOps, MemVT: VecTy, MMO);
16117 DCI.AddToWorklist(N: Store.getNode());
16118 return Store;
16119}
16120
16121// Handle DAG combine for STORE (FP_TO_INT F).
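// For example (illustrative, %f and %p are placeholders):
//   (store (i32 (fp_to_sint f64:%f)), %p)
//     -> (ST_VSR_SCAL_INT chain, (converted %f), %p, 4, i32)
// so the converted value is stored directly from a VSR rather than being
// moved through a GPR first.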
16122SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16123 DAGCombinerInfo &DCI) const {
16124 SelectionDAG &DAG = DCI.DAG;
16125 SDLoc dl(N);
16126 unsigned Opcode = N->getOperand(Num: 1).getOpcode();
16127 (void)Opcode;
16128 bool Strict = N->getOperand(Num: 1)->isStrictFPOpcode();
16129
16130 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16131 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16132 && "Not a FP_TO_INT Instruction!");
16133
16134 SDValue Val = N->getOperand(Num: 1).getOperand(i: Strict ? 1 : 0);
16135 EVT Op1VT = N->getOperand(Num: 1).getValueType();
16136 EVT ResVT = Val.getValueType();
16137
16138 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(VT: ResVT))
16139 return SDValue();
16140
16141 // Only perform the combine for conversions to i64/i32, or to i16/i8 on Power9.
16142 bool ValidTypeForStoreFltAsInt =
16143 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16144 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16145
16146 // TODO: Lower conversion from f128 on all VSX targets
16147 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16148 return SDValue();
16149
16150 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16151 cast<StoreSDNode>(Val: N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16152 return SDValue();
16153
16154 Val = convertFPToInt(Op: N->getOperand(Num: 1), DAG, Subtarget);
16155
16156 // Set number of bytes being converted.
16157 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16158 SDValue Ops[] = {N->getOperand(Num: 0), Val, N->getOperand(Num: 2),
16159 DAG.getIntPtrConstant(Val: ByteSize, DL: dl, isTarget: false),
16160 DAG.getValueType(Op1VT)};
16161
16162 Val = DAG.getMemIntrinsicNode(Opcode: PPCISD::ST_VSR_SCAL_INT, dl,
16163 VTList: DAG.getVTList(VT: MVT::Other), Ops,
16164 MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
16165 MMO: cast<StoreSDNode>(Val: N)->getMemOperand());
16166
16167 return Val;
16168}
16169
16170static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16171 // Check that the source of the element keeps flipping
16172 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
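// For example (illustrative), with NumElts = 4 the mask <0, 5, 2, 7>
// alternates between the two source vectors, while <0, 1, 6, 7> does not.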
16173 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16174 for (int i = 1, e = Mask.size(); i < e; i++) {
16175 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16176 return false;
16177 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16178 return false;
16179 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16180 }
16181 return true;
16182}
16183
16184static bool isSplatBV(SDValue Op) {
16185 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16186 return false;
16187 SDValue FirstOp;
16188
16189 // Find first non-undef input.
16190 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16191 FirstOp = Op.getOperand(i);
16192 if (!FirstOp.isUndef())
16193 break;
16194 }
16195
16196 // All inputs are undef or the same as the first non-undef input.
16197 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16198 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16199 return false;
16200 return true;
16201}
16202
16203static SDValue isScalarToVec(SDValue Op) {
16204 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16205 return Op;
16206 if (Op.getOpcode() != ISD::BITCAST)
16207 return SDValue();
16208 Op = Op.getOperand(i: 0);
16209 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16210 return Op;
16211 return SDValue();
16212}
16213
16214// Fix up the shuffle mask to account for the fact that the result of
16215// scalar_to_vector is not in lane zero. This just takes all values in
16216// the ranges specified by the min/max indices and adds the number of
16217// elements required to ensure each element comes from the respective
16218// position in the valid lane.
16219// On little endian, that's just the corresponding element in the other
16220// half of the vector. On big endian, it is in the same half but right
16221// justified rather than left justified in that half.
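// For example (illustrative): for a v4i32 shuffle on little endian where the
// LHS is a permuted scalar_to_vector, a mask entry of 0 becomes 2 (HalfVec),
// because the scalar now lives in the other half of the LHS vector.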
16222static void fixupShuffleMaskForPermutedSToV(
16223 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16224 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16225 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16226 int LHSEltFixup =
16227 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16228 int RHSEltFixup =
16229 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16230 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16231 int Idx = ShuffV[I];
16232 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16233 ShuffV[I] += LHSEltFixup;
16234 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16235 ShuffV[I] += RHSEltFixup;
16236 }
16237}
16238
16239// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16240// the original is:
16241// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16242// In such a case, just change the shuffle mask to extract the element
16243// from the permuted index.
16244static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16245 const PPCSubtarget &Subtarget) {
16246 SDLoc dl(OrigSToV);
16247 EVT VT = OrigSToV.getValueType();
16248 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16249 "Expecting a SCALAR_TO_VECTOR here");
16250 SDValue Input = OrigSToV.getOperand(i: 0);
16251
16252 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16253 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Val: Input.getOperand(i: 1));
16254 SDValue OrigVector = Input.getOperand(i: 0);
16255
16256 // Can't handle non-const element indices or different vector types
16257 // for the input to the extract and the output of the scalar_to_vector.
16258 if (Idx && VT == OrigVector.getValueType()) {
16259 unsigned NumElts = VT.getVectorNumElements();
16260 assert(
16261 NumElts > 1 &&
16262 "Cannot produce a permuted scalar_to_vector for one element vector");
16263 SmallVector<int, 16> NewMask(NumElts, -1);
16264 unsigned ResultInElt = NumElts / 2;
16265 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16266 NewMask[ResultInElt] = Idx->getZExtValue();
16267 return DAG.getVectorShuffle(VT, dl, N1: OrigVector, N2: OrigVector, Mask: NewMask);
16268 }
16269 }
16270 return DAG.getNode(Opcode: PPCISD::SCALAR_TO_VECTOR_PERMUTED, DL: dl, VT,
16271 Operand: OrigSToV.getOperand(i: 0));
16272}
16273
16274static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16275 int HalfVec, int LHSLastElementDefined,
16276 int RHSLastElementDefined) {
16277 for (int Index : ShuffV) {
16278 if (Index < 0) // Skip explicitly undefined mask indices.
16279 continue;
16280 // Handle first input vector of the vector_shuffle.
16281 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16282 (Index > LHSLastElementDefined))
16283 return false;
16284 // Handle second input vector of the vector_shuffle.
16285 if ((RHSLastElementDefined >= 0) &&
16286 (Index > HalfVec + RHSLastElementDefined))
16287 return false;
16288 }
16289 return true;
16290}
16291
16292static SDValue generateSToVPermutedForVecShuffle(
16293 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16294 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16295 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16296 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16297 // Set up the values for the shuffle vector fixup.
16298 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16299 // The last element depends on if the input comes from the LHS or RHS.
16300 //
16301 // For example:
16302 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16303 //
16304 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16305 // because elements 1 and higher of a scalar_to_vector are undefined.
16306 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16307 // because elements 1 and higher of a scalar_to_vector are undefined.
16308 // It is also not 4 because the original scalar_to_vector is wider and
16309 // actually contains two i32 elements.
16310 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16311 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16312 : FirstElt;
16313 SDValue SToVPermuted = getSToVPermuted(OrigSToV: SToVNode, DAG, Subtarget);
16314 if (SToVPermuted.getValueType() != VecShuffOperandType)
16315 SToVPermuted = DAG.getBitcast(VT: VecShuffOperandType, V: SToVPermuted);
16316 return SToVPermuted;
16317}
16318
16319// On little endian subtargets, combine shuffles such as:
16320// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16321// into:
16322// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16323// because the latter can be matched to a single instruction merge.
16324// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16325// to put the value into element zero. Adjust the shuffle mask so that the
16326// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16327// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16328// nodes with elements smaller than doubleword because all the ways
16329// of getting scalar data into a vector register put the value in the
16330// rightmost element of the left half of the vector.
16331SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16332 SelectionDAG &DAG) const {
16333 SDValue LHS = SVN->getOperand(Num: 0);
16334 SDValue RHS = SVN->getOperand(Num: 1);
16335 auto Mask = SVN->getMask();
16336 int NumElts = LHS.getValueType().getVectorNumElements();
16337 SDValue Res(SVN, 0);
16338 SDLoc dl(SVN);
16339 bool IsLittleEndian = Subtarget.isLittleEndian();
16340
16341 // On big endian targets this is only useful for subtargets with direct moves.
16342 // On little endian targets it would be useful for all subtargets with VSX.
16343 // However adding special handling for LE subtargets without direct moves
16344 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16345 // which includes direct moves.
16346 if (!Subtarget.hasDirectMove())
16347 return Res;
16348
16349 // If this is not a shuffle of a shuffle and the first element comes from
16350 // the second vector, canonicalize to the commuted form. This will make it
16351 // more likely to match one of the single instruction patterns.
16352 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16353 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16354 std::swap(a&: LHS, b&: RHS);
16355 Res = DAG.getCommutedVectorShuffle(SV: *SVN);
16356 Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
16357 }
16358
16359 // Adjust the shuffle mask if either input vector comes from a
16360 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16361 // form (to prevent the need for a swap).
16362 SmallVector<int, 16> ShuffV(Mask);
16363 SDValue SToVLHS = isScalarToVec(Op: LHS);
16364 SDValue SToVRHS = isScalarToVec(Op: RHS);
16365 if (SToVLHS || SToVRHS) {
16366 EVT VT = SVN->getValueType(ResNo: 0);
16367 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16368 int ShuffleNumElts = ShuffV.size();
16369 int HalfVec = ShuffleNumElts / 2;
16370 // The width of the "valid lane" (i.e. the lane that contains the value that
16371 // is vectorized) needs to be expressed in terms of the number of elements
16372 // of the shuffle. It is thereby the ratio of the values before and after
16373 // any bitcast, which will be set later on if the LHS or RHS are
16374 // SCALAR_TO_VECTOR nodes.
16375 unsigned LHSNumValidElts = HalfVec;
16376 unsigned RHSNumValidElts = HalfVec;
16377
16378 // Initially assume that neither input is permuted. These will be adjusted
16379 // accordingly if either input is. Note, that -1 means that all elements
16380 // are undefined.
16381 int LHSFirstElt = 0;
16382 int RHSFirstElt = ShuffleNumElts;
16383 int LHSLastElt = -1;
16384 int RHSLastElt = -1;
16385
16386 // Get the permuted scalar to vector nodes for the source(s) that come from
16387 // ISD::SCALAR_TO_VECTOR.
16388 // On big endian systems, this only makes sense for element sizes smaller
16389 // than 64 bits since for 64-bit elements, all instructions already put
16390 // the value into element zero. Since the scalar sizes of the LHS and RHS may
16391 // differ after isScalarToVec, this should be checked using their own sizes.
16392 int LHSScalarSize = 0;
16393 int RHSScalarSize = 0;
16394 if (SToVLHS) {
16395 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16396 if (!IsLittleEndian && LHSScalarSize >= 64)
16397 return Res;
16398 }
16399 if (SToVRHS) {
16400 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16401 if (!IsLittleEndian && RHSScalarSize >= 64)
16402 return Res;
16403 }
16404 if (LHSScalarSize != 0)
16405 LHS = generateSToVPermutedForVecShuffle(
16406 ScalarSize: LHSScalarSize, ShuffleEltWidth, NumValidElts&: LHSNumValidElts, FirstElt: LHSFirstElt,
16407 LastElt&: LHSLastElt, VecShuffOperand: LHS, SToVNode: SToVLHS, DAG, Subtarget);
16408 if (RHSScalarSize != 0)
16409 RHS = generateSToVPermutedForVecShuffle(
16410 ScalarSize: RHSScalarSize, ShuffleEltWidth, NumValidElts&: RHSNumValidElts, FirstElt: RHSFirstElt,
16411 LastElt&: RHSLastElt, VecShuffOperand: RHS, SToVNode: SToVRHS, DAG, Subtarget);
16412
16413 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElementDefined: LHSLastElt, RHSLastElementDefined: RHSLastElt))
16414 return Res;
16415
16416 // Fix up the shuffle mask to reflect where the desired element actually is.
16417 // The minimum and maximum indices that correspond to element zero for both
16418 // the LHS and RHS are computed and will control which shuffle mask entries
16419 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16420 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16421 fixupShuffleMaskForPermutedSToV(
16422 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16423 LHSNumValidElts, RHSNumValidElts, Subtarget);
16424 Res = DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV);
16425
16426 // We may have simplified away the shuffle. We won't be able to do anything
16427 // further with it here.
16428 if (!isa<ShuffleVectorSDNode>(Val: Res))
16429 return Res;
16430 Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
16431 }
16432
16433 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16434 // The common case after we commuted the shuffle is that the RHS is a splat
16435 // and we have elements coming in from the splat at indices that are not
16436 // conducive to using a merge.
16437 // Example:
16438 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16439 if (!isSplatBV(Op: TheSplat))
16440 return Res;
16441
16442 // We are looking for a mask such that all even elements are from
16443 // one vector and all odd elements from the other.
16444 if (!isAlternatingShuffMask(Mask, NumElts))
16445 return Res;
16446
16447 // Adjust the mask so we are pulling in the same index from the splat
16448 // as the index from the interesting vector in consecutive elements.
16449 if (IsLittleEndian) {
16450 // Example (even elements from first vector):
16451 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16452 if (Mask[0] < NumElts)
16453 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16454 if (ShuffV[i] < 0)
16455 continue;
16456 // If element from non-splat is undef, pick first element from splat.
16457 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16458 }
16459 // Example (odd elements from first vector):
16460 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16461 else
16462 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16463 if (ShuffV[i] < 0)
16464 continue;
16465 // If element from non-splat is undef, pick first element from splat.
16466 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16467 }
16468 } else {
16469 // Example (even elements from first vector):
16470 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16471 if (Mask[0] < NumElts)
16472 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16473 if (ShuffV[i] < 0)
16474 continue;
16475 // If element from non-splat is undef, pick first element from splat.
16476 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16477 }
16478 // Example (odd elements from first vector):
16479 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16480 else
16481 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16482 if (ShuffV[i] < 0)
16483 continue;
16484 // If element from non-splat is undef, pick first element from splat.
16485 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16486 }
16487 }
16488
16489 // If the RHS has undefs, we need to remove them since we may have created
16490 // a shuffle that adds those instead of the splat value.
16491 SDValue SplatVal =
16492 cast<BuildVectorSDNode>(Val: TheSplat.getNode())->getSplatValue();
16493 TheSplat = DAG.getSplatBuildVector(VT: TheSplat.getValueType(), DL: dl, Op: SplatVal);
16494
16495 if (IsLittleEndian)
16496 RHS = TheSplat;
16497 else
16498 LHS = TheSplat;
16499 return DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV);
16500}
16501
16502SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16503 LSBaseSDNode *LSBase,
16504 DAGCombinerInfo &DCI) const {
16505 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16506 "Not a reverse memop pattern!");
16507
16508 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16509 auto Mask = SVN->getMask();
16510 int i = 0;
16511 auto I = Mask.rbegin();
16512 auto E = Mask.rend();
16513
16514 for (; I != E; ++I) {
16515 if (*I != i)
16516 return false;
16517 i++;
16518 }
16519 return true;
16520 };
16521
16522 SelectionDAG &DAG = DCI.DAG;
16523 EVT VT = SVN->getValueType(ResNo: 0);
16524
16525 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16526 return SDValue();
16527
16528 // Before P9, we have the PPCVSXSwapRemoval pass to hack the element order.
16529 // See the comment in PPCVSXSwapRemoval.cpp.
16530 // This combine conflicts with that optimization, so we don't do it on pre-P9.
16531 if (!Subtarget.hasP9Vector())
16532 return SDValue();
16533
16534 if (!IsElementReverse(SVN))
16535 return SDValue();
16536
16537 if (LSBase->getOpcode() == ISD::LOAD) {
16538 // If result 0 of the load has any user other than the shufflevector
16539 // instruction, it is not profitable to replace the shufflevector with a
16540 // reverse load.
16541 for (SDUse &Use : LSBase->uses())
16542 if (Use.getResNo() == 0 &&
16543 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16544 return SDValue();
16545
16546 SDLoc dl(LSBase);
16547 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16548 return DAG.getMemIntrinsicNode(
16549 Opcode: PPCISD::LOAD_VEC_BE, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops: LoadOps,
16550 MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
16551 }
16552
16553 if (LSBase->getOpcode() == ISD::STORE) {
16554 // If there are other uses of the shuffle, the swap cannot be avoided.
16555 // Forcing the use of an X-Form (since swapped stores only have
16556 // X-Forms) without removing the swap is unprofitable.
16557 if (!SVN->hasOneUse())
16558 return SDValue();
16559
16560 SDLoc dl(LSBase);
16561 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(Num: 0),
16562 LSBase->getBasePtr()};
16563 return DAG.getMemIntrinsicNode(
16564 Opcode: PPCISD::STORE_VEC_BE, dl, VTList: DAG.getVTList(VT: MVT::Other), Ops: StoreOps,
16565 MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
16566 }
16567
16568 llvm_unreachable("Expected a load or store node here");
16569}
16570
16571static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16572 unsigned IntrinsicID = Intrin.getConstantOperandVal(i: 1);
16573 if (IntrinsicID == Intrinsic::ppc_stdcx)
16574 StoreWidth = 8;
16575 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16576 StoreWidth = 4;
16577 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16578 StoreWidth = 2;
16579 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16580 StoreWidth = 1;
16581 else
16582 return false;
16583 return true;
16584}
16585
16586static SDValue DAGCombineAddc(SDNode *N,
16587 llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
16588 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(Value: 1)) {
16589 // (ADDC (ADDE 0, 0, C), -1) -> C
16590 SDValue LHS = N->getOperand(Num: 0);
16591 SDValue RHS = N->getOperand(Num: 1);
16592 if (LHS->getOpcode() == PPCISD::ADDE &&
16593 isNullConstant(V: LHS->getOperand(Num: 0)) &&
16594 isNullConstant(V: LHS->getOperand(Num: 1)) && isAllOnesConstant(V: RHS)) {
16595 return DCI.CombineTo(N, Res0: SDValue(N, 0), Res1: LHS->getOperand(Num: 2));
16596 }
16597 }
16598 return SDValue();
16599}
16600
16601SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16602 DAGCombinerInfo &DCI) const {
16603 SelectionDAG &DAG = DCI.DAG;
16604 SDLoc dl(N);
16605 switch (N->getOpcode()) {
16606 default: break;
16607 case ISD::ADD:
16608 return combineADD(N, DCI);
16609 case ISD::AND: {
16610 // We don't want (and (zext (shift...)), C) if C fits in the width of the
16611 // original input as that will prevent us from selecting optimal rotates.
16612 // This only matters if the input to the extend is i32 widened to i64.
16613 SDValue Op1 = N->getOperand(Num: 0);
16614 SDValue Op2 = N->getOperand(Num: 1);
16615 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16616 Op1.getOpcode() != ISD::ANY_EXTEND) ||
16617 !isa<ConstantSDNode>(Val: Op2) || N->getValueType(ResNo: 0) != MVT::i64 ||
16618 Op1.getOperand(i: 0).getValueType() != MVT::i32)
16619 break;
16620 SDValue NarrowOp = Op1.getOperand(i: 0);
16621 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16622 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16623 break;
16624
16625 uint64_t Imm = Op2->getAsZExtVal();
16626 // Make sure that the constant is narrow enough to fit in the narrow type.
16627 if (!isUInt<32>(x: Imm))
16628 break;
16629 SDValue ConstOp = DAG.getConstant(Val: Imm, DL: dl, VT: MVT::i32);
16630 SDValue NarrowAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: NarrowOp, N2: ConstOp);
16631 return DAG.getZExtOrTrunc(Op: NarrowAnd, DL: dl, VT: N->getValueType(ResNo: 0));
16632 }
16633 case ISD::SHL:
16634 return combineSHL(N, DCI);
16635 case ISD::SRA:
16636 return combineSRA(N, DCI);
16637 case ISD::SRL:
16638 return combineSRL(N, DCI);
16639 case ISD::MUL:
16640 return combineMUL(N, DCI);
16641 case ISD::FMA:
16642 case PPCISD::FNMSUB:
16643 return combineFMALike(N, DCI);
16644 case PPCISD::SHL:
16645 if (isNullConstant(V: N->getOperand(Num: 0))) // 0 << V -> 0.
16646 return N->getOperand(Num: 0);
16647 break;
16648 case PPCISD::SRL:
16649 if (isNullConstant(V: N->getOperand(Num: 0))) // 0 >>u V -> 0.
16650 return N->getOperand(Num: 0);
16651 break;
16652 case PPCISD::SRA:
16653 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0))) {
16654 if (C->isZero() || // 0 >>s V -> 0.
16655 C->isAllOnes()) // -1 >>s V -> -1.
16656 return N->getOperand(Num: 0);
16657 }
16658 break;
16659 case ISD::SIGN_EXTEND:
16660 case ISD::ZERO_EXTEND:
16661 case ISD::ANY_EXTEND:
16662 return DAGCombineExtBoolTrunc(N, DCI);
16663 case ISD::TRUNCATE:
16664 return combineTRUNCATE(N, DCI);
16665 case ISD::SETCC:
16666 if (SDValue CSCC = combineSetCC(N, DCI))
16667 return CSCC;
16668 [[fallthrough]];
16669 case ISD::SELECT_CC:
16670 return DAGCombineTruncBoolExt(N, DCI);
16671 case ISD::SINT_TO_FP:
16672 case ISD::UINT_TO_FP:
16673 return combineFPToIntToFP(N, DCI);
16674 case ISD::VECTOR_SHUFFLE:
16675 if (ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode())) {
16676 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(Val: N->getOperand(Num: 0));
16677 return combineVReverseMemOP(SVN: cast<ShuffleVectorSDNode>(Val: N), LSBase, DCI);
16678 }
16679 return combineVectorShuffle(SVN: cast<ShuffleVectorSDNode>(Val: N), DAG&: DCI.DAG);
16680 case ISD::STORE: {
16681
16682 EVT Op1VT = N->getOperand(Num: 1).getValueType();
16683 unsigned Opcode = N->getOperand(Num: 1).getOpcode();
16684
16685 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16686 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16687 SDValue Val = combineStoreFPToInt(N, DCI);
16688 if (Val)
16689 return Val;
16690 }
16691
16692 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16693 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N->getOperand(Num: 1));
16694 SDValue Val = combineVReverseMemOP(SVN, LSBase: cast<LSBaseSDNode>(Val: N), DCI);
16695 if (Val)
16696 return Val;
16697 }
16698
16699 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
16700 if (cast<StoreSDNode>(Val: N)->isUnindexed() && Opcode == ISD::BSWAP &&
16701 N->getOperand(Num: 1).getNode()->hasOneUse() &&
16702 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16703 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16704
16705 // STBRX can only handle simple types and it makes no sense to store less
16706 // than two bytes in byte-reversed order.
16707 EVT mVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
16708 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16709 break;
16710
16711 SDValue BSwapOp = N->getOperand(Num: 1).getOperand(i: 0);
16712 // Do an any-extend to 32-bits if this is a half-word input.
16713 if (BSwapOp.getValueType() == MVT::i16)
16714 BSwapOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: BSwapOp);
16715
16716 // If the type of the BSWAP operand is wider than the stored memory width,
16717 // it needs to be shifted to the right before STBRX.
16718 if (Op1VT.bitsGT(VT: mVT)) {
16719 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16720 BSwapOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: Op1VT, N1: BSwapOp,
16721 N2: DAG.getConstant(Val: Shift, DL: dl, VT: MVT::i32));
16722 // Need to truncate if this is a bswap of i64 stored as i32/i16.
16723 if (Op1VT == MVT::i64)
16724 BSwapOp = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: BSwapOp);
16725 }
16726
16727 SDValue Ops[] = {
16728 N->getOperand(Num: 0), BSwapOp, N->getOperand(Num: 2), DAG.getValueType(mVT)
16729 };
16730 return
16731 DAG.getMemIntrinsicNode(Opcode: PPCISD::STBRX, dl, VTList: DAG.getVTList(VT: MVT::Other),
16732 Ops, MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
16733 MMO: cast<StoreSDNode>(Val: N)->getMemOperand());
16734 }
16735
16736 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
16737 // This increases the chance of CSE for constant construction.
16738 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16739 isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) && Op1VT == MVT::i32) {
16740 // Need to sign-extend to 64 bits to handle negative values.
16741 EVT MemVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
16742 uint64_t Val64 = SignExtend64(X: N->getConstantOperandVal(Num: 1),
16743 B: MemVT.getSizeInBits());
16744 SDValue Const64 = DAG.getConstant(Val: Val64, DL: dl, VT: MVT::i64);
16745
16746 auto *ST = cast<StoreSDNode>(Val: N);
16747 SDValue NewST = DAG.getStore(Chain: ST->getChain(), dl, Val: Const64,
16748 Ptr: ST->getBasePtr(), Offset: ST->getOffset(), SVT: MemVT,
16749 MMO: ST->getMemOperand(), AM: ST->getAddressingMode(),
16750 /*IsTruncating=*/true);
16751 // Note we use CombineTo here to prevent DAGCombiner from visiting the
16752 // new store which will change the constant by removing non-demanded bits.
16753 return ST->isUnindexed()
16754 ? DCI.CombineTo(N, Res: NewST, /*AddTo=*/false)
16755 : DCI.CombineTo(N, Res0: NewST, Res1: NewST.getValue(R: 1), /*AddTo=*/false);
16756 }
16757
16758 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16759 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16760 if (Op1VT.isSimple()) {
16761 MVT StoreVT = Op1VT.getSimpleVT();
16762 if (Subtarget.needsSwapsForVSXMemOps() &&
16763 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16764 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16765 return expandVSXStoreForLE(N, DCI);
16766 }
16767 break;
16768 }
16769 case ISD::LOAD: {
16770 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
16771 EVT VT = LD->getValueType(ResNo: 0);
16772
16773 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16774 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16775 if (VT.isSimple()) {
16776 MVT LoadVT = VT.getSimpleVT();
16777 if (Subtarget.needsSwapsForVSXMemOps() &&
16778 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16779 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16780 return expandVSXLoadForLE(N, DCI);
16781 }
16782
16783 // We sometimes end up with a 64-bit integer load, from which we extract
16784 // two single-precision floating-point numbers. This happens with
16785 // std::complex<float>, and other similar structures, because of the way we
16786 // canonicalize structure copies. However, if we lack direct moves,
16787 // then the final bitcasts from the extracted integer values to the
16788 // floating-point numbers turn into store/load pairs. Even with direct moves,
16789 // just loading the two floating-point numbers is likely better.
16790 auto ReplaceTwoFloatLoad = [&]() {
16791 if (VT != MVT::i64)
16792 return false;
16793
16794 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16795 LD->isVolatile())
16796 return false;
16797
16798 // We're looking for a sequence like this:
16799 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16800 // t16: i64 = srl t13, Constant:i32<32>
16801 // t17: i32 = truncate t16
16802 // t18: f32 = bitcast t17
16803 // t19: i32 = truncate t13
16804 // t20: f32 = bitcast t19
16805
16806 if (!LD->hasNUsesOfValue(NUses: 2, Value: 0))
16807 return false;
16808
16809 auto UI = LD->user_begin();
16810 while (UI.getUse().getResNo() != 0) ++UI;
16811 SDNode *Trunc = *UI++;
16812 while (UI.getUse().getResNo() != 0) ++UI;
16813 SDNode *RightShift = *UI;
16814 if (Trunc->getOpcode() != ISD::TRUNCATE)
16815 std::swap(a&: Trunc, b&: RightShift);
16816
16817 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16818 Trunc->getValueType(ResNo: 0) != MVT::i32 ||
16819 !Trunc->hasOneUse())
16820 return false;
16821 if (RightShift->getOpcode() != ISD::SRL ||
16822 !isa<ConstantSDNode>(Val: RightShift->getOperand(Num: 1)) ||
16823 RightShift->getConstantOperandVal(Num: 1) != 32 ||
16824 !RightShift->hasOneUse())
16825 return false;
16826
16827 SDNode *Trunc2 = *RightShift->user_begin();
16828 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16829 Trunc2->getValueType(ResNo: 0) != MVT::i32 ||
16830 !Trunc2->hasOneUse())
16831 return false;
16832
16833 SDNode *Bitcast = *Trunc->user_begin();
16834 SDNode *Bitcast2 = *Trunc2->user_begin();
16835
16836 if (Bitcast->getOpcode() != ISD::BITCAST ||
16837 Bitcast->getValueType(ResNo: 0) != MVT::f32)
16838 return false;
16839 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16840 Bitcast2->getValueType(ResNo: 0) != MVT::f32)
16841 return false;
16842
16843 if (Subtarget.isLittleEndian())
16844 std::swap(a&: Bitcast, b&: Bitcast2);
16845
16846 // Bitcast has the second float (in memory-layout order) and Bitcast2
16847 // has the first one.
16848
16849 SDValue BasePtr = LD->getBasePtr();
16850 if (LD->isIndexed()) {
16851 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16852 "Non-pre-inc AM on PPC?");
16853 BasePtr =
16854 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
16855 N2: LD->getOffset());
16856 }
16857
16858 auto MMOFlags =
16859 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16860 SDValue FloatLoad = DAG.getLoad(VT: MVT::f32, dl, Chain: LD->getChain(), Ptr: BasePtr,
16861 PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign(),
16862 MMOFlags, AAInfo: LD->getAAInfo());
16863 SDValue AddPtr =
16864 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(),
16865 N1: BasePtr, N2: DAG.getIntPtrConstant(Val: 4, DL: dl));
16866 SDValue FloatLoad2 = DAG.getLoad(
16867 VT: MVT::f32, dl, Chain: SDValue(FloatLoad.getNode(), 1), Ptr: AddPtr,
16868 PtrInfo: LD->getPointerInfo().getWithOffset(O: 4),
16869 Alignment: commonAlignment(A: LD->getAlign(), Offset: 4), MMOFlags, AAInfo: LD->getAAInfo());
16870
16871 if (LD->isIndexed()) {
16872 // Note that DAGCombine should re-form any pre-increment load(s) from
16873 // what is produced here if that makes sense.
16874 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: BasePtr);
16875 }
16876
16877 DCI.CombineTo(N: Bitcast2, Res: FloatLoad);
16878 DCI.CombineTo(N: Bitcast, Res: FloatLoad2);
16879
16880 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, LD->isIndexed() ? 2 : 1),
16881 To: SDValue(FloatLoad2.getNode(), 1));
16882 return true;
16883 };
16884
16885 if (ReplaceTwoFloatLoad())
16886 return SDValue(N, 0);
16887
16888 EVT MemVT = LD->getMemoryVT();
16889 Type *Ty = MemVT.getTypeForEVT(Context&: *DAG.getContext());
16890 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16891 if (LD->isUnindexed() && VT.isVector() &&
16892 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16893 // P8 and later hardware should just use LOAD.
16894 !Subtarget.hasP8Vector() &&
16895 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16896 VT == MVT::v4f32))) &&
16897 LD->getAlign() < ABIAlignment) {
16898 // This is a type-legal unaligned Altivec load.
16899 SDValue Chain = LD->getChain();
16900 SDValue Ptr = LD->getBasePtr();
16901 bool isLittleEndian = Subtarget.isLittleEndian();
16902
16903 // This implements the loading of unaligned vectors as described in
16904 // the venerable Apple Velocity Engine overview. Specifically:
16905 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16906 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16907 //
16908 // The general idea is to expand a sequence of one or more unaligned
16909 // loads into an alignment-based permutation-control instruction (lvsl
16910 // or lvsr), a series of regular vector loads (which always truncate
16911 // their input address to an aligned address), and a series of
16912 // permutations. The results of these permutations are the requested
16913 // loaded values. The trick is that the last "extra" load is not taken
16914 // from the address you might suspect (sizeof(vector) bytes after the
16915 // last requested load), but rather sizeof(vector) - 1 bytes after the
16916 // last requested vector. The point of this is to avoid a page fault if
16917 // the base address happened to be aligned. This works because if the
16918 // base address is aligned, then adding less than a full vector length
16919 // will cause the last vector in the sequence to be (re)loaded.
16920 // Otherwise, the next vector will be fetched as you might suspect was
16921 // necessary.
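      //
      // For example, with 16-byte vectors, an unaligned load from 0x1003
      // becomes lvsl/lvsr on 0x1003, lvx loads at 0x1003 and at 0x1003 + 15
      // (which lvx truncates to 0x1000 and 0x1010 respectively), and a vperm
      // of the two results. Had the base been 0x1000, the "extra" address
      // 0x100f would truncate back to 0x1000, so the same aligned block is
      // simply reloaded and nothing past the requested data is touched.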
16922
16923 // We might be able to reuse the permutation generation from
16924 // a different base address offset from this one by an aligned amount.
16925 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16926 // optimization later.
16927 Intrinsic::ID Intr, IntrLD, IntrPerm;
16928 MVT PermCntlTy, PermTy, LDTy;
16929 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16930 : Intrinsic::ppc_altivec_lvsl;
16931 IntrLD = Intrinsic::ppc_altivec_lvx;
16932 IntrPerm = Intrinsic::ppc_altivec_vperm;
16933 PermCntlTy = MVT::v16i8;
16934 PermTy = MVT::v4i32;
16935 LDTy = MVT::v4i32;
16936
16937 SDValue PermCntl = BuildIntrinsicOp(IID: Intr, Op: Ptr, DAG, dl, DestVT: PermCntlTy);
16938
16939 // Create the new MMO for the new base load. It is like the original MMO,
16940 // but represents an area in memory almost twice the vector size centered
16941 // on the original address. If the address is unaligned, we might start
16942 // reading up to (sizeof(vector)-1) bytes below the address of the
16943 // original unaligned load.
16944 MachineFunction &MF = DAG.getMachineFunction();
16945 MachineMemOperand *BaseMMO =
16946 MF.getMachineMemOperand(MMO: LD->getMemOperand(),
16947 Offset: -(int64_t)MemVT.getStoreSize()+1,
16948 Size: 2*MemVT.getStoreSize()-1);
16949
16950 // Create the new base load.
16951 SDValue LDXIntID =
16952 DAG.getTargetConstant(Val: IntrLD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
16953 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16954 SDValue BaseLoad =
16955 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
16956 VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
16957 Ops: BaseLoadOps, MemVT: LDTy, MMO: BaseMMO);
16958
16959 // Note that the value of IncOffset (which is provided to the next
16960 // load's pointer info offset value, and thus used to calculate the
16961 // alignment), and the value of IncValue (which is actually used to
16962 // increment the pointer value) are different! This is because we
16963 // require the next load to appear to be aligned, even though it
16964 // is actually offset from the base pointer by a lesser amount.
16965 int IncOffset = VT.getSizeInBits() / 8;
16966 int IncValue = IncOffset;
16967
16968 // Walk (both up and down) the chain looking for another load at the real
16969 // (aligned) offset (the alignment of the other load does not matter in
16970 // this case). If found, then do not use the offset reduction trick, as
16971 // that will prevent the loads from being later combined (as they would
16972 // otherwise be duplicates).
16973 if (!findConsecutiveLoad(LD, DAG))
16974 --IncValue;
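      // For the common 16-byte vector case this leaves IncOffset at 16 while
      // IncValue drops to 15 (the sizeof(vector) - 1 trick described above);
      // if a consecutive load was found, both remain 16.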
16975
16976 SDValue Increment =
16977 DAG.getConstant(Val: IncValue, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
16978 Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Ptr.getValueType(), N1: Ptr, N2: Increment);
16979
16980 MachineMemOperand *ExtraMMO =
16981 MF.getMachineMemOperand(MMO: LD->getMemOperand(),
16982 Offset: 1, Size: 2*MemVT.getStoreSize()-1);
16983 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16984 SDValue ExtraLoad =
16985 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
16986 VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
16987 Ops: ExtraLoadOps, MemVT: LDTy, MMO: ExtraMMO);
16988
16989 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
16990 N1: BaseLoad.getValue(R: 1), N2: ExtraLoad.getValue(R: 1));
16991
16992 // Because vperm has a big-endian bias, we must reverse the order
16993 // of the input vectors and complement the permute control vector
16994 // when generating little endian code. We have already handled the
16995 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16996 // and ExtraLoad here.
16997 SDValue Perm;
16998 if (isLittleEndian)
16999 Perm = BuildIntrinsicOp(IID: IntrPerm,
17000 Op0: ExtraLoad, Op1: BaseLoad, Op2: PermCntl, DAG, dl);
17001 else
17002 Perm = BuildIntrinsicOp(IID: IntrPerm,
17003 Op0: BaseLoad, Op1: ExtraLoad, Op2: PermCntl, DAG, dl);
17004
17005 if (VT != PermTy)
17006 Perm = Subtarget.hasAltivec()
17007 ? DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Perm)
17008 : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Perm,
17009 N2: DAG.getTargetConstant(Val: 1, DL: dl, VT: MVT::i64));
17010 // second argument is 1 because this rounding
17011 // is always exact.
17012
17013 // The output of the permutation is our loaded result, the TokenFactor is
17014 // our new chain.
17015 DCI.CombineTo(N, Res0: Perm, Res1: TF);
17016 return SDValue(N, 0);
17017 }
17018 }
17019 break;
17020 case ISD::INTRINSIC_WO_CHAIN: {
17021 bool isLittleEndian = Subtarget.isLittleEndian();
17022 unsigned IID = N->getConstantOperandVal(Num: 0);
17023 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17024 : Intrinsic::ppc_altivec_lvsl);
17025 if (IID == Intr && N->getOperand(Num: 1)->getOpcode() == ISD::ADD) {
17026 SDValue Add = N->getOperand(Num: 1);
17027
17028 int Bits = 4 /* 16 byte alignment */;
17029
17030 if (DAG.MaskedValueIsZero(Op: Add->getOperand(Num: 1),
17031 Mask: APInt::getAllOnes(numBits: Bits /* alignment */)
17032 .zext(width: Add.getScalarValueSizeInBits()))) {
17033 SDNode *BasePtr = Add->getOperand(Num: 0).getNode();
17034 for (SDNode *U : BasePtr->users()) {
17035 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17036 U->getConstantOperandVal(Num: 0) == IID) {
            // We've found another LVSL/LVSR, and this address differs from
            // that one by an aligned (multiple-of-16) amount. The results will
            // be the same, so use the one we've just found instead.
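            // (lvsl/lvsr derive the permute control solely from the low four
            // bits of the effective address, so adding a multiple of 16 to the
            // base cannot change the result.)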
17040
17041 return SDValue(U, 0);
17042 }
17043 }
17044 }
17045
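      // Similarly, if this address and another lvsl/lvsr's address are both
      // constant offsets from the same base and those offsets differ by a
      // multiple of 16, the permute control is identical, so reuse that node.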
17046 if (isa<ConstantSDNode>(Val: Add->getOperand(Num: 1))) {
17047 SDNode *BasePtr = Add->getOperand(Num: 0).getNode();
17048 for (SDNode *U : BasePtr->users()) {
17049 if (U->getOpcode() == ISD::ADD &&
17050 isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) &&
17051 (Add->getConstantOperandVal(Num: 1) - U->getConstantOperandVal(Num: 1)) %
17052 (1ULL << Bits) ==
17053 0) {
17054 SDNode *OtherAdd = U;
17055 for (SDNode *V : OtherAdd->users()) {
17056 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17057 V->getConstantOperandVal(Num: 0) == IID) {
17058 return SDValue(V, 0);
17059 }
17060 }
17061 }
17062 }
17063 }
17064 }
17065
    // Combine vmaxsw/h/b(a, a's negation) into abs(a).
    // This exposes the vabsduw/h/b opportunity for downstream combines.
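    // This relies on smax(a, 0 - a) == |a| and smax(x - y, y - x) == |x - y|,
    // so each matched form below can be rewritten as a single ISD::ABS.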
17068 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17069 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17070 IID == Intrinsic::ppc_altivec_vmaxsh ||
17071 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17072 SDValue V1 = N->getOperand(Num: 1);
17073 SDValue V2 = N->getOperand(Num: 2);
17074 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17075 V1.getSimpleValueType() == MVT::v8i16 ||
17076 V1.getSimpleValueType() == MVT::v16i8) &&
17077 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17078 // (0-a, a)
17079 if (V1.getOpcode() == ISD::SUB &&
17080 ISD::isBuildVectorAllZeros(N: V1.getOperand(i: 0).getNode()) &&
17081 V1.getOperand(i: 1) == V2) {
17082 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V2.getValueType(), Operand: V2);
17083 }
17084 // (a, 0-a)
17085 if (V2.getOpcode() == ISD::SUB &&
17086 ISD::isBuildVectorAllZeros(N: V2.getOperand(i: 0).getNode()) &&
17087 V2.getOperand(i: 1) == V1) {
17088 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17089 }
17090 // (x-y, y-x)
17091 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17092 V1.getOperand(i: 0) == V2.getOperand(i: 1) &&
17093 V1.getOperand(i: 1) == V2.getOperand(i: 0)) {
17094 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17095 }
17096 }
17097 }
17098 }
17099
17100 break;
17101 case ISD::INTRINSIC_W_CHAIN:
17102 switch (N->getConstantOperandVal(Num: 1)) {
17103 default:
17104 break;
17105 case Intrinsic::ppc_altivec_vsum4sbs:
17106 case Intrinsic::ppc_altivec_vsum4shs:
17107 case Intrinsic::ppc_altivec_vsum4ubs: {
17108 // These sum-across intrinsics only have a chain due to the side effect
17109 // that they may set the SAT bit. If we know the SAT bit will not be set
17110 // for some inputs, we can replace any uses of their chain with the
17111 // input chain.
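      // In particular, with a zero accumulator the partial sums of the
      // byte/halfword elements cannot overflow a 32-bit word, so the SAT bit
      // cannot be set.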
17112 if (BuildVectorSDNode *BVN =
17113 dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 3))) {
17114 APInt APSplatBits, APSplatUndef;
17115 unsigned SplatBitSize;
17116 bool HasAnyUndefs;
17117 bool BVNIsConstantSplat = BVN->isConstantSplat(
17118 SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 0,
17119 isBigEndian: !Subtarget.isLittleEndian());
17120 // If the constant splat vector is 0, the SAT bit will not be set.
17121 if (BVNIsConstantSplat && APSplatBits == 0)
17122 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: N->getOperand(Num: 0));
17123 }
17124 return SDValue();
17125 }
17126 case Intrinsic::ppc_vsx_lxvw4x:
17127 case Intrinsic::ppc_vsx_lxvd2x:
17128 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17129 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17130 if (Subtarget.needsSwapsForVSXMemOps())
17131 return expandVSXLoadForLE(N, DCI);
17132 break;
17133 }
17134 break;
17135 case ISD::INTRINSIC_VOID:
17136 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17137 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17138 if (Subtarget.needsSwapsForVSXMemOps()) {
17139 switch (N->getConstantOperandVal(Num: 1)) {
17140 default:
17141 break;
17142 case Intrinsic::ppc_vsx_stxvw4x:
17143 case Intrinsic::ppc_vsx_stxvd2x:
17144 return expandVSXStoreForLE(N, DCI);
17145 }
17146 }
17147 break;
17148 case ISD::BSWAP: {
17149 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17150 // For subtargets without LDBRX, we can still do better than the default
17151 // expansion even for 64-bit BSWAP (LOAD).
17152 bool Is64BitBswapOn64BitTgt =
17153 Subtarget.isPPC64() && N->getValueType(ResNo: 0) == MVT::i64;
17154 bool IsSingleUseNormalLd = ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode()) &&
17155 N->getOperand(Num: 0).hasOneUse();
17156 if (IsSingleUseNormalLd &&
17157 (N->getValueType(ResNo: 0) == MVT::i32 || N->getValueType(ResNo: 0) == MVT::i16 ||
17158 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17159 SDValue Load = N->getOperand(Num: 0);
17160 LoadSDNode *LD = cast<LoadSDNode>(Val&: Load);
17161 // Create the byte-swapping load.
17162 SDValue Ops[] = {
17163 LD->getChain(), // Chain
17164 LD->getBasePtr(), // Ptr
17165 DAG.getValueType(N->getValueType(ResNo: 0)) // VT
17166 };
17167 SDValue BSLoad =
17168 DAG.getMemIntrinsicNode(Opcode: PPCISD::LBRX, dl,
17169 VTList: DAG.getVTList(VT1: N->getValueType(ResNo: 0) == MVT::i64 ?
17170 MVT::i64 : MVT::i32, VT2: MVT::Other),
17171 Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
17172
17173 // If this is an i16 load, insert the truncate.
17174 SDValue ResVal = BSLoad;
17175 if (N->getValueType(ResNo: 0) == MVT::i16)
17176 ResVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i16, Operand: BSLoad);
17177
17178 // First, combine the bswap away. This makes the value produced by the
17179 // load dead.
17180 DCI.CombineTo(N, Res: ResVal);
17181
      // Next, combine the load away; we give it a bogus result value but a
      // real chain result. The result value is dead because the bswap is dead.
17184 DCI.CombineTo(N: Load.getNode(), Res0: ResVal, Res1: BSLoad.getValue(R: 1));
17185
17186 // Return N so it doesn't get rechecked!
17187 return SDValue(N, 0);
17188 }
17189 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17190 // before legalization so that the BUILD_PAIR is handled correctly.
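    // That is, split the i64 bswap(load p) into two i32 loads (at p and p+4),
    // byte-swap each half, and reassemble them with BUILD_PAIR in whichever
    // order the target endianness requires.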
17191 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17192 !IsSingleUseNormalLd)
17193 return SDValue();
17194 LoadSDNode *LD = cast<LoadSDNode>(Val: N->getOperand(Num: 0));
17195
17196 // Can't split volatile or atomic loads.
17197 if (!LD->isSimple())
17198 return SDValue();
17199 SDValue BasePtr = LD->getBasePtr();
17200 SDValue Lo = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr,
17201 PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign());
17202 Lo = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Lo);
17203 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
17204 N2: DAG.getIntPtrConstant(Val: 4, DL: dl));
17205 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
17206 MMO: LD->getMemOperand(), Offset: 4, Size: 4);
17207 SDValue Hi = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr, MMO: NewMMO);
17208 Hi = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Hi);
17209 SDValue Res;
17210 if (Subtarget.isLittleEndian())
17211 Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Hi, N2: Lo);
17212 else
17213 Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
17214 SDValue TF =
17215 DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
17216 N1: Hi.getOperand(i: 0).getValue(R: 1), N2: Lo.getOperand(i: 0).getValue(R: 1));
17217 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: TF);
17218 return Res;
17219 }
17220 case PPCISD::VCMP:
17221 // If a VCMP_rec node already exists with exactly the same operands as this
17222 // node, use its result instead of this node (VCMP_rec computes both a CR6
17223 // and a normal output).
17224 //
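    // A matching VCMP_rec would use all three of this node's operands, so each
    // of them must have more than one use for the scan below to be worthwhile.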
17225 if (!N->getOperand(Num: 0).hasOneUse() &&
17226 !N->getOperand(Num: 1).hasOneUse() &&
17227 !N->getOperand(Num: 2).hasOneUse()) {
17228
17229 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17230 SDNode *VCMPrecNode = nullptr;
17231
17232 SDNode *LHSN = N->getOperand(Num: 0).getNode();
17233 for (SDNode *User : LHSN->users())
17234 if (User->getOpcode() == PPCISD::VCMP_rec &&
17235 User->getOperand(Num: 1) == N->getOperand(Num: 1) &&
17236 User->getOperand(Num: 2) == N->getOperand(Num: 2) &&
17237 User->getOperand(Num: 0) == N->getOperand(Num: 0)) {
17238 VCMPrecNode = User;
17239 break;
17240 }
17241
      // If there is no VCMP_rec node, or if its flag result is unused,
      // don't transform this.
17244 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(NUses: 0, Value: 1))
17245 break;
17246
17247 // Look at the (necessarily single) use of the flag value. If it has a
17248 // chain, this transformation is more complex. Note that multiple things
17249 // could use the value result, which we should ignore.
17250 SDNode *FlagUser = nullptr;
17251 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17252 FlagUser == nullptr; ++UI) {
17253 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17254 SDNode *User = UI->getUser();
17255 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17256 if (User->getOperand(Num: i) == SDValue(VCMPrecNode, 1)) {
17257 FlagUser = User;
17258 break;
17259 }
17260 }
17261 }
17262
17263 // If the user is a MFOCRF instruction, we know this is safe.
17264 // Otherwise we give up for right now.
17265 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17266 return SDValue(VCMPrecNode, 0);
17267 }
17268 break;
17269 case ISD::BR_CC: {
17270 // If this is a branch on an altivec predicate comparison, lower this so
17271 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17272 // lowering is done pre-legalize, because the legalizer lowers the predicate
17273 // compare down to code that is difficult to reassemble.
17274 // This code also handles branches that depend on the result of a store
17275 // conditional.
17276 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 1))->get();
17277 SDValue LHS = N->getOperand(Num: 2), RHS = N->getOperand(Num: 3);
17278
17279 int CompareOpc;
17280 bool isDot;
17281
17282 if (!isa<ConstantSDNode>(Val: RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17283 break;
17284
17285 // Since we are doing this pre-legalize, the RHS can be a constant of
17286 // arbitrary bitwidth which may cause issues when trying to get the value
17287 // from the underlying APInt.
17288 auto RHSAPInt = RHS->getAsAPIntVal();
17289 if (!RHSAPInt.isIntN(N: 64))
17290 break;
17291
17292 unsigned Val = RHSAPInt.getZExtValue();
17293 auto isImpossibleCompare = [&]() {
17294 // If this is a comparison against something other than 0/1, then we know
17295 // that the condition is never/always true.
17296 if (Val != 0 && Val != 1) {
17297 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17298 return N->getOperand(Num: 0);
17299 // Always !=, turn it into an unconditional branch.
17300 return DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other,
17301 N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 4));
17302 }
17303 return SDValue();
17304 };
17305 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17306 unsigned StoreWidth = 0;
17307 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17308 isStoreConditional(Intrin: LHS, StoreWidth)) {
17309 if (SDValue Impossible = isImpossibleCompare())
17310 return Impossible;
17311 PPC::Predicate CompOpc;
17312 // eq 0 => ne
17313 // ne 0 => eq
17314 // eq 1 => eq
17315 // ne 1 => ne
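      // The st[bhwd]cx. intrinsics effectively return CR0[EQ] (nonzero when
      // the store-conditional succeeded), which is what the mapping above
      // encodes.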
17316 if (Val == 0)
17317 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17318 else
17319 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17320
17321 SDValue Ops[] = {LHS.getOperand(i: 0), LHS.getOperand(i: 2), LHS.getOperand(i: 3),
17322 DAG.getConstant(Val: StoreWidth, DL: dl, VT: MVT::i32)};
17323 auto *MemNode = cast<MemSDNode>(Val&: LHS);
17324 SDValue ConstSt = DAG.getMemIntrinsicNode(
17325 Opcode: PPCISD::STORE_COND, dl,
17326 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other, VT3: MVT::Glue), Ops,
17327 MemVT: MemNode->getMemoryVT(), MMO: MemNode->getMemOperand());
17328
17329 SDValue InChain;
17330 // Unchain the branch from the original store conditional.
17331 if (N->getOperand(Num: 0) == LHS.getValue(R: 1))
17332 InChain = LHS.getOperand(i: 0);
17333 else if (N->getOperand(Num: 0).getOpcode() == ISD::TokenFactor) {
17334 SmallVector<SDValue, 4> InChains;
17335 SDValue InTF = N->getOperand(Num: 0);
17336 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17337 if (InTF.getOperand(i) != LHS.getValue(R: 1))
17338 InChains.push_back(Elt: InTF.getOperand(i));
17339 InChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: InChains);
17340 }
17341
17342 return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: InChain,
17343 N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
17344 N3: DAG.getRegister(Reg: PPC::CR0, VT: MVT::i32), N4: N->getOperand(Num: 4),
17345 N5: ConstSt.getValue(R: 2));
17346 }
17347
17348 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17349 getVectorCompareInfo(Intrin: LHS, CompareOpc, isDot, Subtarget)) {
17350 assert(isDot && "Can't compare against a vector result!");
17351
17352 if (SDValue Impossible = isImpossibleCompare())
17353 return Impossible;
17354
17355 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17356 // Create the PPCISD altivec 'dot' comparison node.
17357 SDValue Ops[] = {
17358 LHS.getOperand(i: 2), // LHS of compare
17359 LHS.getOperand(i: 3), // RHS of compare
17360 DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
17361 };
17362 EVT VTs[] = { LHS.getOperand(i: 2).getValueType(), MVT::Glue };
17363 SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);
17364
17365 // Unpack the result based on how the target uses it.
17366 PPC::Predicate CompOpc;
17367 switch (LHS.getConstantOperandVal(i: 1)) {
17368 default: // Can't happen, don't crash on invalid number though.
17369 case 0: // Branch on the value of the EQ bit of CR6.
17370 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17371 break;
17372 case 1: // Branch on the inverted value of the EQ bit of CR6.
17373 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17374 break;
17375 case 2: // Branch on the value of the LT bit of CR6.
17376 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17377 break;
17378 case 3: // Branch on the inverted value of the LT bit of CR6.
17379 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17380 break;
17381 }
17382
17383 return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: N->getOperand(Num: 0),
17384 N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
17385 N3: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32),
17386 N4: N->getOperand(Num: 4), N5: CompNode.getValue(R: 1));
17387 }
17388 break;
17389 }
17390 case ISD::BUILD_VECTOR:
17391 return DAGCombineBuildVector(N, DCI);
17392 case PPCISD::ADDC:
17393 return DAGCombineAddc(N, DCI);
17394 }
17395
17396 return SDValue();
17397}
17398
17399SDValue
17400PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17401 SelectionDAG &DAG,
17402 SmallVectorImpl<SDNode *> &Created) const {
17403 // fold (sdiv X, pow2)
17404 EVT VT = N->getValueType(ResNo: 0);
17405 if (VT == MVT::i64 && !Subtarget.isPPC64())
17406 return SDValue();
17407 if ((VT != MVT::i32 && VT != MVT::i64) ||
17408 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17409 return SDValue();
17410
17411 SDLoc DL(N);
17412 SDValue N0 = N->getOperand(Num: 0);
17413
17414 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17415 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17416 SDValue ShiftAmt = DAG.getConstant(Val: Lg2, DL, VT);
17417
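  // PPCISD::SRA_ADDZE is the classic PowerPC sdiv-by-power-of-two idiom:
  // srawi/sradi set CA when a negative dividend has one-bits shifted out, and
  // addze adds that carry back in, rounding the quotient toward zero.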
17418 SDValue Op = DAG.getNode(Opcode: PPCISD::SRA_ADDZE, DL, VT, N1: N0, N2: ShiftAmt);
17419 Created.push_back(Elt: Op.getNode());
17420
17421 if (IsNegPow2) {
17422 Op = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
17423 Created.push_back(Elt: Op.getNode());
17424 }
17425
17426 return Op;
17427}
17428
17429//===----------------------------------------------------------------------===//
17430// Inline Assembly Support
17431//===----------------------------------------------------------------------===//
17432
17433void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17434 KnownBits &Known,
17435 const APInt &DemandedElts,
17436 const SelectionDAG &DAG,
17437 unsigned Depth) const {
17438 Known.resetAll();
17439 switch (Op.getOpcode()) {
17440 default: break;
17441 case PPCISD::LBRX: {
17442 // lhbrx is known to have the top bits cleared out.
17443 if (cast<VTSDNode>(Val: Op.getOperand(i: 2))->getVT() == MVT::i16)
17444 Known.Zero = 0xFFFF0000;
17445 break;
17446 }
17447 case PPCISD::ADDE: {
17448 if (Op.getResNo() == 0) {
17449 // (0|1), _ = ADDE 0, 0, CARRY
17450 SDValue LHS = Op.getOperand(i: 0);
17451 SDValue RHS = Op.getOperand(i: 1);
17452 if (isNullConstant(V: LHS) && isNullConstant(V: RHS))
17453 Known.Zero = ~1ULL;
17454 }
17455 break;
17456 }
17457 case ISD::INTRINSIC_WO_CHAIN: {
17458 switch (Op.getConstantOperandVal(i: 0)) {
17459 default: break;
17460 case Intrinsic::ppc_altivec_vcmpbfp_p:
17461 case Intrinsic::ppc_altivec_vcmpeqfp_p:
17462 case Intrinsic::ppc_altivec_vcmpequb_p:
17463 case Intrinsic::ppc_altivec_vcmpequh_p:
17464 case Intrinsic::ppc_altivec_vcmpequw_p:
17465 case Intrinsic::ppc_altivec_vcmpequd_p:
17466 case Intrinsic::ppc_altivec_vcmpequq_p:
17467 case Intrinsic::ppc_altivec_vcmpgefp_p:
17468 case Intrinsic::ppc_altivec_vcmpgtfp_p:
17469 case Intrinsic::ppc_altivec_vcmpgtsb_p:
17470 case Intrinsic::ppc_altivec_vcmpgtsh_p:
17471 case Intrinsic::ppc_altivec_vcmpgtsw_p:
17472 case Intrinsic::ppc_altivec_vcmpgtsd_p:
17473 case Intrinsic::ppc_altivec_vcmpgtsq_p:
17474 case Intrinsic::ppc_altivec_vcmpgtub_p:
17475 case Intrinsic::ppc_altivec_vcmpgtuh_p:
17476 case Intrinsic::ppc_altivec_vcmpgtuw_p:
17477 case Intrinsic::ppc_altivec_vcmpgtud_p:
17478 case Intrinsic::ppc_altivec_vcmpgtuq_p:
17479 Known.Zero = ~1U; // All bits but the low one are known to be zero.
17480 break;
17481 }
17482 break;
17483 }
17484 case ISD::INTRINSIC_W_CHAIN: {
17485 switch (Op.getConstantOperandVal(i: 1)) {
17486 default:
17487 break;
17488 case Intrinsic::ppc_load2r:
17489 // Top bits are cleared for load2r (which is the same as lhbrx).
17490 Known.Zero = 0xFFFF0000;
17491 break;
17492 }
17493 break;
17494 }
17495 }
17496}
17497
17498Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
17499 switch (Subtarget.getCPUDirective()) {
17500 default: break;
17501 case PPC::DIR_970:
17502 case PPC::DIR_PWR4:
17503 case PPC::DIR_PWR5:
17504 case PPC::DIR_PWR5X:
17505 case PPC::DIR_PWR6:
17506 case PPC::DIR_PWR6X:
17507 case PPC::DIR_PWR7:
17508 case PPC::DIR_PWR8:
17509 case PPC::DIR_PWR9:
17510 case PPC::DIR_PWR10:
17511 case PPC::DIR_PWR11:
17512 case PPC::DIR_PWR_FUTURE: {
17513 if (!ML)
17514 break;
17515
17516 if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17518 // so that we can decrease cache misses and branch-prediction misses.
17519 // Actual alignment of the loop will depend on the hotness check and other
17520 // logic in alignBlocks.
17521 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17522 return Align(32);
17523 }
17524
17525 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17526
17527 // For small loops (between 5 and 8 instructions), align to a 32-byte
17528 // boundary so that the entire loop fits in one instruction-cache line.
17529 uint64_t LoopSize = 0;
17530 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17531 for (const MachineInstr &J : **I) {
17532 LoopSize += TII->getInstSizeInBytes(MI: J);
17533 if (LoopSize > 32)
17534 break;
17535 }
17536
17537 if (LoopSize > 16 && LoopSize <= 32)
17538 return Align(32);
17539
17540 break;
17541 }
17542 }
17543
17544 return TargetLowering::getPrefLoopAlignment(ML);
17545}
17546
17547/// getConstraintType - Given a constraint, return the type of
17548/// constraint it is for this target.
17549PPCTargetLowering::ConstraintType
17550PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17551 if (Constraint.size() == 1) {
17552 switch (Constraint[0]) {
17553 default: break;
17554 case 'b':
17555 case 'r':
17556 case 'f':
17557 case 'd':
17558 case 'v':
17559 case 'y':
17560 return C_RegisterClass;
17561 case 'Z':
17562 // FIXME: While Z does indicate a memory constraint, it specifically
17563 // indicates an r+r address (used in conjunction with the 'y' modifier
17564 // in the replacement string). Currently, we're forcing the base
17565 // register to be r0 in the asm printer (which is interpreted as zero)
17566 // and forming the complete address in the second register. This is
17567 // suboptimal.
17568 return C_Memory;
17569 }
17570 } else if (Constraint == "wc") { // individual CR bits.
17571 return C_RegisterClass;
17572 } else if (Constraint == "wa" || Constraint == "wd" ||
17573 Constraint == "wf" || Constraint == "ws" ||
17574 Constraint == "wi" || Constraint == "ww") {
17575 return C_RegisterClass; // VSX registers.
17576 }
17577 return TargetLowering::getConstraintType(Constraint);
17578}
17579
17580/// Examine constraint type and operand type and determine a weight value.
17581/// This object must already have been set up with the operand type
17582/// and the current alternative constraint selected.
17583TargetLowering::ConstraintWeight
17584PPCTargetLowering::getSingleConstraintMatchWeight(
17585 AsmOperandInfo &info, const char *constraint) const {
17586 ConstraintWeight weight = CW_Invalid;
17587 Value *CallOperandVal = info.CallOperandVal;
17588 // If we don't have a value, we can't do a match,
17589 // but allow it at the lowest weight.
17590 if (!CallOperandVal)
17591 return CW_Default;
17592 Type *type = CallOperandVal->getType();
17593
17594 // Look at the constraint type.
17595 if (StringRef(constraint) == "wc" && type->isIntegerTy(Bitwidth: 1))
17596 return CW_Register; // an individual CR bit.
17597 else if ((StringRef(constraint) == "wa" ||
17598 StringRef(constraint) == "wd" ||
17599 StringRef(constraint) == "wf") &&
17600 type->isVectorTy())
17601 return CW_Register;
17602 else if (StringRef(constraint) == "wi" && type->isIntegerTy(Bitwidth: 64))
    return CW_Register; // holds 64-bit integer data only.
17604 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17605 return CW_Register;
17606 else if (StringRef(constraint) == "ww" && type->isFloatTy())
17607 return CW_Register;
17608
17609 switch (*constraint) {
17610 default:
17611 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17612 break;
17613 case 'b':
17614 if (type->isIntegerTy())
17615 weight = CW_Register;
17616 break;
17617 case 'f':
17618 if (type->isFloatTy())
17619 weight = CW_Register;
17620 break;
17621 case 'd':
17622 if (type->isDoubleTy())
17623 weight = CW_Register;
17624 break;
17625 case 'v':
17626 if (type->isVectorTy())
17627 weight = CW_Register;
17628 break;
17629 case 'y':
17630 weight = CW_Register;
17631 break;
17632 case 'Z':
17633 weight = CW_Memory;
17634 break;
17635 }
17636 return weight;
17637}
17638
17639std::pair<unsigned, const TargetRegisterClass *>
17640PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17641 StringRef Constraint,
17642 MVT VT) const {
17643 if (Constraint.size() == 1) {
17644 // GCC RS6000 Constraint Letters
17645 switch (Constraint[0]) {
17646 case 'b': // R1-R31
17647 if (VT == MVT::i64 && Subtarget.isPPC64())
17648 return std::make_pair(x: 0U, y: &PPC::G8RC_NOX0RegClass);
17649 return std::make_pair(x: 0U, y: &PPC::GPRC_NOR0RegClass);
17650 case 'r': // R0-R31
17651 if (VT == MVT::i64 && Subtarget.isPPC64())
17652 return std::make_pair(x: 0U, y: &PPC::G8RCRegClass);
17653 return std::make_pair(x: 0U, y: &PPC::GPRCRegClass);
17654 // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. The
    // distinction does not matter much here, so give both the same reg classes.
17657 case 'd':
17658 case 'f':
17659 if (Subtarget.hasSPE()) {
17660 if (VT == MVT::f32 || VT == MVT::i32)
17661 return std::make_pair(x: 0U, y: &PPC::GPRCRegClass);
17662 if (VT == MVT::f64 || VT == MVT::i64)
17663 return std::make_pair(x: 0U, y: &PPC::SPERCRegClass);
17664 } else {
17665 if (VT == MVT::f32 || VT == MVT::i32)
17666 return std::make_pair(x: 0U, y: &PPC::F4RCRegClass);
17667 if (VT == MVT::f64 || VT == MVT::i64)
17668 return std::make_pair(x: 0U, y: &PPC::F8RCRegClass);
17669 }
17670 break;
17671 case 'v':
17672 if (Subtarget.hasAltivec() && VT.isVector())
17673 return std::make_pair(x: 0U, y: &PPC::VRRCRegClass);
17674 else if (Subtarget.hasVSX())
17675 // Scalars in Altivec registers only make sense with VSX.
17676 return std::make_pair(x: 0U, y: &PPC::VFRCRegClass);
17677 break;
17678 case 'y': // crrc
17679 return std::make_pair(x: 0U, y: &PPC::CRRCRegClass);
17680 }
17681 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17682 // An individual CR bit.
17683 return std::make_pair(x: 0U, y: &PPC::CRBITRCRegClass);
17684 } else if ((Constraint == "wa" || Constraint == "wd" ||
17685 Constraint == "wf" || Constraint == "wi") &&
17686 Subtarget.hasVSX()) {
17687 // A VSX register for either a scalar (FP) or vector. There is no
17688 // support for single precision scalars on subtargets prior to Power8.
17689 if (VT.isVector())
17690 return std::make_pair(x: 0U, y: &PPC::VSRCRegClass);
17691 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17692 return std::make_pair(x: 0U, y: &PPC::VSSRCRegClass);
17693 return std::make_pair(x: 0U, y: &PPC::VSFRCRegClass);
17694 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17695 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17696 return std::make_pair(x: 0U, y: &PPC::VSSRCRegClass);
17697 else
17698 return std::make_pair(x: 0U, y: &PPC::VSFRCRegClass);
17699 } else if (Constraint == "lr") {
17700 if (VT == MVT::i64)
17701 return std::make_pair(x: 0U, y: &PPC::LR8RCRegClass);
17702 else
17703 return std::make_pair(x: 0U, y: &PPC::LRRCRegClass);
17704 }
17705
17706 // Handle special cases of physical registers that are not properly handled
17707 // by the base class.
17708 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17709 // If we name a VSX register, we can't defer to the base class because it
17710 // will not recognize the correct register (their names will be VSL{0-31}
17711 // and V{0-31} so they won't match). So we match them here.
17712 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
17713 int VSNum = atoi(nptr: Constraint.data() + 3);
17714 assert(VSNum >= 0 && VSNum <= 63 &&
17715 "Attempted to access a vsr out of range");
17716 if (VSNum < 32)
17717 return std::make_pair(x: PPC::VSL0 + VSNum, y: &PPC::VSRCRegClass);
17718 return std::make_pair(x: PPC::V0 + VSNum - 32, y: &PPC::VSRCRegClass);
17719 }
17720
17721 // For float registers, we can't defer to the base class as it will match
17722 // the SPILLTOVSRRC class.
17723 if (Constraint.size() > 3 && Constraint[1] == 'f') {
17724 int RegNum = atoi(nptr: Constraint.data() + 2);
17725 if (RegNum > 31 || RegNum < 0)
17726 report_fatal_error(reason: "Invalid floating point register number");
17727 if (VT == MVT::f32 || VT == MVT::i32)
17728 return Subtarget.hasSPE()
17729 ? std::make_pair(x: PPC::R0 + RegNum, y: &PPC::GPRCRegClass)
17730 : std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F4RCRegClass);
17731 if (VT == MVT::f64 || VT == MVT::i64)
17732 return Subtarget.hasSPE()
17733 ? std::make_pair(x: PPC::S0 + RegNum, y: &PPC::SPERCRegClass)
17734 : std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F8RCRegClass);
17735 }
17736 }
17737
17738 std::pair<unsigned, const TargetRegisterClass *> R =
17739 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
17740
17741 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17742 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17743 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17744 // register.
17745 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17746 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
17747 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17748 PPC::GPRCRegClass.contains(Reg: R.first))
17749 return std::make_pair(x: TRI->getMatchingSuperReg(Reg: R.first,
17750 SubIdx: PPC::sub_32, RC: &PPC::G8RCRegClass),
17751 y: &PPC::G8RCRegClass);
17752
17753 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17754 if (!R.second && StringRef("{cc}").equals_insensitive(RHS: Constraint)) {
17755 R.first = PPC::CR0;
17756 R.second = &PPC::CRRCRegClass;
17757 }
17758 // FIXME: This warning should ideally be emitted in the front end.
17759 const auto &TM = getTargetMachine();
17760 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17761 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17762 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17763 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
      errs() << "warning: vector registers 20 to 31 are reserved in the "
                "default AIX AltiVec ABI and cannot be used\n";
17766 }
17767
17768 return R;
17769}
17770
17771/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17772/// vector. If it is invalid, don't add anything to Ops.
17773void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
17774 StringRef Constraint,
17775 std::vector<SDValue> &Ops,
17776 SelectionDAG &DAG) const {
17777 SDValue Result;
17778
17779 // Only support length 1 constraints.
17780 if (Constraint.size() > 1)
17781 return;
17782
17783 char Letter = Constraint[0];
17784 switch (Letter) {
17785 default: break;
17786 case 'I':
17787 case 'J':
17788 case 'K':
17789 case 'L':
17790 case 'M':
17791 case 'N':
17792 case 'O':
17793 case 'P': {
17794 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Val&: Op);
17795 if (!CST) return; // Must be an immediate to match.
17796 SDLoc dl(Op);
17797 int64_t Value = CST->getSExtValue();
17798 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17799 // numbers are printed as such.
17800 switch (Letter) {
17801 default: llvm_unreachable("Unknown constraint letter!");
17802 case 'I': // "I" is a signed 16-bit constant.
17803 if (isInt<16>(x: Value))
17804 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17805 break;
17806 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17807 if (isShiftedUInt<16, 16>(x: Value))
17808 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17809 break;
17810 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17811 if (isShiftedInt<16, 16>(x: Value))
17812 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17813 break;
17814 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17815 if (isUInt<16>(x: Value))
17816 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17817 break;
17818 case 'M': // "M" is a constant that is greater than 31.
17819 if (Value > 31)
17820 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17821 break;
17822 case 'N': // "N" is a positive constant that is an exact power of two.
17823 if (Value > 0 && isPowerOf2_64(Value))
17824 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17825 break;
17826 case 'O': // "O" is the constant zero.
17827 if (Value == 0)
17828 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17829 break;
17830 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17831 if (isInt<16>(x: -Value))
17832 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17833 break;
17834 }
17835 break;
17836 }
17837 }
17838
17839 if (Result.getNode()) {
17840 Ops.push_back(x: Result);
17841 return;
17842 }
17843
17844 // Handle standard constraint letters.
17845 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17846}
17847
17848void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17849 SmallVectorImpl<SDValue> &Ops,
17850 SelectionDAG &DAG) const {
17851 if (I.getNumOperands() <= 1)
17852 return;
17853 if (!isa<ConstantSDNode>(Val: Ops[1].getNode()))
17854 return;
17855 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17856 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17857 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17858 return;
17859
17860 if (MDNode *MDN = I.getMetadata(KindID: LLVMContext::MD_annotation))
17861 Ops.push_back(Elt: DAG.getMDNode(MD: MDN));
17862}
17863
17864// isLegalAddressingMode - Return true if the addressing mode represented
17865// by AM is legal for this target, for a load/store of the specified type.
17866bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17867 const AddrMode &AM, Type *Ty,
17868 unsigned AS,
17869 Instruction *I) const {
  // The vector-type r+i form has been supported as the DQ form since Power9.
  // We don't check that the offset satisfies the DQ-form requirement
  // (off % 16 == 0), because on PowerPC the immediate form is preferred and
  // the offset can be adjusted to fit it later, in the PPCLoopInstrFormPrep
  // pass. Also, LSR uses only the min and max offsets of an LSRUse to check
  // addressing-mode legality, so we should be a little aggressive here to
  // accommodate the other offsets of that LSRUse.
17876 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17877 return false;
17878
17879 // PPC allows a sign-extended 16-bit immediate field.
17880 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17881 return false;
17882
17883 // No global is ever allowed as a base.
17884 if (AM.BaseGV)
17885 return false;
17886
  // PPC only supports r+r,
17888 switch (AM.Scale) {
17889 case 0: // "r+i" or just "i", depending on HasBaseReg.
17890 break;
17891 case 1:
17892 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17893 return false;
17894 // Otherwise we have r+r or r+i.
17895 break;
17896 case 2:
17897 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17898 return false;
17899 // Allow 2*r as r+r.
17900 break;
17901 default:
17902 // No other scales are supported.
17903 return false;
17904 }
17905
17906 return true;
17907}
17908
17909SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17910 SelectionDAG &DAG) const {
17911 MachineFunction &MF = DAG.getMachineFunction();
17912 MachineFrameInfo &MFI = MF.getFrameInfo();
17913 MFI.setReturnAddressIsTaken(true);
17914
17915 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17916 return SDValue();
17917
17918 SDLoc dl(Op);
17919 unsigned Depth = Op.getConstantOperandVal(i: 0);
17920
17921 // Make sure the function does not optimize away the store of the RA to
17922 // the stack.
17923 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17924 FuncInfo->setLRStoreRequired();
17925 auto PtrVT = getPointerTy(DL: MF.getDataLayout());
17926
17927 if (Depth > 0) {
17928 // The link register (return address) is saved in the caller's frame
17929 // not the callee's stack frame. So we must get the caller's frame
17930 // address and load the return address at the LR offset from there.
17931 SDValue FrameAddr =
17932 DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
17933 Ptr: LowerFRAMEADDR(Op, DAG), PtrInfo: MachinePointerInfo());
17934 SDValue Offset =
17935 DAG.getConstant(Val: Subtarget.getFrameLowering()->getReturnSaveOffset(), DL: dl,
17936 VT: Subtarget.getScalarIntVT());
17937 return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(),
17938 Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FrameAddr, N2: Offset),
17939 PtrInfo: MachinePointerInfo());
17940 }
17941
17942 // Just load the return address off the stack.
17943 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17944 return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(), Ptr: RetAddrFI,
17945 PtrInfo: MachinePointerInfo());
17946}
17947
17948SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17949 SelectionDAG &DAG) const {
17950 SDLoc dl(Op);
17951 unsigned Depth = Op.getConstantOperandVal(i: 0);
17952
17953 MachineFunction &MF = DAG.getMachineFunction();
17954 MachineFrameInfo &MFI = MF.getFrameInfo();
17955 MFI.setFrameAddressIsTaken(true);
17956
17957 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
17958 bool isPPC64 = PtrVT == MVT::i64;
17959
17960 // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be deferred until PEI.
17962 unsigned FrameReg;
17963 if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
17964 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17965 else
17966 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17967
17968 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg: FrameReg,
17969 VT: PtrVT);
17970 while (Depth--)
17971 FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
17972 Ptr: FrameAddr, PtrInfo: MachinePointerInfo());
17973 return FrameAddr;
17974}
17975
17976#define GET_REGISTER_MATCHER
17977#include "PPCGenAsmMatcher.inc"
17978
17979Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17980 const MachineFunction &MF) const {
17981 bool IsPPC64 = Subtarget.isPPC64();
17982
17983 bool Is64Bit = IsPPC64 && VT == LLT::scalar(SizeInBits: 64);
17984 if (!Is64Bit && VT != LLT::scalar(SizeInBits: 32))
17985 report_fatal_error(reason: "Invalid register global variable type");
17986
17987 Register Reg = MatchRegisterName(Name: RegName);
17988 if (!Reg)
17989 return Reg;
17990
17991 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
17992 // Need followup investigation as to why.
17993 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
17994 report_fatal_error(reason: Twine("Trying to reserve an invalid register \"" +
17995 StringRef(RegName) + "\"."));
17996
  // For 64-bit, convert the R register to the corresponding X (G8RC) register.
17998 if (Is64Bit && StringRef(RegName).starts_with_insensitive(Prefix: "r"))
17999 Reg = Reg.id() - PPC::R0 + PPC::X0;
18000
18001 return Reg;
18002}
18003
18004bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // The 32-bit SVR4 ABI accesses everything as got-indirect.
18006 if (Subtarget.is32BitELFABI())
18007 return true;
18008
18009 // AIX accesses everything indirectly through the TOC, which is similar to
18010 // the GOT.
18011 if (Subtarget.isAIXABI())
18012 return true;
18013
18014 CodeModel::Model CModel = getTargetMachine().getCodeModel();
18015 // If it is small or large code model, module locals are accessed
18016 // indirectly by loading their address from .toc/.got.
18017 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18018 return true;
18019
18020 // JumpTable and BlockAddress are accessed as got-indirect.
18021 if (isa<JumpTableSDNode>(Val: GA) || isa<BlockAddressSDNode>(Val: GA))
18022 return true;
18023
18024 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: GA))
18025 return Subtarget.isGVIndirectSymbol(GV: G->getGlobal());
18026
18027 return false;
18028}
18029
18030bool
18031PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18032 // The PowerPC target isn't yet aware of offsets.
18033 return false;
18034}
18035
18036bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18037 const CallInst &I,
18038 MachineFunction &MF,
18039 unsigned Intrinsic) const {
18040 switch (Intrinsic) {
18041 case Intrinsic::ppc_atomicrmw_xchg_i128:
18042 case Intrinsic::ppc_atomicrmw_add_i128:
18043 case Intrinsic::ppc_atomicrmw_sub_i128:
18044 case Intrinsic::ppc_atomicrmw_nand_i128:
18045 case Intrinsic::ppc_atomicrmw_and_i128:
18046 case Intrinsic::ppc_atomicrmw_or_i128:
18047 case Intrinsic::ppc_atomicrmw_xor_i128:
18048 case Intrinsic::ppc_cmpxchg_i128:
18049 Info.opc = ISD::INTRINSIC_W_CHAIN;
18050 Info.memVT = MVT::i128;
18051 Info.ptrVal = I.getArgOperand(i: 0);
18052 Info.offset = 0;
18053 Info.align = Align(16);
18054 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
18055 MachineMemOperand::MOVolatile;
18056 return true;
18057 case Intrinsic::ppc_atomic_load_i128:
18058 Info.opc = ISD::INTRINSIC_W_CHAIN;
18059 Info.memVT = MVT::i128;
18060 Info.ptrVal = I.getArgOperand(i: 0);
18061 Info.offset = 0;
18062 Info.align = Align(16);
18063 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18064 return true;
18065 case Intrinsic::ppc_atomic_store_i128:
18066 Info.opc = ISD::INTRINSIC_VOID;
18067 Info.memVT = MVT::i128;
18068 Info.ptrVal = I.getArgOperand(i: 2);
18069 Info.offset = 0;
18070 Info.align = Align(16);
18071 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18072 return true;
18073 case Intrinsic::ppc_altivec_lvx:
18074 case Intrinsic::ppc_altivec_lvxl:
18075 case Intrinsic::ppc_altivec_lvebx:
18076 case Intrinsic::ppc_altivec_lvehx:
18077 case Intrinsic::ppc_altivec_lvewx:
18078 case Intrinsic::ppc_vsx_lxvd2x:
18079 case Intrinsic::ppc_vsx_lxvw4x:
18080 case Intrinsic::ppc_vsx_lxvd2x_be:
18081 case Intrinsic::ppc_vsx_lxvw4x_be:
18082 case Intrinsic::ppc_vsx_lxvl:
18083 case Intrinsic::ppc_vsx_lxvll: {
18084 EVT VT;
18085 switch (Intrinsic) {
18086 case Intrinsic::ppc_altivec_lvebx:
18087 VT = MVT::i8;
18088 break;
18089 case Intrinsic::ppc_altivec_lvehx:
18090 VT = MVT::i16;
18091 break;
18092 case Intrinsic::ppc_altivec_lvewx:
18093 VT = MVT::i32;
18094 break;
18095 case Intrinsic::ppc_vsx_lxvd2x:
18096 case Intrinsic::ppc_vsx_lxvd2x_be:
18097 VT = MVT::v2f64;
18098 break;
18099 default:
18100 VT = MVT::v4i32;
18101 break;
18102 }
18103
18104 Info.opc = ISD::INTRINSIC_W_CHAIN;
18105 Info.memVT = VT;
18106 Info.ptrVal = I.getArgOperand(i: 0);
18107 Info.offset = -VT.getStoreSize()+1;
18108 Info.size = 2*VT.getStoreSize()-1;
18109 Info.align = Align(1);
18110 Info.flags = MachineMemOperand::MOLoad;
18111 return true;
18112 }
18113 case Intrinsic::ppc_altivec_stvx:
18114 case Intrinsic::ppc_altivec_stvxl:
18115 case Intrinsic::ppc_altivec_stvebx:
18116 case Intrinsic::ppc_altivec_stvehx:
18117 case Intrinsic::ppc_altivec_stvewx:
18118 case Intrinsic::ppc_vsx_stxvd2x:
18119 case Intrinsic::ppc_vsx_stxvw4x:
18120 case Intrinsic::ppc_vsx_stxvd2x_be:
18121 case Intrinsic::ppc_vsx_stxvw4x_be:
18122 case Intrinsic::ppc_vsx_stxvl:
18123 case Intrinsic::ppc_vsx_stxvll: {
18124 EVT VT;
18125 switch (Intrinsic) {
18126 case Intrinsic::ppc_altivec_stvebx:
18127 VT = MVT::i8;
18128 break;
18129 case Intrinsic::ppc_altivec_stvehx:
18130 VT = MVT::i16;
18131 break;
18132 case Intrinsic::ppc_altivec_stvewx:
18133 VT = MVT::i32;
18134 break;
18135 case Intrinsic::ppc_vsx_stxvd2x:
18136 case Intrinsic::ppc_vsx_stxvd2x_be:
18137 VT = MVT::v2f64;
18138 break;
18139 default:
18140 VT = MVT::v4i32;
18141 break;
18142 }
18143
18144 Info.opc = ISD::INTRINSIC_VOID;
18145 Info.memVT = VT;
18146 Info.ptrVal = I.getArgOperand(i: 1);
18147 Info.offset = -VT.getStoreSize()+1;
18148 Info.size = 2*VT.getStoreSize()-1;
18149 Info.align = Align(1);
18150 Info.flags = MachineMemOperand::MOStore;
18151 return true;
18152 }
18153 case Intrinsic::ppc_stdcx:
18154 case Intrinsic::ppc_stwcx:
18155 case Intrinsic::ppc_sthcx:
18156 case Intrinsic::ppc_stbcx: {
18157 EVT VT;
18158 auto Alignment = Align(8);
18159 switch (Intrinsic) {
18160 case Intrinsic::ppc_stdcx:
18161 VT = MVT::i64;
18162 break;
18163 case Intrinsic::ppc_stwcx:
18164 VT = MVT::i32;
18165 Alignment = Align(4);
18166 break;
18167 case Intrinsic::ppc_sthcx:
18168 VT = MVT::i16;
18169 Alignment = Align(2);
18170 break;
18171 case Intrinsic::ppc_stbcx:
18172 VT = MVT::i8;
18173 Alignment = Align(1);
18174 break;
18175 }
18176 Info.opc = ISD::INTRINSIC_W_CHAIN;
18177 Info.memVT = VT;
18178 Info.ptrVal = I.getArgOperand(i: 0);
18179 Info.offset = 0;
18180 Info.align = Alignment;
18181 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18182 return true;
18183 }
18184 default:
18185 break;
18186 }
18187
18188 return false;
18189}
18190
18191/// It returns EVT::Other if the type should be determined using generic
18192/// target-independent logic.
18193EVT PPCTargetLowering::getOptimalMemOpType(
18194 const MemOp &Op, const AttributeList &FuncAttributes) const {
18195 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18196 // We should use Altivec/VSX loads and stores when available. For unaligned
18197 // addresses, unaligned VSX loads are only fast starting with the P8.
18198 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18199 if (Op.isMemset() && Subtarget.hasVSX()) {
18200 uint64_t TailSize = Op.size() % 16;
        // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
        // element if the vector element type matches the tail store. For a
        // tail size of 3 or 4 bytes the tail store is i32, so v4i32 cannot be
        // used; pick another legal type instead.
18204 if (TailSize > 2 && TailSize <= 4) {
18205 return MVT::v8i16;
18206 }
18207 return MVT::v4i32;
18208 }
18209 if (Op.isAligned(AlignCheck: Align(16)) || Subtarget.hasP8Vector())
18210 return MVT::v4i32;
18211 }
18212 }
18213
18214 if (Subtarget.isPPC64()) {
18215 return MVT::i64;
18216 }
18217
18218 return MVT::i32;
18219}
18220
18221/// Returns true if it is beneficial to convert a load of a constant
18222/// to just the constant itself.
18223bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
18224 Type *Ty) const {
18225 assert(Ty->isIntegerTy());
18226
18227 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18228 return !(BitSize == 0 || BitSize > 64);
18229}
18230
18231bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
18232 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18233 return false;
18234 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18235 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18236 return NumBits1 == 64 && NumBits2 == 32;
18237}
18238
18239bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
18240 if (!VT1.isInteger() || !VT2.isInteger())
18241 return false;
18242 unsigned NumBits1 = VT1.getSizeInBits();
18243 unsigned NumBits2 = VT2.getSizeInBits();
18244 return NumBits1 == 64 && NumBits2 == 32;
18245}
18246
18247bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
18248 // Generally speaking, zexts are not free, but they are free when they can be
18249 // folded with other operations.
18250 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18251 EVT MemVT = LD->getMemoryVT();
18252 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18253 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18254 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18255 LD->getExtensionType() == ISD::ZEXTLOAD))
18256 return true;
18257 }
18258
18259 // FIXME: Add other cases...
18260 // - 32-bit shifts with a zext to i64
18261 // - zext after ctlz, bswap, etc.
18262 // - zext after and by a constant mask
18263
18264 return TargetLowering::isZExtFree(Val, VT2);
18265}
18266
18267bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18268 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18269 "invalid fpext types");
18270 // Extending to float128 is not free.
18271 if (DestVT == MVT::f128)
18272 return false;
18273 return true;
18274}
18275
18276bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
18277 return isInt<16>(x: Imm) || isUInt<16>(x: Imm);
18278}
18279
18280bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
18281 return isInt<16>(x: Imm) || isUInt<16>(x: Imm);
18282}
18283
18284bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
18285 MachineMemOperand::Flags,
18286 unsigned *Fast) const {
18287 if (DisablePPCUnaligned)
18288 return false;
18289
18290 // PowerPC supports unaligned memory access for simple non-vector types.
18291 // Although accessing unaligned addresses is not as efficient as accessing
18292 // aligned addresses, it is generally more efficient than manual expansion,
  // and generally traps (falling back to software emulation) only when
  // crossing page boundaries.
18295
18296 if (!VT.isSimple())
18297 return false;
18298
18299 if (VT.isFloatingPoint() && !VT.isVector() &&
18300 !Subtarget.allowsUnalignedFPAccess())
18301 return false;
18302
18303 if (VT.getSimpleVT().isVector()) {
18304 if (Subtarget.hasVSX()) {
18305 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18306 VT != MVT::v4f32 && VT != MVT::v4i32)
18307 return false;
18308 } else {
18309 return false;
18310 }
18311 }
18312
18313 if (VT == MVT::ppcf128)
18314 return false;
18315
18316 if (Fast)
18317 *Fast = 1;
18318
18319 return true;
18320}
18321
18322bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
18323 SDValue C) const {
18324 // Check integral scalar types.
18325 if (!VT.isScalarInteger())
18326 return false;
18327 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
18328 if (!ConstNode->getAPIntValue().isSignedIntN(N: 64))
18329 return false;
    // This transformation will generate >= 2 operations. But the following
    // cases will generate <= 2 instructions during ISEL, so exclude them:
    // 1. If the constant multiplier fits in 16 bits, it can be handled by a
    //    single HW instruction, i.e. MULLI.
    // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
    //    only one extra instruction is needed compared to case 1, i.e. MULLI
    //    followed by RLDICR.
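    // For illustration: a multiplier of 3 << 20 strips to 3 after removing
    // trailing zeros, which fits in 16 bits, so we return false (case 2:
    // MULLI + RLDICR). A multiplier of 0x1FFFF (2^17 - 1) does not fit even
    // after stripping, but 0x1FFFF + 1 is a power of two, so we return true
    // and let the shift/sub decomposition handle it.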
18336 int64_t Imm = ConstNode->getSExtValue();
18337 unsigned Shift = llvm::countr_zero<uint64_t>(Val: Imm);
18338 Imm >>= Shift;
18339 if (isInt<16>(x: Imm))
18340 return false;
18341 uint64_t UImm = static_cast<uint64_t>(Imm);
18342 if (isPowerOf2_64(Value: UImm + 1) || isPowerOf2_64(Value: UImm - 1) ||
18343 isPowerOf2_64(Value: 1 - UImm) || isPowerOf2_64(Value: -1 - UImm))
18344 return true;
18345 }
18346 return false;
18347}
18348
18349bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
18350 EVT VT) const {
18351 return isFMAFasterThanFMulAndFAdd(
18352 F: MF.getFunction(), Ty: VT.getTypeForEVT(Context&: MF.getFunction().getContext()));
18353}
18354
18355bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
18356 Type *Ty) const {
18357 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18358 return false;
18359 switch (Ty->getScalarType()->getTypeID()) {
18360 case Type::FloatTyID:
18361 case Type::DoubleTyID:
18362 return true;
18363 case Type::FP128TyID:
18364 return Subtarget.hasP9Vector();
18365 default:
18366 return false;
18367 }
18368}
18369
18370// FIXME: add more patterns which are not profitable to hoist.
18371bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
18372 if (!I->hasOneUse())
18373 return true;
18374
18375 Instruction *User = I->user_back();
18376 assert(User && "A single use instruction with no uses.");
18377
18378 switch (I->getOpcode()) {
18379 case Instruction::FMul: {
    // Don't break FMA; PowerPC prefers FMA.
18381 if (User->getOpcode() != Instruction::FSub &&
18382 User->getOpcode() != Instruction::FAdd)
18383 return true;
18384
18385 const TargetOptions &Options = getTargetMachine().Options;
18386 const Function *F = I->getFunction();
18387 const DataLayout &DL = F->getDataLayout();
18388 Type *Ty = User->getOperand(i: 0)->getType();
18389
18390 return !(
18391 isFMAFasterThanFMulAndFAdd(F: *F, Ty) &&
18392 isOperationLegalOrCustom(Op: ISD::FMA, VT: getValueType(DL, Ty)) &&
18393 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
18394 }
18395 case Instruction::Load: {
    // Don't break the "store (load float*)" pattern; it will be combined
    // into "store (load int32)" by a later InstCombine pass. See function
    // combineLoadToOperationType. On PowerPC, loading a floating-point value
    // takes more cycles than loading a 32-bit integer.
18400 LoadInst *LI = cast<LoadInst>(Val: I);
    // For loads that combineLoadToOperationType leaves untouched, such as
    // ordered loads, it should be profitable to hoist them.
    // A swifterror load can only be used with pointer-to-pointer types, so
    // the later type check gets rid of this case.
18405 if (!LI->isUnordered())
18406 return true;
18407
18408 if (User->getOpcode() != Instruction::Store)
18409 return true;
18410
18411 if (I->getType()->getTypeID() != Type::FloatTyID)
18412 return true;
18413
18414 return false;
18415 }
18416 default:
18417 return true;
18418 }
18419 return true;
18420}
18421
18422const MCPhysReg *
18423PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
18424 // LR is a callee-save register, but we must treat it as clobbered by any call
18425 // site. Hence we include LR in the scratch registers, which are in turn added
18426 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18427 // to CTR, which is used by any indirect call.
18428 static const MCPhysReg ScratchRegs[] = {
18429 PPC::X12, PPC::LR8, PPC::CTR8, 0
18430 };
18431
18432 return ScratchRegs;
18433}
18434
18435Register PPCTargetLowering::getExceptionPointerRegister(
18436 const Constant *PersonalityFn) const {
18437 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18438}
18439
18440Register PPCTargetLowering::getExceptionSelectorRegister(
18441 const Constant *PersonalityFn) const {
18442 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18443}
18444
18445bool
18446PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
18447 EVT VT , unsigned DefinedValues) const {
18448 if (VT == MVT::v2i64)
18449 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18450
18451 if (Subtarget.hasVSX())
18452 return true;
18453
18454 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
18455}
18456
18457Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
18458 if (DisableILPPref || Subtarget.enableMachineScheduler())
18459 return TargetLowering::getSchedulingPreference(N);
18460
18461 return Sched::ILP;
18462}
18463
18464// Create a fast isel object.
18465FastISel *
18466PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
18467 const TargetLibraryInfo *LibInfo) const {
18468 return PPC::createFastISel(FuncInfo, LibInfo);
18469}
18470
18471// 'Inverted' means the FMA opcode after negating one multiplicand.
18472// For example, (fma -a b c) = (fnmsub a b c)
18473static unsigned invertFMAOpcode(unsigned Opc) {
18474 switch (Opc) {
18475 default:
18476 llvm_unreachable("Invalid FMA opcode for PowerPC!");
18477 case ISD::FMA:
18478 return PPCISD::FNMSUB;
18479 case PPCISD::FNMSUB:
18480 return ISD::FMA;
18481 }
18482}
18483
18484SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
18485 bool LegalOps, bool OptForSize,
18486 NegatibleCost &Cost,
18487 unsigned Depth) const {
18488 if (Depth > SelectionDAG::MaxRecursionDepth)
18489 return SDValue();
18490
18491 unsigned Opc = Op.getOpcode();
18492 EVT VT = Op.getValueType();
18493 SDNodeFlags Flags = Op.getNode()->getFlags();
18494
18495 switch (Opc) {
18496 case PPCISD::FNMSUB:
18497 if (!Op.hasOneUse() || !isTypeLegal(VT))
18498 break;
18499
18500 const TargetOptions &Options = getTargetMachine().Options;
18501 SDValue N0 = Op.getOperand(i: 0);
18502 SDValue N1 = Op.getOperand(i: 1);
18503 SDValue N2 = Op.getOperand(i: 2);
18504 SDLoc Loc(Op);
18505
18506 NegatibleCost N2Cost = NegatibleCost::Expensive;
18507 SDValue NegN2 =
18508 getNegatedExpression(Op: N2, DAG, LegalOps, OptForSize, Cost&: N2Cost, Depth: Depth + 1);
18509
18510 if (!NegN2)
18511 return SDValue();
18512
18513 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18514 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18515 // These transformations may change sign of zeroes. For example,
18516 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18517 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18518 // Try and choose the cheaper one to negate.
18519 NegatibleCost N0Cost = NegatibleCost::Expensive;
18520 SDValue NegN0 = getNegatedExpression(Op: N0, DAG, LegalOps, OptForSize,
18521 Cost&: N0Cost, Depth: Depth + 1);
18522
18523 NegatibleCost N1Cost = NegatibleCost::Expensive;
18524 SDValue NegN1 = getNegatedExpression(Op: N1, DAG, LegalOps, OptForSize,
18525 Cost&: N1Cost, Depth: Depth + 1);
18526
18527 if (NegN0 && N0Cost <= N1Cost) {
18528 Cost = std::min(a: N0Cost, b: N2Cost);
18529 return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: NegN0, N2: N1, N3: NegN2, Flags);
18530 } else if (NegN1) {
18531 Cost = std::min(a: N1Cost, b: N2Cost);
18532 return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: N0, N2: NegN1, N3: NegN2, Flags);
18533 }
18534 }
18535
18536 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18537 if (isOperationLegal(Op: ISD::FMA, VT)) {
18538 Cost = N2Cost;
18539 return DAG.getNode(Opcode: ISD::FMA, DL: Loc, VT, N1: N0, N2: N1, N3: NegN2, Flags);
18540 }
18541
18542 break;
18543 }
18544
18545 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18546 Cost, Depth);
18547}
18548
18549// Override to enable LOAD_STACK_GUARD lowering on Linux.
18550bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
18551 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18552 return true;
18553 return TargetLowering::useLoadStackGuardNode(M);
18554}
18555
18556// Override to disable global variable loading on Linux and insert AIX canary
18557// word declaration.
18558void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
18559 if (Subtarget.isAIXABI()) {
18560 M.getOrInsertGlobal(Name: AIXSSPCanaryWordName,
18561 Ty: PointerType::getUnqual(C&: M.getContext()));
18562 return;
18563 }
18564 if (!Subtarget.isTargetLinux())
18565 return TargetLowering::insertSSPDeclarations(M);
18566}
18567
18568Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
18569 if (Subtarget.isAIXABI())
18570 return M.getGlobalVariable(Name: AIXSSPCanaryWordName);
18571 return TargetLowering::getSDagStackGuard(M);
18572}
18573
18574bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
18575 bool ForCodeSize) const {
18576 if (!VT.isSimple() || !Subtarget.hasVSX())
18577 return false;
18578
18579 switch(VT.getSimpleVT().SimpleTy) {
18580 default:
    // For FP types that are currently not supported by the PPC backend, return
    // false. Examples: f16, f80.
18583 return false;
18584 case MVT::f32:
18585 case MVT::f64: {
18586 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18588 return true;
18589 }
18590 bool IsExact;
18591 APSInt IntResult(16, false);
18592 // The rounding mode doesn't really matter because we only care about floats
18593 // that can be converted to integers exactly.
18594 Imm.convertToInteger(Result&: IntResult, RM: APFloat::rmTowardZero, IsExact: &IsExact);
18595 // For exact values in the range [-16, 15] we can materialize the float.
18596 if (IsExact && IntResult <= 15 && IntResult >= -16)
18597 return true;
18598 return Imm.isZero();
18599 }
18600 case MVT::ppcf128:
18601 return Imm.isPosZero();
18602 }
18603}
18604
18605// For vector shift operation op, fold
18606// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
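// For illustration, with v4i32 elements (numbits(x) == 32):
//   (srl x, (and y, <31,31,31,31>))  -->  (PPCISD::SRL x, y)
// because the hardware shift already uses only the low log2(32) bits of each
// shift amount.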
18607static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18608 SelectionDAG &DAG) {
18609 SDValue N0 = N->getOperand(Num: 0);
18610 SDValue N1 = N->getOperand(Num: 1);
18611 EVT VT = N0.getValueType();
18612 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18613 unsigned Opcode = N->getOpcode();
18614 unsigned TargetOpcode;
18615
18616 switch (Opcode) {
18617 default:
18618 llvm_unreachable("Unexpected shift operation");
18619 case ISD::SHL:
18620 TargetOpcode = PPCISD::SHL;
18621 break;
18622 case ISD::SRL:
18623 TargetOpcode = PPCISD::SRL;
18624 break;
18625 case ISD::SRA:
18626 TargetOpcode = PPCISD::SRA;
18627 break;
18628 }
18629
18630 if (VT.isVector() && TLI.isOperationLegal(Op: Opcode, VT) &&
18631 N1->getOpcode() == ISD::AND)
18632 if (ConstantSDNode *Mask = isConstOrConstSplat(N: N1->getOperand(Num: 1)))
18633 if (Mask->getZExtValue() == OpSizeInBits - 1)
18634 return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc(N), VT, N1: N0, N2: N1->getOperand(Num: 0));
18635
18636 return SDValue();
18637}
18638
18639SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
18640 DAGCombinerInfo &DCI) const {
18641 EVT VT = N->getValueType(ResNo: 0);
18642 assert(VT.isVector() && "Vector type expected.");
18643
18644 unsigned Opc = N->getOpcode();
18645 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
18646 "Unexpected opcode.");
18647
18648 if (!isOperationLegal(Op: Opc, VT))
18649 return SDValue();
18650
18651 EVT EltTy = VT.getScalarType();
18652 unsigned EltBits = EltTy.getSizeInBits();
18653 if (EltTy != MVT::i64 && EltTy != MVT::i32)
18654 return SDValue();
18655
18656 SDValue N1 = N->getOperand(Num: 1);
18657 uint64_t SplatBits = 0;
18658 bool AddSplatCase = false;
18659 unsigned OpcN1 = N1.getOpcode();
18660 if (OpcN1 == PPCISD::VADD_SPLAT &&
18661 N1.getConstantOperandVal(i: 1) == VT.getVectorNumElements()) {
18662 AddSplatCase = true;
18663 SplatBits = N1.getConstantOperandVal(i: 0);
18664 }
18665
18666 if (!AddSplatCase) {
18667 if (OpcN1 != ISD::BUILD_VECTOR)
18668 return SDValue();
18669
18670 unsigned SplatBitSize;
18671 bool HasAnyUndefs;
18672 APInt APSplatBits, APSplatUndef;
18673 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Val&: N1);
18674 bool BVNIsConstantSplat =
18675 BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
18676 HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian());
18677 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
18678 return SDValue();
18679 SplatBits = APSplatBits.getZExtValue();
18680 }
18681
18682 SDLoc DL(N);
18683 SDValue N0 = N->getOperand(Num: 0);
  // PPC vector shifts by word/doubleword look at only the low 5/6 bits of the
  // shift vector, which means the max shift amount is 31/63. A shift vector of
  // all ones will be truncated to 31/63, which is useful because vspltiw is
  // limited to the -16 to 15 range.
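  // For illustration: to shift every element of a v4i32 by 31 (or a v2i64 by
  // 63) we can splat the byte 0xFF (e.g. via vspltisb -1) to build an all-ones
  // vector with a single splat-immediate; the shift hardware then reads only
  // its low 5/6 bits.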
18688 if (SplatBits == (EltBits - 1)) {
18689 unsigned NewOpc;
18690 switch (Opc) {
18691 case ISD::SHL:
18692 NewOpc = PPCISD::SHL;
18693 break;
18694 case ISD::SRL:
18695 NewOpc = PPCISD::SRL;
18696 break;
18697 case ISD::SRA:
18698 NewOpc = PPCISD::SRA;
18699 break;
18700 }
18701 SDValue SplatOnes = getCanonicalConstSplat(Val: 255, SplatSize: 1, VT, DAG&: DCI.DAG, dl: DL);
18702 return DCI.DAG.getNode(Opcode: NewOpc, DL, VT, N1: N0, N2: SplatOnes);
18703 }
18704
18705 if (Opc != ISD::SHL || !isOperationLegal(Op: ISD::ADD, VT))
18706 return SDValue();
18707
  // For 64-bit elements there is no splat immediate, so we want to catch a
  // shift by 1 here before the BUILD_VECTOR is replaced by a load.
18710 if (EltTy != MVT::i64 || SplatBits != 1)
18711 return SDValue();
18712
18713 return DCI.DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT, N1: N0, N2: N0);
18714}
18715
18716SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
18717 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
18718 return Value;
18719
18720 if (N->getValueType(ResNo: 0).isVector())
18721 return combineVectorShift(N, DCI);
18722
18723 SDValue N0 = N->getOperand(Num: 0);
18724 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
18725 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
18726 N0.getOpcode() != ISD::SIGN_EXTEND ||
18727 N0.getOperand(i: 0).getValueType() != MVT::i32 || CN1 == nullptr ||
18728 N->getValueType(ResNo: 0) != MVT::i64)
18729 return SDValue();
18730
18731 // We can't save an operation here if the value is already extended, and
18732 // the existing shift is easier to combine.
18733 SDValue ExtsSrc = N0.getOperand(i: 0);
18734 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
18735 ExtsSrc.getOperand(i: 0).getOpcode() == ISD::AssertSext)
18736 return SDValue();
18737
18738 SDLoc DL(N0);
18739 SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswsli, but the incoming shift
  // amount could be i64.
18742 if (ShiftBy.getValueType() == MVT::i64)
18743 ShiftBy = DCI.DAG.getConstant(Val: CN1->getZExtValue(), DL, VT: MVT::i32);
18744
18745 return DCI.DAG.getNode(Opcode: PPCISD::EXTSWSLI, DL, VT: MVT::i64, N1: N0->getOperand(Num: 0),
18746 N2: ShiftBy);
18747}
18748
18749SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18750 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
18751 return Value;
18752
18753 if (N->getValueType(ResNo: 0).isVector())
18754 return combineVectorShift(N, DCI);
18755
18756 return SDValue();
18757}
18758
18759SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18760 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
18761 return Value;
18762
18763 if (N->getValueType(ResNo: 0).isVector())
18764 return combineVectorShift(N, DCI);
18765
18766 return SDValue();
18767}
18768
18769// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18770// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the expression (addi Z, -C) simplifies to just Z.
// Requirement: -C is in [-32768, 32767], and X and Z are MVT::i64 types.
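// For illustration, with C == 0:
//   (add X, (zext (setne Z, 0)))  -->  (addze X, (addic Z, -1).carry)
// addic Z, -1 produces a carry of 1 exactly when Z != 0, and addze folds that
// carry back into X, so no compare/select sequence is needed.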
18773static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
18774 const PPCSubtarget &Subtarget) {
18775 if (!Subtarget.isPPC64())
18776 return SDValue();
18777
18778 SDValue LHS = N->getOperand(Num: 0);
18779 SDValue RHS = N->getOperand(Num: 1);
18780
18781 auto isZextOfCompareWithConstant = [](SDValue Op) {
18782 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18783 Op.getValueType() != MVT::i64)
18784 return false;
18785
18786 SDValue Cmp = Op.getOperand(i: 0);
18787 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18788 Cmp.getOperand(i: 0).getValueType() != MVT::i64)
18789 return false;
18790
18791 if (auto *Constant = dyn_cast<ConstantSDNode>(Val: Cmp.getOperand(i: 1))) {
18792 int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be in [-32768, 32767].
18795 return isInt<16>(x: NegConstant);
18796 }
18797
18798 return false;
18799 };
18800
18801 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18802 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18803
18804 // If there is a pattern, canonicalize a zext operand to the RHS.
18805 if (LHSHasPattern && !RHSHasPattern)
18806 std::swap(a&: LHS, b&: RHS);
18807 else if (!LHSHasPattern && !RHSHasPattern)
18808 return SDValue();
18809
18810 SDLoc DL(N);
18811 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
18812 SDVTList VTs = DAG.getVTList(VT1: MVT::i64, VT2: CarryType);
18813 SDValue Cmp = RHS.getOperand(i: 0);
18814 SDValue Z = Cmp.getOperand(i: 0);
18815 auto *Constant = cast<ConstantSDNode>(Val: Cmp.getOperand(i: 1));
18816 int64_t NegConstant = 0 - Constant->getSExtValue();
18817
18818 switch(cast<CondCodeSDNode>(Val: Cmp.getOperand(i: 2))->get()) {
18819 default: break;
18820 case ISD::SETNE: {
18821 // when C == 0
18822 // --> addze X, (addic Z, -1).carry
18823 // /
18824 // add X, (zext(setne Z, C))--
18825 // \ when -32768 <= -C <= 32767 && C != 0
18826 // --> addze X, (addic (addi Z, -C), -1).carry
18827 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
18828 N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
18829 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18830 SDValue Addc =
18831 DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
18832 N1: AddOrZ, N2: DAG.getAllOnesConstant(DL, VT: MVT::i64),
18833 N3: DAG.getConstant(Val: 0, DL, VT: CarryType));
18834 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
18835 N2: DAG.getConstant(Val: 0, DL, VT: MVT::i64),
18836 N3: SDValue(Addc.getNode(), 1));
18837 }
18838 case ISD::SETEQ: {
18839 // when C == 0
18840 // --> addze X, (subfic Z, 0).carry
18841 // /
18842 // add X, (zext(sete Z, C))--
18843 // \ when -32768 <= -C <= 32767 && C != 0
18844 // --> addze X, (subfic (addi Z, -C), 0).carry
18845 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
18846 N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
18847 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18848 SDValue Subc =
18849 DAG.getNode(Opcode: ISD::USUBO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
18850 N1: DAG.getConstant(Val: 0, DL, VT: MVT::i64), N2: AddOrZ,
18851 N3: DAG.getConstant(Val: 0, DL, VT: CarryType));
18852 SDValue Invert = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Subc.getValue(R: 1),
18853 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryType));
18854 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
18855 N2: DAG.getConstant(Val: 0, DL, VT: MVT::i64), N3: Invert);
18856 }
18857 }
18858
18859 return SDValue();
18860}
18861
18862// Transform
18863// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18864// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18865// In this case both C1 and C2 must be known constants.
// C1+C2 must fit into a 34-bit signed integer.
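// For illustration:
//   (add 16, (MAT_PCREL_ADDR @g + 8))  -->  (MAT_PCREL_ADDR @g + 24)
// so the whole address folds into the single PC-relative materialization
// (typically a paddi), as long as the combined offset still fits in 34 bits.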
18867static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
18868 const PPCSubtarget &Subtarget) {
18869 if (!Subtarget.isUsingPCRelativeCalls())
18870 return SDValue();
18871
18872 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
18873 // If we find that node try to cast the Global Address and the Constant.
18874 SDValue LHS = N->getOperand(Num: 0);
18875 SDValue RHS = N->getOperand(Num: 1);
18876
18877 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18878 std::swap(a&: LHS, b&: RHS);
18879
18880 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18881 return SDValue();
18882
18883 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
18884 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(Val: LHS.getOperand(i: 0));
18885 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(Val&: RHS);
18886
18887 // Check that both casts succeeded.
18888 if (!GSDN || !ConstNode)
18889 return SDValue();
18890
18891 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
18892 SDLoc DL(GSDN);
18893
18894 // The signed int offset needs to fit in 34 bits.
18895 if (!isInt<34>(x: NewOffset))
18896 return SDValue();
18897
18898 // The new global address is a copy of the old global address except
18899 // that it has the updated Offset.
18900 SDValue GA =
18901 DAG.getTargetGlobalAddress(GV: GSDN->getGlobal(), DL, VT: GSDN->getValueType(ResNo: 0),
18902 offset: NewOffset, TargetFlags: GSDN->getTargetFlags());
18903 SDValue MatPCRel =
18904 DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: GSDN->getValueType(ResNo: 0), Operand: GA);
18905 return MatPCRel;
18906}
18907
18908SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
18909 if (auto Value = combineADDToADDZE(N, DAG&: DCI.DAG, Subtarget))
18910 return Value;
18911
18912 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DAG&: DCI.DAG, Subtarget))
18913 return Value;
18914
18915 return SDValue();
18916}
18917
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128-bit float.
// This can take one of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// This is required because we do not have a legal i128 type, and we want to
// avoid having to store the f128 and then reload part of it.
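// For illustration, on a little-endian subtarget:
//   (trunc (srl (bitcast f128 %x to i128), 64) to i64)
// becomes (extract_vector_elt (bitcast %x to v2i64), 1), avoiding the
// store/reload of the f128 value.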
18927SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18928 DAGCombinerInfo &DCI) const {
18929 // If we are using CRBits then try that first.
18930 if (Subtarget.useCRBits()) {
18931 // Check if CRBits did anything and return that if it did.
18932 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18933 return CRTruncValue;
18934 }
18935
18936 SDLoc dl(N);
18937 SDValue Op0 = N->getOperand(Num: 0);
18938
18939 // Looking for a truncate of i128 to i64.
18940 if (Op0.getValueType() != MVT::i128 || N->getValueType(ResNo: 0) != MVT::i64)
18941 return SDValue();
18942
18943 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18944
18945 // SRL feeding TRUNCATE.
18946 if (Op0.getOpcode() == ISD::SRL) {
18947 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1));
18948 // The right shift has to be by 64 bits.
18949 if (!ConstNode || ConstNode->getZExtValue() != 64)
18950 return SDValue();
18951
18952 // Switch the element number to extract.
18953 EltToExtract = EltToExtract ? 0 : 1;
18954 // Update Op0 past the SRL.
18955 Op0 = Op0.getOperand(i: 0);
18956 }
18957
18958 // BITCAST feeding a TRUNCATE possibly via SRL.
18959 if (Op0.getOpcode() == ISD::BITCAST &&
18960 Op0.getValueType() == MVT::i128 &&
18961 Op0.getOperand(i: 0).getValueType() == MVT::f128) {
18962 SDValue Bitcast = DCI.DAG.getBitcast(VT: MVT::v2i64, V: Op0.getOperand(i: 0));
18963 return DCI.DAG.getNode(
18964 Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Bitcast,
18965 N2: DCI.DAG.getTargetConstant(Val: EltToExtract, DL: dl, VT: MVT::i32));
18966 }
18967 return SDValue();
18968}
18969
18970SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18971 SelectionDAG &DAG = DCI.DAG;
18972
18973 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N: N->getOperand(Num: 1));
18974 if (!ConstOpOrElement)
18975 return SDValue();
18976
  // An imul is usually smaller than the alternative sequence for a legal type.
18978 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18979 isOperationLegal(Op: ISD::MUL, VT: N->getValueType(ResNo: 0)))
18980 return SDValue();
18981
18982 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18983 switch (this->Subtarget.getCPUDirective()) {
18984 default:
18985 // TODO: enhance the condition for subtarget before pwr8
18986 return false;
18987 case PPC::DIR_PWR8:
18988 // type mul add shl
18989 // scalar 4 1 1
18990 // vector 7 2 2
18991 return true;
18992 case PPC::DIR_PWR9:
18993 case PPC::DIR_PWR10:
18994 case PPC::DIR_PWR11:
18995 case PPC::DIR_PWR_FUTURE:
18996 // type mul add shl
18997 // scalar 5 2 2
18998 // vector 7 2 2
18999
      // The cycle ratios of the relevant operations are shown in the table
      // above. mul costs 5 (scalar) / 7 (vector), while add/sub/shl all cost
      // 2 for both scalar and vector types. The two-instruction patterns
      // (add/sub + shl, total 4) are therefore always profitable, but the
      // three-instruction pattern
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add + shl),
      // so we should only do it for vector types.
19006 return IsAddOne && IsNeg ? VT.isVector() : true;
19007 }
19008 };
19009
19010 EVT VT = N->getValueType(ResNo: 0);
19011 SDLoc DL(N);
19012
19013 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19014 bool IsNeg = MulAmt.isNegative();
19015 APInt MulAmtAbs = MulAmt.abs();
19016
19017 if ((MulAmtAbs - 1).isPowerOf2()) {
19018 // (mul x, 2^N + 1) => (add (shl x, N), x)
19019 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
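    // For illustration: (mul x, 9) has MulAmtAbs - 1 == 8 == 2^3, so it
    // becomes (add (shl x, 3), x); (mul x, -9) additionally wraps the result
    // in (sub 0, ...).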
19020
19021 if (!IsProfitable(IsNeg, true, VT))
19022 return SDValue();
19023
19024 SDValue Op0 = N->getOperand(Num: 0);
19025 SDValue Op1 =
19026 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
19027 N2: DAG.getConstant(Val: (MulAmtAbs - 1).logBase2(), DL, VT));
19028 SDValue Res = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1);
19029
19030 if (!IsNeg)
19031 return Res;
19032
19033 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Res);
19034 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19035 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19036 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19037
19038 if (!IsProfitable(IsNeg, false, VT))
19039 return SDValue();
19040
19041 SDValue Op0 = N->getOperand(Num: 0);
19042 SDValue Op1 =
19043 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
19044 N2: DAG.getConstant(Val: (MulAmtAbs + 1).logBase2(), DL, VT));
19045
19046 if (!IsNeg)
19047 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op1, N2: Op0);
19048 else
19049 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1);
19050
19051 } else {
19052 return SDValue();
19053 }
19054}
19055
// Combine an FMA-like op (such as fnmsub) with fnegs into the appropriate op.
// Do this in the combiner since we need to check SD flags and other subtarget
// features.
19058SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19059 DAGCombinerInfo &DCI) const {
19060 SDValue N0 = N->getOperand(Num: 0);
19061 SDValue N1 = N->getOperand(Num: 1);
19062 SDValue N2 = N->getOperand(Num: 2);
19063 SDNodeFlags Flags = N->getFlags();
19064 EVT VT = N->getValueType(ResNo: 0);
19065 SelectionDAG &DAG = DCI.DAG;
19066 const TargetOptions &Options = getTargetMachine().Options;
19067 unsigned Opc = N->getOpcode();
19068 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
19069 bool LegalOps = !DCI.isBeforeLegalizeOps();
19070 SDLoc Loc(N);
19071
19072 if (!isOperationLegal(Op: ISD::FMA, VT))
19073 return SDValue();
19074
19075 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19076 // since (fnmsub a b c)=-0 while c-ab=+0.
19077 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19078 return SDValue();
19079
19080 // (fma (fneg a) b c) => (fnmsub a b c)
19081 // (fnmsub (fneg a) b c) => (fma a b c)
19082 if (SDValue NegN0 = getCheaperNegatedExpression(Op: N0, DAG, LegalOps, OptForSize: CodeSize))
19083 return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: NegN0, N2: N1, N3: N2, Flags);
19084
19085 // (fma a (fneg b) c) => (fnmsub a b c)
19086 // (fnmsub a (fneg b) c) => (fma a b c)
19087 if (SDValue NegN1 = getCheaperNegatedExpression(Op: N1, DAG, LegalOps, OptForSize: CodeSize))
19088 return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: N0, N2: NegN1, N3: N2, Flags);
19089
19090 return SDValue();
19091}
19092
19093bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
19095 if (!Subtarget.is64BitELFABI())
19096 return false;
19097
19098 // If not a tail call then no need to proceed.
19099 if (!CI->isTailCall())
19100 return false;
19101
19102 // If sibling calls have been disabled and tail-calls aren't guaranteed
19103 // there is no reason to duplicate.
19104 auto &TM = getTargetMachine();
19105 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19106 return false;
19107
19108 // Can't tail call a function called indirectly, or if it has variadic args.
19109 const Function *Callee = CI->getCalledFunction();
19110 if (!Callee || Callee->isVarArg())
19111 return false;
19112
19113 // Make sure the callee and caller calling conventions are eligible for tco.
19114 const Function *Caller = CI->getParent()->getParent();
19115 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC: Caller->getCallingConv(),
19116 CalleeCC: CI->getCallingConv()))
19117 return false;
19118
19119 // If the function is local then we have a good chance at tail-calling it
19120 return getTargetMachine().shouldAssumeDSOLocal(GV: Callee);
19121}
19122
19123bool PPCTargetLowering::
19124isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19125 const Value *Mask = AndI.getOperand(i: 1);
19126 // If the mask is suitable for andi. or andis. we should sink the and.
19127 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Mask)) {
19128 // Can't handle constants wider than 64-bits.
19129 if (CI->getBitWidth() > 64)
19130 return false;
19131 int64_t ConstVal = CI->getZExtValue();
19132 return isUInt<16>(x: ConstVal) ||
19133 (isUInt<16>(x: ConstVal >> 16) && !(ConstVal & 0xFFFF));
19134 }
19135
19136 // For non-constant masks, we can always use the record-form and.
19137 return true;
19138}
19139
/// getAddrModeForFlags - Based on the set of address flags, select the
/// optimal instruction format to match against.
19142PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19143 // This is not a node we should be handling here.
19144 if (Flags == PPC::MOF_None)
19145 return PPC::AM_None;
19146 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19147 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DForm))
19148 if ((Flags & FlagSet) == FlagSet)
19149 return PPC::AM_DForm;
19150 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DSForm))
19151 if ((Flags & FlagSet) == FlagSet)
19152 return PPC::AM_DSForm;
19153 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DQForm))
19154 if ((Flags & FlagSet) == FlagSet)
19155 return PPC::AM_DQForm;
19156 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_PrefixDForm))
19157 if ((Flags & FlagSet) == FlagSet)
19158 return PPC::AM_PrefixDForm;
19159 // If no other forms are selected, return an X-Form as it is the most
19160 // general addressing mode.
19161 return PPC::AM_XForm;
19162}
19163
19164/// Set alignment flags based on whether or not the Frame Index is aligned.
19165/// Utilized when computing flags for address computation when selecting
19166/// load and store instructions.
19167static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19168 SelectionDAG &DAG) {
19169 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19170 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: IsAdd ? N.getOperand(i: 0) : N);
19171 if (!FI)
19172 return;
19173 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
19174 unsigned FrameIndexAlign = MFI.getObjectAlign(ObjectIdx: FI->getIndex()).value();
19175 // If this is (add $FI, $S16Imm), the alignment flags are already set
19176 // based on the immediate. We just need to clear the alignment flags
19177 // if the FI alignment is weaker.
19178 if ((FrameIndexAlign % 4) != 0)
19179 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19180 if ((FrameIndexAlign % 16) != 0)
19181 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19182 // If the address is a plain FrameIndex, set alignment flags based on
19183 // FI alignment.
19184 if (!IsAdd) {
19185 if ((FrameIndexAlign % 4) == 0)
19186 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19187 if ((FrameIndexAlign % 16) == 0)
19188 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19189 }
19190}
19191
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function takes into account whether the node is a constant,
/// an ADD, or an OR, and computes the address flags accordingly.
19196static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19197 SelectionDAG &DAG) {
19198 // Set the alignment flags for the node depending on if the node is
19199 // 4-byte or 16-byte aligned.
19200 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19201 if ((Imm & 0x3) == 0)
19202 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19203 if ((Imm & 0xf) == 0)
19204 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19205 };
19206
19207 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
19208 // All 32-bit constants can be computed as LIS + Disp.
19209 const APInt &ConstImm = CN->getAPIntValue();
19210 if (ConstImm.isSignedIntN(N: 32)) { // Flag to handle 32-bit constants.
19211 FlagSet |= PPC::MOF_AddrIsSImm32;
19212 SetAlignFlagsForImm(ConstImm.getZExtValue());
19213 setAlignFlagsForFI(N, FlagSet, DAG);
19214 }
19215 if (ConstImm.isSignedIntN(N: 34)) // Flag to handle 34-bit constants.
19216 FlagSet |= PPC::MOF_RPlusSImm34;
19217 else // Let constant materialization handle large constants.
19218 FlagSet |= PPC::MOF_NotAddNorCst;
19219 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19220 // This address can be represented as an addition of:
19221 // - Register + Imm16 (possibly a multiple of 4/16)
19222 // - Register + Imm34
19223 // - Register + PPCISD::Lo
19224 // - Register + Register
19225 // In any case, we won't have to match this as Base + Zero.
19226 SDValue RHS = N.getOperand(i: 1);
19227 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: RHS)) {
19228 const APInt &ConstImm = CN->getAPIntValue();
19229 if (ConstImm.isSignedIntN(N: 16)) {
19230 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19231 SetAlignFlagsForImm(ConstImm.getZExtValue());
19232 setAlignFlagsForFI(N, FlagSet, DAG);
19233 }
19234 if (ConstImm.isSignedIntN(N: 34))
19235 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19236 else
19237 FlagSet |= PPC::MOF_RPlusR; // Register.
19238 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(i: 1))
19239 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19240 else
19241 FlagSet |= PPC::MOF_RPlusR;
19242 } else { // The address computation is not a constant or an addition.
19243 setAlignFlagsForFI(N, FlagSet, DAG);
19244 FlagSet |= PPC::MOF_NotAddNorCst;
19245 }
19246}
19247
19248static bool isPCRelNode(SDValue N) {
19249 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
19250 isValidPCRelNode<ConstantPoolSDNode>(N) ||
19251 isValidPCRelNode<GlobalAddressSDNode>(N) ||
19252 isValidPCRelNode<JumpTableSDNode>(N) ||
19253 isValidPCRelNode<BlockAddressSDNode>(N));
19254}
19255
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
19258unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19259 SelectionDAG &DAG) const {
19260 unsigned FlagSet = PPC::MOF_None;
19261
19262 // Compute subtarget flags.
19263 if (!Subtarget.hasP9Vector())
19264 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19265 else
19266 FlagSet |= PPC::MOF_SubtargetP9;
19267
19268 if (Subtarget.hasPrefixInstrs())
19269 FlagSet |= PPC::MOF_SubtargetP10;
19270
19271 if (Subtarget.hasSPE())
19272 FlagSet |= PPC::MOF_SubtargetSPE;
19273
19274 // Check if we have a PCRel node and return early.
19275 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19276 return FlagSet;
19277
19278 // If the node is the paired load/store intrinsics, compute flags for
19279 // address computation and return early.
19280 unsigned ParentOp = Parent->getOpcode();
19281 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19282 (ParentOp == ISD::INTRINSIC_VOID))) {
19283 unsigned ID = Parent->getConstantOperandVal(Num: 1);
19284 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19285 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19286 ? Parent->getOperand(Num: 2)
19287 : Parent->getOperand(Num: 3);
19288 computeFlagsForAddressComputation(N: IntrinOp, FlagSet, DAG);
19289 FlagSet |= PPC::MOF_Vector;
19290 return FlagSet;
19291 }
19292 }
19293
19294 // Mark this as something we don't want to handle here if it is atomic
19295 // or pre-increment instruction.
19296 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Val: Parent))
19297 if (LSB->isIndexed())
19298 return PPC::MOF_None;
19299
  // Compute in-memory type flags. This is based on whether the type is a
  // scalar integer, a float, or a vector.
19302 const MemSDNode *MN = dyn_cast<MemSDNode>(Val: Parent);
19303 assert(MN && "Parent should be a MemSDNode!");
19304 EVT MemVT = MN->getMemoryVT();
19305 unsigned Size = MemVT.getSizeInBits();
19306 if (MemVT.isScalarInteger()) {
19307 assert(Size <= 128 &&
19308 "Not expecting scalar integers larger than 16 bytes!");
19309 if (Size < 32)
19310 FlagSet |= PPC::MOF_SubWordInt;
19311 else if (Size == 32)
19312 FlagSet |= PPC::MOF_WordInt;
19313 else
19314 FlagSet |= PPC::MOF_DoubleWordInt;
19315 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19316 if (Size == 128)
19317 FlagSet |= PPC::MOF_Vector;
19318 else if (Size == 256) {
19319 assert(Subtarget.pairedVectorMemops() &&
19320 "256-bit vectors are only available when paired vector memops is "
19321 "enabled!");
19322 FlagSet |= PPC::MOF_Vector;
19323 } else
19324 llvm_unreachable("Not expecting illegal vectors!");
19325 } else { // Floating point type: can be scalar, f128 or vector types.
19326 if (Size == 32 || Size == 64)
19327 FlagSet |= PPC::MOF_ScalarFloat;
19328 else if (MemVT == MVT::f128 || MemVT.isVector())
19329 FlagSet |= PPC::MOF_Vector;
19330 else
19331 llvm_unreachable("Not expecting illegal scalar floats!");
19332 }
19333
19334 // Compute flags for address computation.
19335 computeFlagsForAddressComputation(N, FlagSet, DAG);
19336
19337 // Compute type extension flags.
19338 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Val: Parent)) {
19339 switch (LN->getExtensionType()) {
19340 case ISD::SEXTLOAD:
19341 FlagSet |= PPC::MOF_SExt;
19342 break;
19343 case ISD::EXTLOAD:
19344 case ISD::ZEXTLOAD:
19345 FlagSet |= PPC::MOF_ZExt;
19346 break;
19347 case ISD::NON_EXTLOAD:
19348 FlagSet |= PPC::MOF_NoExt;
19349 break;
19350 }
19351 } else
19352 FlagSet |= PPC::MOF_NoExt;
19353
19354 // For integers, no extension is the same as zero extension.
19355 // We set the extension mode to zero extension so we don't have
19356 // to add separate entries in AddrModesMap for loads and stores.
19357 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19358 FlagSet |= PPC::MOF_ZExt;
19359 FlagSet &= ~PPC::MOF_NoExt;
19360 }
19361
19362 // If we don't have prefixed instructions, 34-bit constants should be
19363 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19364 bool IsNonP1034BitConst =
19365 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
19366 FlagSet) == PPC::MOF_RPlusSImm34;
19367 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19368 IsNonP1034BitConst)
19369 FlagSet |= PPC::MOF_NotAddNorCst;
19370
19371 return FlagSet;
19372}
19373
19374/// SelectForceXFormMode - Given the specified address, force it to be
19375/// represented as an indexed [r+r] operation (an XForm instruction).
19376PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
19377 SDValue &Base,
19378 SelectionDAG &DAG) const {
19379
19380 PPC::AddrMode Mode = PPC::AM_XForm;
19381 int16_t ForceXFormImm = 0;
19382 if (provablyDisjointOr(DAG, N) &&
19383 !isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: ForceXFormImm)) {
19384 Disp = N.getOperand(i: 0);
19385 Base = N.getOperand(i: 1);
19386 return Mode;
19387 }
19388
19389 // If the address is the result of an add, we will utilize the fact that the
19390 // address calculation includes an implicit add. However, we can reduce
19391 // register pressure if we do not materialize a constant just for use as the
19392 // index register. We only get rid of the add if it is not an add of a
19393 // value and a 16-bit signed constant and both have a single use.
19394 if (N.getOpcode() == ISD::ADD &&
19395 (!isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: ForceXFormImm) ||
19396 !N.getOperand(i: 1).hasOneUse() || !N.getOperand(i: 0).hasOneUse())) {
19397 Disp = N.getOperand(i: 0);
19398 Base = N.getOperand(i: 1);
19399 return Mode;
19400 }
19401
19402 // Otherwise, use R0 as the base register.
19403 Disp = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19404 VT: N.getValueType());
19405 Base = N;
19406
19407 return Mode;
19408}
19409
19410bool PPCTargetLowering::splitValueIntoRegisterParts(
19411 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19412 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19413 EVT ValVT = Val.getValueType();
19414 // If we are splitting a scalar integer into f64 parts (i.e. so they
19415 // can be placed into VFRC registers), we need to zero extend and
19416 // bitcast the values. This will ensure the value is placed into a
19417 // VSR using direct moves or stack operations as needed.
19418 if (PartVT == MVT::f64 &&
19419 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19420 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
19421 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f64, Operand: Val);
19422 Parts[0] = Val;
19423 return true;
19424 }
19425 return false;
19426}
19427
19428SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19429 SelectionDAG &DAG) const {
19430 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19431 TargetLowering::CallLoweringInfo CLI(DAG);
19432 EVT RetVT = Op.getValueType();
19433 Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext());
19434 SDValue Callee =
19435 DAG.getExternalSymbol(Sym: LibCallName, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
19436 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: false);
19437 TargetLowering::ArgListTy Args;
19438 TargetLowering::ArgListEntry Entry;
19439 for (const SDValue &N : Op->op_values()) {
19440 EVT ArgVT = N.getValueType();
19441 Type *ArgTy = ArgVT.getTypeForEVT(Context&: *DAG.getContext());
19442 Entry.Node = N;
19443 Entry.Ty = ArgTy;
19444 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(Ty: ArgTy, IsSigned: SignExtend);
19445 Entry.IsZExt = !Entry.IsSExt;
19446 Args.push_back(x: Entry);
19447 }
19448
19449 SDValue InChain = DAG.getEntryNode();
19450 SDValue TCChain = InChain;
19451 const Function &F = DAG.getMachineFunction().getFunction();
19452 bool isTailCall =
19453 TLI.isInTailCallPosition(DAG, Node: Op.getNode(), Chain&: TCChain) &&
19454 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
19455 if (isTailCall)
19456 InChain = TCChain;
19457 CLI.setDebugLoc(SDLoc(Op))
19458 .setChain(InChain)
19459 .setLibCallee(CC: CallingConv::C, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
19460 .setTailCall(isTailCall)
19461 .setSExtResult(SignExtend)
19462 .setZExtResult(!SignExtend)
19463 .setIsPostTypeLegalization(true);
19464 return TLI.LowerCallTo(CLI).first;
19465}
19466
19467SDValue PPCTargetLowering::lowerLibCallBasedOnType(
19468 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
19469 SelectionDAG &DAG) const {
19470 if (Op.getValueType() == MVT::f32)
19471 return lowerToLibCall(LibCallName: LibCallFloatName, Op, DAG);
19472
19473 if (Op.getValueType() == MVT::f64)
19474 return lowerToLibCall(LibCallName: LibCallDoubleName, Op, DAG);
19475
19476 return SDValue();
19477}
19478
19479bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
19480 SDNodeFlags Flags = Op.getNode()->getFlags();
19481 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
19482 Flags.hasNoNaNs() && Flags.hasNoInfs();
19483}
19484
19485bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
19486 return Op.getNode()->getFlags().hasApproximateFuncs();
19487}
19488
19489bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
19490 return getTargetMachine().Options.PPCGenScalarMASSEntries;
19491}
19492
19493SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
19494 const char *LibCallFloatName,
19495 const char *LibCallDoubleNameFinite,
19496 const char *LibCallFloatNameFinite,
19497 SDValue Op,
19498 SelectionDAG &DAG) const {
19499 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
19500 return SDValue();
19501
19502 if (!isLowringToMASSFiniteSafe(Op))
19503 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
19504 DAG);
19505
19506 return lowerLibCallBasedOnType(LibCallFloatName: LibCallFloatNameFinite,
19507 LibCallDoubleName: LibCallDoubleNameFinite, Op, DAG);
19508}
19509
19510SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
19511 return lowerLibCallBase(LibCallDoubleName: "__xl_pow", LibCallFloatName: "__xl_powf", LibCallDoubleNameFinite: "__xl_pow_finite",
19512 LibCallFloatNameFinite: "__xl_powf_finite", Op, DAG);
19513}
19514
19515SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
19516 return lowerLibCallBase(LibCallDoubleName: "__xl_sin", LibCallFloatName: "__xl_sinf", LibCallDoubleNameFinite: "__xl_sin_finite",
19517 LibCallFloatNameFinite: "__xl_sinf_finite", Op, DAG);
19518}
19519
19520SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
19521 return lowerLibCallBase(LibCallDoubleName: "__xl_cos", LibCallFloatName: "__xl_cosf", LibCallDoubleNameFinite: "__xl_cos_finite",
19522 LibCallFloatNameFinite: "__xl_cosf_finite", Op, DAG);
19523}
19524
19525SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
19526 return lowerLibCallBase(LibCallDoubleName: "__xl_log", LibCallFloatName: "__xl_logf", LibCallDoubleNameFinite: "__xl_log_finite",
19527 LibCallFloatNameFinite: "__xl_logf_finite", Op, DAG);
19528}
19529
19530SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
19531 return lowerLibCallBase(LibCallDoubleName: "__xl_log10", LibCallFloatName: "__xl_log10f", LibCallDoubleNameFinite: "__xl_log10_finite",
19532 LibCallFloatNameFinite: "__xl_log10f_finite", Op, DAG);
19533}
19534
19535SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
19536 return lowerLibCallBase(LibCallDoubleName: "__xl_exp", LibCallFloatName: "__xl_expf", LibCallDoubleNameFinite: "__xl_exp_finite",
19537 LibCallFloatNameFinite: "__xl_expf_finite", Op, DAG);
19538}
19539
19540// If we happen to match to an aligned D-Form, check if the Frame Index is
19541// adequately aligned. If it is not, reset the mode to match to X-Form.
19542static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
19543 PPC::AddrMode &Mode) {
19544 if (!isa<FrameIndexSDNode>(Val: N))
19545 return;
19546 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
19547 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
19548 Mode = PPC::AM_XForm;
19549}
19550
/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
19554PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
19555 SDValue N, SDValue &Disp,
19556 SDValue &Base,
19557 SelectionDAG &DAG,
19558 MaybeAlign Align) const {
19559 SDLoc DL(Parent);
19560
19561 // Compute the address flags.
19562 unsigned Flags = computeMOFlags(Parent, N, DAG);
19563
19564 // Get the optimal address mode based on the Flags.
19565 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
19566
19567 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
19568 // Select an X-Form load if it is not.
19569 setXFormForUnalignedFI(N, Flags, Mode);
19570
19571 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
19572 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
19573 assert(Subtarget.isUsingPCRelativeCalls() &&
19574 "Must be using PC-Relative calls when a valid PC-Relative node is "
19575 "present!");
19576 Mode = PPC::AM_PCRel;
19577 }
19578
19579 // Set Base and Disp accordingly depending on the address mode.
19580 switch (Mode) {
19581 case PPC::AM_DForm:
19582 case PPC::AM_DSForm:
19583 case PPC::AM_DQForm: {
19584 // This is a register plus a 16-bit immediate. The base will be the
19585 // register and the displacement will be the immediate unless it
19586 // isn't sufficiently aligned.
19587 if (Flags & PPC::MOF_RPlusSImm16) {
19588 SDValue Op0 = N.getOperand(i: 0);
19589 SDValue Op1 = N.getOperand(i: 1);
19590 int16_t Imm = Op1->getAsZExtVal();
19591 if (!Align || isAligned(Lhs: *Align, SizeInBytes: Imm)) {
19592 Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: N.getValueType());
19593 Base = Op0;
19594 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op0)) {
19595 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
19596 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
19597 }
19598 break;
19599 }
19600 }
19601 // This is a register plus the @lo relocation. The base is the register
19602 // and the displacement is the global address.
19603 else if (Flags & PPC::MOF_RPlusLo) {
19604 Disp = N.getOperand(i: 1).getOperand(i: 0); // The global address.
19605 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
19606 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
19607 Disp.getOpcode() == ISD::TargetConstantPool ||
19608 Disp.getOpcode() == ISD::TargetJumpTable);
19609 Base = N.getOperand(i: 0);
19610 break;
19611 }
    // This is a constant address of at most 32 bits. The base will be
    // zero or load-immediate-shifted (LIS) and the displacement will be
    // the low 16 bits of the address.
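    // For illustration: the constant address 0x12348000 splits into
    // Base = LIS 0x1235 and Disp = -0x8000; the high halfword is bumped to
    // 0x1235 to compensate for the negative sign-extended displacement.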
19615 else if (Flags & PPC::MOF_AddrIsSImm32) {
19616 auto *CN = cast<ConstantSDNode>(Val&: N);
19617 EVT CNType = CN->getValueType(ResNo: 0);
19618 uint64_t CNImm = CN->getZExtValue();
19619 // If this address fits entirely in a 16-bit sext immediate field, codegen
19620 // this as "d, 0".
19621 int16_t Imm;
19622 if (isIntS16Immediate(N: CN, Imm) && (!Align || isAligned(Lhs: *Align, SizeInBytes: Imm))) {
19623 Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: CNType);
19624 Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19625 VT: CNType);
19626 break;
19627 }
19628 // Handle 32-bit sext immediate with LIS + Addr mode.
19629 if ((CNType == MVT::i32 || isInt<32>(x: CNImm)) &&
19630 (!Align || isAligned(Lhs: *Align, SizeInBytes: CNImm))) {
19631 int32_t Addr = (int32_t)CNImm;
19632 // Otherwise, break this down into LIS + Disp.
19633 Disp = DAG.getSignedTargetConstant(Val: (int16_t)Addr, DL, VT: MVT::i32);
19634 Base = DAG.getSignedTargetConstant(Val: (Addr - (int16_t)Addr) >> 16, DL,
19635 VT: MVT::i32);
19636 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19637 Base = SDValue(DAG.getMachineNode(Opcode: LIS, dl: DL, VT: CNType, Op1: Base), 0);
19638 break;
19639 }
19640 }
    // Otherwise, the PPC::MOF_NotAddNorCst flag is set; the load/store is
    // non-foldable.
19642 Disp = DAG.getTargetConstant(Val: 0, DL, VT: getPointerTy(DL: DAG.getDataLayout()));
19643 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
19644 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
19645 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
19646 } else
19647 Base = N;
19648 break;
19649 }
19650 case PPC::AM_PrefixDForm: {
19651 int64_t Imm34 = 0;
19652 unsigned Opcode = N.getOpcode();
19653 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19654 (isIntS34Immediate(Op: N.getOperand(i: 1), Imm&: Imm34))) {
      // N is an ADD/OR node, and its second operand is a 34-bit signed
      // immediate.
19656 Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
19657 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
19658 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
19659 else
19660 Base = N.getOperand(i: 0);
19661 } else if (isIntS34Immediate(Op: N, Imm&: Imm34)) {
19662 // The address is a 34-bit signed immediate.
19663 Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
19664 Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
19665 }
19666 break;
19667 }
19668 case PPC::AM_PCRel: {
19669 // When selecting PC-Relative instructions, "Base" is not utilized as
19670 // we select the address as [PC+imm].
19671 Disp = N;
19672 break;
19673 }
19674 case PPC::AM_None:
19675 break;
19676 default: { // By default, X-Form is always available to be selected.
19677 // When a frame index is not aligned, we also match by XForm.
19678 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N);
19679 Base = FI ? N : N.getOperand(i: 1);
19680 Disp = FI ? DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19681 VT: N.getValueType())
19682 : N.getOperand(i: 0);
19683 break;
19684 }
19685 }
19686 return Mode;
19687}
19688
19689CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
19690 bool Return,
19691 bool IsVarArg) const {
19692 switch (CC) {
19693 case CallingConv::Cold:
19694 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
19695 default:
19696 return CC_PPC64_ELF;
19697 }
19698}
19699
19700bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
19701 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
19702}
19703
19704TargetLowering::AtomicExpansionKind
19705PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
19706 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19707 if (shouldInlineQuadwordAtomics() && Size == 128)
19708 return AtomicExpansionKind::MaskedIntrinsic;
19709
19710 switch (AI->getOperation()) {
19711 case AtomicRMWInst::UIncWrap:
19712 case AtomicRMWInst::UDecWrap:
19713 case AtomicRMWInst::USubCond:
19714 case AtomicRMWInst::USubSat:
19715 return AtomicExpansionKind::CmpXChg;
19716 default:
19717 return TargetLowering::shouldExpandAtomicRMWInIR(RMW: AI);
19718 }
19719
19720 llvm_unreachable("unreachable atomicrmw operation");
19721}
19722
19723TargetLowering::AtomicExpansionKind
19724PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
19725 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
19726 if (shouldInlineQuadwordAtomics() && Size == 128)
19727 return AtomicExpansionKind::MaskedIntrinsic;
19728 return AtomicExpansionKind::LLSC;
19729}
19730
static Intrinsic::ID
getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:
    return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:
    return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:
    return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:
    return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:
    return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand:
    return Intrinsic::ppc_atomicrmw_nand_i128;
  }
}

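// Emit a 128-bit atomicrmw as a call to the matching ppc_atomicrmw_*_i128
// intrinsic: the increment is split into low/high i64 halves, the intrinsic
// returns the old value as an {i64, i64} pair, and the halves are recombined
// into a single i128 result.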
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = Incr->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
  Value *LoHi = Builder.CreateIntrinsic(
      getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
      {AlignedAddr, IncrLo, IncrHi});
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}

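// Emit a 128-bit cmpxchg as a call to ppc_cmpxchg_i128: the compare and new
// values are split into i64 halves, the call is bracketed by the leading and
// trailing fences required for the given ordering, and the returned halves
// are recombined into the original i128 value.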
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = CmpVal->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *IntCmpXchg =
      Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
  emitLeadingFence(Builder, CI, Ord);
  Value *LoHi =
      Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
  emitTrailingFence(Builder, CI, Ord);
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
