1//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the interfaces that Hexagon uses to lower LLVM code
10// into a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "HexagonISelLowering.h"
15#include "Hexagon.h"
16#include "HexagonMachineFunctionInfo.h"
17#include "HexagonRegisterInfo.h"
18#include "HexagonSubtarget.h"
19#include "HexagonTargetMachine.h"
20#include "HexagonTargetObjectFile.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/ADT/StringSwitch.h"
25#include "llvm/CodeGen/CallingConvLower.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineMemOperand.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/SelectionDAG.h"
31#include "llvm/CodeGen/TargetCallingConv.h"
32#include "llvm/CodeGen/ValueTypes.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/CallingConv.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/DiagnosticInfo.h"
38#include "llvm/IR/DiagnosticPrinter.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/GlobalValue.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicInst.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/IntrinsicsHexagon.h"
47#include "llvm/IR/Module.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/Value.h"
50#include "llvm/Support/Casting.h"
51#include "llvm/Support/CodeGen.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/MathExtras.h"
56#include "llvm/Support/raw_ostream.h"
57#include "llvm/Target/TargetMachine.h"
58#include <algorithm>
59#include <cassert>
60#include <cstdint>
61#include <limits>
62#include <utility>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "hexagon-lowering"
67
// Hidden command-line options for Hexagon lowering. Each option below is
// file-local (static) and not shown in the default help output (cl::Hidden).

// Jump table emission switch; initialized to true.
static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
 cl::init(Val: true), cl::Hidden,
 cl::desc("Control jump table emission on Hexagon target"));

// Hexagon SDNode scheduling switch; no explicit initial value is given here.
static cl::opt<bool>
 EnableHexSDNodeSched("enable-hexagon-sdnode-sched", cl::Hidden,
 cl::desc("Enable Hexagon SDNode scheduling"));

// Integer threshold related to jump tables; initialized to 5.
// NOTE(review): the exact meaning of the threshold is not visible in this
// part of the file - confirm at the point of use.
static cl::opt<int> MinimumJumpTables("minimum-jump-tables", cl::Hidden,
 cl::init(Val: 5),
 cl::desc("Set minimum jump tables"));

// Conversion of constant loads into immediates; initialized to true.
static cl::opt<bool>
 ConstantLoadsToImm("constant-loads-to-imm", cl::Hidden, cl::init(Val: true),
 cl::desc("Convert constant loads to immediate values."));

// Rewriting of unaligned loads as two aligned loads; initialized to false.
static cl::opt<bool> AlignLoads("hexagon-align-loads",
 cl::Hidden, cl::init(Val: false),
 cl::desc("Rewrite unaligned loads as a pair of aligned loads"));

// Initialized to false. When set (and HVX ops are not in use), the argument
// analysis in LowerCall and LowerFormalArguments selects CC_Hexagon_Legacy
// instead of CC_Hexagon.
static cl::opt<bool>
 DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
 cl::init(Val: false),
 cl::desc("Disable minimum alignment of 1 for "
 "arguments passed by value on stack"));
93
94// Implement calling convention for Hexagon.
95
96static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
97 CCValAssign::LocInfo &LocInfo,
98 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
99 static const MCPhysReg ArgRegs[] = {
100 Hexagon::R0, Hexagon::R1, Hexagon::R2,
101 Hexagon::R3, Hexagon::R4, Hexagon::R5
102 };
103 const unsigned NumArgRegs = std::size(ArgRegs);
104 unsigned RegNum = State.getFirstUnallocated(Regs: ArgRegs);
105
106 // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
107 if (RegNum != NumArgRegs && RegNum % 2 == 1)
108 State.AllocateReg(Reg: ArgRegs[RegNum]);
109
110 // Always return false here, as this function only makes sure that the first
111 // unallocated register has an even register number and does not actually
112 // allocate a register for the current argument.
113 return false;
114}
115
116#include "HexagonGenCallingConv.inc"
117
118unsigned HexagonTargetLowering::getVectorTypeBreakdownForCallingConv(
119 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
120 unsigned &NumIntermediates, MVT &RegisterVT) const {
121
122 bool isBoolVector = VT.getVectorElementType() == MVT::i1;
123 bool isPowerOf2 = VT.isPow2VectorType();
124 unsigned NumElts = VT.getVectorNumElements();
125
126 // Split vectors of type vXi1 into (X/8) vectors of type v8i1,
127 // where X is divisible by 8.
128 if (isBoolVector && !Subtarget.useHVXOps() && isPowerOf2 && NumElts >= 8) {
129 RegisterVT = MVT::v8i8;
130 IntermediateVT = MVT::v8i1;
131 NumIntermediates = NumElts / 8;
132 return NumIntermediates;
133 }
134
135 // In HVX 64-byte mode, vectors of type vXi1 are split into (X / 64) vectors
136 // of type v64i1, provided that X is divisible by 64.
137 if (isBoolVector && Subtarget.useHVX64BOps() && isPowerOf2 && NumElts >= 64) {
138 RegisterVT = MVT::v64i8;
139 IntermediateVT = MVT::v64i1;
140 NumIntermediates = NumElts / 64;
141 return NumIntermediates;
142 }
143
144 // In HVX 128-byte mode, vectors of type vXi1 are split into (X / 128) vectors
145 // of type v128i1, provided that X is divisible by 128.
146 if (isBoolVector && Subtarget.useHVX128BOps() && isPowerOf2 &&
147 NumElts >= 128) {
148 RegisterVT = MVT::v128i8;
149 IntermediateVT = MVT::v128i1;
150 NumIntermediates = NumElts / 128;
151 return NumIntermediates;
152 }
153
154 return TargetLowering::getVectorTypeBreakdownForCallingConv(
155 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
156}
157
158std::pair<MVT, unsigned>
159HexagonTargetLowering::handleMaskRegisterForCallingConv(
160 const HexagonSubtarget &Subtarget, EVT VT) const {
161 assert(VT.getVectorElementType() == MVT::i1);
162
163 const unsigned NumElems = VT.getVectorNumElements();
164
165 if (!VT.isPow2VectorType())
166 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
167
168 if (!Subtarget.useHVXOps() && NumElems >= 8)
169 return {MVT::v8i8, NumElems / 8};
170
171 if (Subtarget.useHVX64BOps() && NumElems >= 64)
172 return {MVT::v64i8, NumElems / 64};
173
174 if (Subtarget.useHVX128BOps() && NumElems >= 128)
175 return {MVT::v128i8, NumElems / 128};
176
177 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
178}
179
180MVT HexagonTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
181 CallingConv::ID CC,
182 EVT VT) const {
183
184 if (VT.isVectorOf(EltVT: MVT::i1)) {
185 auto [RegisterVT, NumRegisters] =
186 handleMaskRegisterForCallingConv(Subtarget, VT);
187 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
188 return RegisterVT;
189 }
190
191 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
192}
193
194SDValue
195HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
196 const {
197 unsigned IntNo = Op.getConstantOperandVal(i: 0);
198 SDLoc dl(Op);
199 switch (IntNo) {
200 default:
201 return SDValue(); // Don't custom lower most intrinsics.
202 case Intrinsic::thread_pointer: {
203 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
204 return DAG.getNode(Opcode: HexagonISD::THREAD_POINTER, DL: dl, VT: PtrVT);
205 }
206 }
207}
208
209/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
210/// by "Src" to address "Dst" of size "Size". Alignment information is
211/// specified by the specific parameter attribute. The copy will be passed as
212/// a byval function parameter. Sometimes what we are copying is the end of a
213/// larger object, the part that does not fit in registers.
214static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
215 SDValue Chain, ISD::ArgFlagsTy Flags,
216 SelectionDAG &DAG, const SDLoc &dl) {
217 SDValue SizeNode = DAG.getConstant(Val: Flags.getByValSize(), DL: dl, VT: MVT::i32);
218 Align Alignment = Flags.getNonZeroByValAlign();
219 return DAG.getMemcpy(Chain, dl, Dst, Src, Size: SizeNode, DstAlign: Alignment, SrcAlign: Alignment,
220 /*isVolatile=*/isVol: false, /*AlwaysInline=*/false,
221 /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(),
222 SrcPtrInfo: MachinePointerInfo());
223}
224
225bool
226HexagonTargetLowering::CanLowerReturn(
227 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
228 const SmallVectorImpl<ISD::OutputArg> &Outs,
229 LLVMContext &Context, const Type *RetTy) const {
230 SmallVector<CCValAssign, 16> RVLocs;
231 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
232
233 if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
234 return CCInfo.CheckReturn(Outs, Fn: RetCC_Hexagon_HVX);
235 return CCInfo.CheckReturn(Outs, Fn: RetCC_Hexagon);
236}
237
238// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
239// passed by value, the function prototype is modified to return void and
240// the value is stored in memory pointed by a pointer passed by caller.
241SDValue
242HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
243 bool IsVarArg,
244 const SmallVectorImpl<ISD::OutputArg> &Outs,
245 const SmallVectorImpl<SDValue> &OutVals,
246 const SDLoc &dl, SelectionDAG &DAG) const {
247 // CCValAssign - represent the assignment of the return value to locations.
248 SmallVector<CCValAssign, 16> RVLocs;
249
250 // CCState - Info about the registers and stack slot.
251 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
252 *DAG.getContext());
253
254 // Analyze return values of ISD::RET
255 if (Subtarget.useHVXOps())
256 CCInfo.AnalyzeReturn(Outs, Fn: RetCC_Hexagon_HVX);
257 else
258 CCInfo.AnalyzeReturn(Outs, Fn: RetCC_Hexagon);
259
260 SDValue Glue;
261 SmallVector<SDValue, 4> RetOps(1, Chain);
262
263 // Copy the result values into the output registers.
264 for (unsigned i = 0; i != RVLocs.size(); ++i) {
265 CCValAssign &VA = RVLocs[i];
266 SDValue Val = OutVals[i];
267
268 switch (VA.getLocInfo()) {
269 default:
270 // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
271 llvm_unreachable("Unknown loc info!");
272 case CCValAssign::Full:
273 break;
274 case CCValAssign::BCvt:
275 Val = DAG.getBitcast(VT: VA.getLocVT(), V: Val);
276 break;
277 case CCValAssign::SExt:
278 Val = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Val);
279 break;
280 case CCValAssign::ZExt:
281 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Val);
282 break;
283 case CCValAssign::AExt:
284 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Val);
285 break;
286 }
287
288 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: Val, Glue);
289
290 // Guarantee that all emitted copies are stuck together with flags.
291 Glue = Chain.getValue(R: 1);
292 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
293 }
294
295 RetOps[0] = Chain; // Update chain.
296
297 // Add the glue if we have it.
298 if (Glue.getNode())
299 RetOps.push_back(Elt: Glue);
300
301 return DAG.getNode(Opcode: HexagonISD::RET_GLUE, DL: dl, VT: MVT::Other, Ops: RetOps);
302}
303
304bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
305 // If either no tail call or told not to tail call at all, don't.
306 return CI->isTailCall();
307}
308
/// Translate a register name into the corresponding Hexagon register.
///
/// Recognized names (matched exactly as spelled below):
///   - "r0" .. "r31"                  scalar registers R0..R31
///   - "r1:0", "r3:2", .. "r31:30"    register pairs D0..D15
///   - "sp", "fp", "lr"               aliases for R29, R30 and R31
///   - "p0" .. "p3"                   P0..P3
///   - "sa0", "lc0", "sa1", "lc1"     SA0, LC0, SA1, LC1
///   - "m0", "m1"                     M0, M1
///   - "usr", "ugp", "cs0", "cs1"     USR, UGP, CS0, CS1
///
/// \param RegName name to look up.
/// \param VT      not used by this implementation.
/// \returns the matching register, or a default-constructed Register when the
///          name is not in the table. The unnamed MachineFunction parameter
///          is not used.
Register HexagonTargetLowering::getRegisterByName(
 const char* RegName, LLT VT, const MachineFunction &) const {
 // Look the name up in a fixed table of scalar registers, register pairs,
 // aliases, predicate registers and a handful of control registers.
 Register Reg = StringSwitch<Register>(RegName)
 .Case(S: "r0", Value: Hexagon::R0)
 .Case(S: "r1", Value: Hexagon::R1)
 .Case(S: "r2", Value: Hexagon::R2)
 .Case(S: "r3", Value: Hexagon::R3)
 .Case(S: "r4", Value: Hexagon::R4)
 .Case(S: "r5", Value: Hexagon::R5)
 .Case(S: "r6", Value: Hexagon::R6)
 .Case(S: "r7", Value: Hexagon::R7)
 .Case(S: "r8", Value: Hexagon::R8)
 .Case(S: "r9", Value: Hexagon::R9)
 .Case(S: "r10", Value: Hexagon::R10)
 .Case(S: "r11", Value: Hexagon::R11)
 .Case(S: "r12", Value: Hexagon::R12)
 .Case(S: "r13", Value: Hexagon::R13)
 .Case(S: "r14", Value: Hexagon::R14)
 .Case(S: "r15", Value: Hexagon::R15)
 .Case(S: "r16", Value: Hexagon::R16)
 .Case(S: "r17", Value: Hexagon::R17)
 .Case(S: "r18", Value: Hexagon::R18)
 .Case(S: "r19", Value: Hexagon::R19)
 .Case(S: "r20", Value: Hexagon::R20)
 .Case(S: "r21", Value: Hexagon::R21)
 .Case(S: "r22", Value: Hexagon::R22)
 .Case(S: "r23", Value: Hexagon::R23)
 .Case(S: "r24", Value: Hexagon::R24)
 .Case(S: "r25", Value: Hexagon::R25)
 .Case(S: "r26", Value: Hexagon::R26)
 .Case(S: "r27", Value: Hexagon::R27)
 .Case(S: "r28", Value: Hexagon::R28)
 .Case(S: "r29", Value: Hexagon::R29)
 .Case(S: "r30", Value: Hexagon::R30)
 .Case(S: "r31", Value: Hexagon::R31)
 .Case(S: "r1:0", Value: Hexagon::D0)
 .Case(S: "r3:2", Value: Hexagon::D1)
 .Case(S: "r5:4", Value: Hexagon::D2)
 .Case(S: "r7:6", Value: Hexagon::D3)
 .Case(S: "r9:8", Value: Hexagon::D4)
 .Case(S: "r11:10", Value: Hexagon::D5)
 .Case(S: "r13:12", Value: Hexagon::D6)
 .Case(S: "r15:14", Value: Hexagon::D7)
 .Case(S: "r17:16", Value: Hexagon::D8)
 .Case(S: "r19:18", Value: Hexagon::D9)
 .Case(S: "r21:20", Value: Hexagon::D10)
 .Case(S: "r23:22", Value: Hexagon::D11)
 .Case(S: "r25:24", Value: Hexagon::D12)
 .Case(S: "r27:26", Value: Hexagon::D13)
 .Case(S: "r29:28", Value: Hexagon::D14)
 .Case(S: "r31:30", Value: Hexagon::D15)
 .Case(S: "sp", Value: Hexagon::R29)
 .Case(S: "fp", Value: Hexagon::R30)
 .Case(S: "lr", Value: Hexagon::R31)
 .Case(S: "p0", Value: Hexagon::P0)
 .Case(S: "p1", Value: Hexagon::P1)
 .Case(S: "p2", Value: Hexagon::P2)
 .Case(S: "p3", Value: Hexagon::P3)
 .Case(S: "sa0", Value: Hexagon::SA0)
 .Case(S: "lc0", Value: Hexagon::LC0)
 .Case(S: "sa1", Value: Hexagon::SA1)
 .Case(S: "lc1", Value: Hexagon::LC1)
 .Case(S: "m0", Value: Hexagon::M0)
 .Case(S: "m1", Value: Hexagon::M1)
 .Case(S: "usr", Value: Hexagon::USR)
 .Case(S: "ugp", Value: Hexagon::UGP)
 .Case(S: "cs0", Value: Hexagon::CS0)
 .Case(S: "cs1", Value: Hexagon::CS1)
 .Default(Value: Register());
 return Reg;
}
381
/// LowerCallResult - Copy the values returned by a call out of the physical
/// registers assigned by the return calling convention.
///
/// \param Chain    input chain; the updated chain is returned.
/// \param Glue     input glue, threaded through the emitted copies.
/// \param CallConv calling convention used to set up the CCState.
/// \param IsVarArg passed on to the CCState.
/// \param Ins      description of the values produced by the call.
/// \param dl       debug location used for all created nodes.
/// \param DAG      the DAG being built.
/// \param InVals   [out] receives one SDValue per returned value, in order.
/// \param OutVals  not referenced in this function.
/// \param Callee   not referenced in this function.
/// \returns the chain after the last copy.
SDValue HexagonTargetLowering::LowerCallResult(
 SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
 const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
 // Assign locations to each value returned by this call.
 SmallVector<CCValAssign, 16> RVLocs;

 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
 *DAG.getContext());

 // Same selection of return convention as in LowerReturn/CanLowerReturn.
 if (Subtarget.useHVXOps())
 CCInfo.AnalyzeCallResult(Ins, Fn: RetCC_Hexagon_HVX);
 else
 CCInfo.AnalyzeCallResult(Ins, Fn: RetCC_Hexagon);

 // Copy all of the result registers out of their specified physreg.
 for (unsigned i = 0; i != RVLocs.size(); ++i) {
 SDValue RetVal;
 if (RVLocs[i].getValVT() == MVT::i1) {
 // Return values of type MVT::i1 require special handling. The reason
 // is that MVT::i1 is associated with the PredRegs register class, but
 // values of that type are still returned in R0. Generate an explicit
 // copy into a predicate register from R0, and treat the value of the
 // predicate register as the call result.
 auto &MRI = DAG.getMachineFunction().getRegInfo();
 // Read the assigned location register as a 32-bit value.
 SDValue FR0 = DAG.getCopyFromReg(Chain, dl, Reg: RVLocs[i].getLocReg(),
 VT: MVT::i32, Glue);
 // FR0 = (Value, Chain, Glue)
 Register PredR = MRI.createVirtualRegister(RegClass: &Hexagon::PredRegsRegClass);
 // Move that value into a fresh virtual predicate register.
 SDValue TPR = DAG.getCopyToReg(Chain: FR0.getValue(R: 1), dl, Reg: PredR,
 N: FR0.getValue(R: 0), Glue: FR0.getValue(R: 2));
 // TPR = (Chain, Glue)
 // Don't glue this CopyFromReg, because it copies from a virtual
 // register. If it is glued to the call, InstrEmitter will add it
 // as an implicit def to the call (EmitMachineNode).
 RetVal = DAG.getCopyFromReg(Chain: TPR.getValue(R: 0), dl, Reg: PredR, VT: MVT::i1);
 // Chain and glue continue from the copy into the predicate register,
 // not from the unglued read above.
 Glue = TPR.getValue(R: 1);
 Chain = TPR.getValue(R: 0);
 } else {
 // Ordinary case: read the value in its own type; the copy yields
 // (Value, Chain, Glue).
 RetVal = DAG.getCopyFromReg(Chain, dl, Reg: RVLocs[i].getLocReg(),
 VT: RVLocs[i].getValVT(), Glue);
 Glue = RetVal.getValue(R: 2);
 Chain = RetVal.getValue(R: 1);
 }
 InVals.push_back(Elt: RetVal.getValue(R: 0));
 }

 return Chain;
}
437
/// LowerCall - Lower an outgoing call.
///
/// Outgoing arguments are assigned to registers or stack slots by the Hexagon
/// calling convention, converted to their location type, and either stored to
/// the stack or copied into registers. For a regular call the sequence is
/// wrapped in CALLSEQ_START/CALLSEQ_END and the results are extracted with
/// LowerCallResult. For a tail call a HexagonISD::TC_RETURN node is returned
/// instead.
///
/// \param CLI    call description; CLI.IsTailCall may be cleared here when the
///               call turns out not to be eligible for tail-call lowering.
/// \param InVals [out] receives the values returned by the call (regular
///               calls only).
/// \returns the TC_RETURN node for tail calls, otherwise the chain returned by
///          LowerCallResult.
SDValue
HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 SmallVectorImpl<SDValue> &InVals) const {
 SelectionDAG &DAG = CLI.DAG;
 SDLoc &dl = CLI.DL;
 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
 SDValue Chain = CLI.Chain;
 SDValue Callee = CLI.Callee;
 CallingConv::ID CallConv = CLI.CallConv;
 bool IsVarArg = CLI.IsVarArg;
 bool DoesNotReturn = CLI.DoesNotReturn;

 // The callee returns a struct via sret if the first outgoing argument is
 // flagged as such.
 bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
 MachineFunction &MF = DAG.getMachineFunction();
 MachineFrameInfo &MFI = MF.getFrameInfo();
 auto PtrVT = getPointerTy(DL: MF.getDataLayout());

 // A global-address callee is rewritten as a target global address (i32).
 if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Val&: Callee))
 Callee = DAG.getTargetGlobalAddress(GV: GAN->getGlobal(), DL: dl, VT: MVT::i32);

 // Linux ABI treats var-arg calls the same way as regular ones.
 bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;

 // Analyze operands of the call, assigning locations to each operand.
 SmallVector<CCValAssign, 16> ArgLocs;
 CCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext());

 // Convention selection: HVX first, then the legacy convention if minimum
 // argument alignment was disabled on the command line, else the default.
 if (Subtarget.useHVXOps())
 CCInfo.AnalyzeCallOperands(Outs, Fn: CC_Hexagon_HVX);
 else if (DisableArgsMinAlignment)
 CCInfo.AnalyzeCallOperands(Outs, Fn: CC_Hexagon_Legacy);
 else
 CCInfo.AnalyzeCallOperands(Outs, Fn: CC_Hexagon);

 if (CLI.IsTailCall) {
 // Re-validate the tail-call request. It is dropped if the eligibility
 // check fails or if any argument was assigned to a stack location.
 bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
 CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CalleeCC: CallConv,
 isVarArg: IsVarArg, isCalleeStructRet: IsStructRet, isCallerStructRet: StructAttrFlag, Outs,
 OutVals, Ins, DAG);
 for (const CCValAssign &VA : ArgLocs) {
 if (VA.isMemLoc()) {
 CLI.IsTailCall = false;
 break;
 }
 }
 // NOTE(review): the "not eligible" text mentions stack arguments even when
 // the rejection came from IsEligibleForTailCallOptimization.
 LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
 : "Argument must be passed on stack. "
 "Not eligible for Tail Call\n"));
 }
 // Get a count of how many bytes are to be pushed on the stack.
 unsigned NumBytes = CCInfo.getStackSize();
 SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
 SmallVector<SDValue, 8> MemOpChains;

 // Stack pointer value used as the base address for stack-passed arguments.
 const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
 SDValue StackPtr =
 DAG.getCopyFromReg(Chain, dl, Reg: HRI.getStackRegister(), VT: PtrVT);

 // NeedsArgAlign: some argument has an HVX vector value type.
 // LargestAlignSeen: largest store size among such arguments passed in memory.
 bool NeedsArgAlign = false;
 Align LargestAlignSeen;
 // Walk the register/memloc assignments, inserting copies/loads.
 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
 CCValAssign &VA = ArgLocs[i];
 SDValue Arg = OutVals[i];
 ISD::ArgFlagsTy Flags = Outs[i].Flags;
 // Record if we need > 8 byte alignment on an argument.
 bool ArgAlign = Subtarget.isHVXVectorType(VecTy: VA.getValVT());
 NeedsArgAlign |= ArgAlign;

 // Promote the value if needed.
 switch (VA.getLocInfo()) {
 default:
 // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
 llvm_unreachable("Unknown loc info!");
 case CCValAssign::Full:
 break;
 case CCValAssign::BCvt:
 Arg = DAG.getBitcast(VT: VA.getLocVT(), V: Arg);
 break;
 case CCValAssign::SExt:
 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
 break;
 case CCValAssign::ZExt:
 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
 break;
 case CCValAssign::AExt:
 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
 break;
 }

 if (VA.isMemLoc()) {
 // Stack-passed argument: its address is stack pointer + assigned offset.
 unsigned LocMemOffset = VA.getLocMemOffset();
 SDValue MemAddr = DAG.getConstant(Val: LocMemOffset, DL: dl,
 VT: StackPtr.getValueType());
 MemAddr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: StackPtr, N2: MemAddr);
 if (ArgAlign)
 LargestAlignSeen = std::max(
 a: LargestAlignSeen, b: Align(VA.getLocVT().getStoreSizeInBits() / 8));
 if (Flags.isByVal()) {
 // The argument is a struct passed by value. According to LLVM, "Arg"
 // is a pointer.
 MemOpChains.push_back(Elt: CreateCopyOfByValArgument(Src: Arg, Dst: MemAddr, Chain,
 Flags, DAG, dl));
 } else {
 MachinePointerInfo LocPI = MachinePointerInfo::getStack(
 MF&: DAG.getMachineFunction(), Offset: LocMemOffset);
 SDValue S = DAG.getStore(Chain, dl, Val: Arg, Ptr: MemAddr, PtrInfo: LocPI);
 MemOpChains.push_back(Elt: S);
 }
 continue;
 }

 // Arguments that can be passed on register must be kept at RegsToPass
 // vector.
 if (VA.isRegLoc())
 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
 }

 // With HVX vector arguments on V60 or later, raise the frame's maximum
 // alignment to at least the spill alignment of the HvxVR register class.
 if (NeedsArgAlign && Subtarget.hasV60Ops()) {
 LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
 Align VecAlign = HRI.getSpillAlign(RC: Hexagon::HvxVRRegClass);
 LargestAlignSeen = std::max(a: LargestAlignSeen, b: VecAlign);
 MFI.ensureMaxAlignment(Alignment: LargestAlignSeen);
 }
 // Transform all store nodes into one single node because all store
 // nodes are independent of each other.
 if (!MemOpChains.empty())
 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);

 // Only regular (non-tail) calls are bracketed by CALLSEQ_START/CALLSEQ_END.
 SDValue Glue;
 if (!CLI.IsTailCall) {
 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
 Glue = Chain.getValue(R: 1);
 }

 // Build a sequence of copy-to-reg nodes chained together with token
 // chain and flag operands which copy the outgoing args into registers.
 // The Glue is necessary since all emitted instructions must be
 // stuck together.
 if (!CLI.IsTailCall) {
 for (const auto &R : RegsToPass) {
 Chain = DAG.getCopyToReg(Chain, dl, Reg: R.first, N: R.second, Glue);
 Glue = Chain.getValue(R: 1);
 }
 } else {
 // For tail calls lower the arguments to the 'real' stack slot.
 //
 // Force all the incoming stack arguments to be loaded from the stack
 // before any new outgoing arguments are stored to the stack, because the
 // outgoing stack slots may alias the incoming argument stack slots, and
 // the alias isn't otherwise explicit. This is slightly more conservative
 // than necessary, because it means that each store effectively depends
 // on every argument instead of just those arguments it would clobber.
 //
 // Do not flag preceding copytoreg stuff together with the following stuff.
 Glue = SDValue();
 for (const auto &R : RegsToPass) {
 Chain = DAG.getCopyToReg(Chain, dl, Reg: R.first, N: R.second, Glue);
 Glue = Chain.getValue(R: 1);
 }
 // The glue is cleared again, so the tail-call node gets no glue operand.
 Glue = SDValue();
 }

 // With long calls enabled, the callee symbol carries the
 // HMOTF_ConstExtended target flag.
 bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
 unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;

 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
 // node so that legalize doesn't hack it.
 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
 Callee = DAG.getTargetGlobalAddress(GV: G->getGlobal(), DL: dl, VT: PtrVT, offset: 0, TargetFlags: Flags);
 } else if (ExternalSymbolSDNode *S =
 dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
 Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: Flags);
 }

 // Operand list of the call node: chain, callee, argument registers,
 // register mask, and (if present) glue.
 // Returns a chain & a flag for retval copy to use.
 SmallVector<SDValue, 8> Ops;
 Ops.push_back(Elt: Chain);
 Ops.push_back(Elt: Callee);

 // Add argument registers to the end of the list so that they are
 // known live into the call.
 for (const auto &R : RegsToPass)
 Ops.push_back(Elt: DAG.getRegister(Reg: R.first, VT: R.second.getValueType()));

 const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
 assert(Mask && "Missing call preserved mask for calling convention");
 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));

 if (Glue.getNode())
 Ops.push_back(Elt: Glue);

 // Tail call: record it on the frame info and finish with TC_RETURN. No
 // CALLSEQ_END is emitted and no results are copied out.
 if (CLI.IsTailCall) {
 MFI.setHasTailCall();
 return DAG.getNode(Opcode: HexagonISD::TC_RETURN, DL: dl, VT: MVT::Other, Ops);
 }

 // Set this here because we need to know this for "hasFP" in frame lowering.
 // The target-independent code calls getFrameRegister before setting it, and
 // getFrameRegister uses hasFP to determine whether the function has FP.
 MFI.setHasCalls(true);

 // Calls marked as not returning use the CALLnr opcode.
 unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
 Chain = DAG.getNode(Opcode: OpCode, DL: dl, ResultTys: {MVT::Other, MVT::Glue}, Ops);
 // Carry the CFI type over to the call node when one is provided.
 if (CLI.CFIType)
 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
 Glue = Chain.getValue(R: 1);

 // Create the CALLSEQ_END node.
 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL: dl);
 Glue = Chain.getValue(R: 1);

 // Handle result values, copying them out of physregs into vregs that we
 // return.
 return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
 InVals, OutVals, Callee);
}
660
661/// Returns true by value, base pointer and offset pointer and addressing
662/// mode by reference if this node can be combined with a load / store to
663/// form a post-indexed load / store.
664bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
665 SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
666 SelectionDAG &DAG) const {
667 LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Val: N);
668 if (!LSN)
669 return false;
670 EVT VT = LSN->getMemoryVT();
671 if (!VT.isSimple())
672 return false;
673 bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
674 VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
675 VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
676 VT == MVT::v4i16 || VT == MVT::v8i8 ||
677 Subtarget.isHVXVectorType(VecTy: VT.getSimpleVT());
678 if (!IsLegalType)
679 return false;
680
681 if (Op->getOpcode() != ISD::ADD)
682 return false;
683 Base = Op->getOperand(Num: 0);
684 Offset = Op->getOperand(Num: 1);
685 if (!isa<ConstantSDNode>(Val: Offset.getNode()))
686 return false;
687 AM = ISD::POST_INC;
688
689 int32_t V = cast<ConstantSDNode>(Val: Offset.getNode())->getSExtValue();
690 return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, Offset: V);
691}
692
693SDValue HexagonTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
694 if (DAG.getMachineFunction().getFunction().hasOptSize())
695 return SDValue();
696 else
697 return Op;
698}
699
/// Inspect an inline-asm node and record in the function info whether it
/// defines or clobbers the return-address register.
///
/// The node itself is never modified: \p Op is returned on every path. The
/// only effect is a call to HexagonMachineFunctionInfo::setHasClobberLR(true)
/// when one of the asm's clobber/def operands is the register reported by
/// HexagonRegisterInfo::getRARegister().
SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
 MachineFunction &MF = DAG.getMachineFunction();
 auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
 const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
 unsigned LR = HRI.getRARegister();

 // Nothing to do for nodes that are not inline asm, or when the clobber has
 // already been recorded for this function.
 if ((Op.getOpcode() != ISD::INLINEASM &&
 Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
 return Op;

 unsigned NumOps = Op.getNumOperands();
 if (Op.getOperand(i: NumOps-1).getValueType() == MVT::Glue)
 --NumOps; // Ignore the flag operand.

 // Operands come in groups: a flag word followed by the number of operands
 // that the flag word reports. The index is advanced manually inside the
 // loop body.
 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
 unsigned NumVals = Flags.getNumOperandRegisters();
 ++i; // Skip the ID value.

 switch (Flags.getKind()) {
 default:
 llvm_unreachable("Bad flags!");
 // Uses, immediates and memory operands are skipped over.
 case InlineAsm::Kind::RegUse:
 case InlineAsm::Kind::Imm:
 case InlineAsm::Kind::Mem:
 i += NumVals;
 break;
 // Clobbers and definitions: check each register in the group.
 case InlineAsm::Kind::Clobber:
 case InlineAsm::Kind::RegDef:
 case InlineAsm::Kind::RegDefEarlyClobber: {
 for (; NumVals; --NumVals, ++i) {
 Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i))->getReg();
 if (Reg != LR)
 continue;
 // Found the return-address register: record it and stop scanning.
 HMFI.setHasClobberLR(true);
 return Op;
 }
 break;
 }
 }
 }

 return Op;
}
745
746// Need to transform ISD::PREFETCH into something that doesn't inherit
747// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
748// SDNPMayStore.
749SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
750 SelectionDAG &DAG) const {
751 SDValue Chain = Op.getOperand(i: 0);
752 SDValue Addr = Op.getOperand(i: 1);
753 // Lower it to DCFETCH($reg, #0). A "pat" will try to merge the offset in,
754 // if the "reg" is fed by an "add".
755 SDLoc DL(Op);
756 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: MVT::i32);
757 return DAG.getNode(Opcode: HexagonISD::DCFETCH, DL, VT: MVT::Other, N1: Chain, N2: Addr, N3: Zero);
758}
759
760SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
761 SelectionDAG &DAG) const {
762 SDValue Chain = Op.getOperand(i: 0);
763 unsigned IntNo = Op.getConstantOperandVal(i: 1);
764 // Lower the hexagon_prefetch builtin to DCFETCH, as above.
765 if (IntNo == Intrinsic::hexagon_prefetch) {
766 SDValue Addr = Op.getOperand(i: 2);
767 SDLoc DL(Op);
768 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: MVT::i32);
769 return DAG.getNode(Opcode: HexagonISD::DCFETCH, DL, VT: MVT::Other, N1: Chain, N2: Addr, N3: Zero);
770 }
771 return SDValue();
772}
773
774SDValue
775HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
776 SelectionDAG &DAG) const {
777 SDValue Chain = Op.getOperand(i: 0);
778 SDValue Size = Op.getOperand(i: 1);
779 SDValue Align = Op.getOperand(i: 2);
780 SDLoc dl(Op);
781
782 ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Val&: Align);
783 assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
784
785 unsigned A = AlignConst->getSExtValue();
786 auto &HFI = *Subtarget.getFrameLowering();
787 // "Zero" means natural stack alignment.
788 if (A == 0)
789 A = HFI.getStackAlign().value();
790
791 LLVM_DEBUG({
792 dbgs () << __func__ << " Align: " << A << " Size: ";
793 Size.getNode()->dump(&DAG);
794 dbgs() << "\n";
795 });
796
797 SDValue AC = DAG.getConstant(Val: A, DL: dl, VT: MVT::i32);
798 SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other);
799 SDValue AA = DAG.getNode(Opcode: HexagonISD::ALLOCA, DL: dl, VTList: VTs, N1: Chain, N2: Size, N3: AC);
800
801 DAG.ReplaceAllUsesOfValueWith(From: Op, To: AA);
802 return AA;
803}
804
805SDValue HexagonTargetLowering::LowerFormalArguments(
806 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
807 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
808 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
809 MachineFunction &MF = DAG.getMachineFunction();
810 MachineFrameInfo &MFI = MF.getFrameInfo();
811 MachineRegisterInfo &MRI = MF.getRegInfo();
812
813 // Linux ABI treats var-arg calls the same way as regular ones.
814 bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
815
816 // Assign locations to all of the incoming arguments.
817 SmallVector<CCValAssign, 16> ArgLocs;
818 CCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext());
819
820 if (Subtarget.useHVXOps())
821 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_Hexagon_HVX);
822 else if (DisableArgsMinAlignment)
823 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_Hexagon_Legacy);
824 else
825 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_Hexagon);
826
827 // For LLVM, in the case when returning a struct by value (>8byte),
828 // the first argument is a pointer that points to the location on caller's
829 // stack where the return value will be stored. For Hexagon, the location on
830 // caller's stack is passed only when the struct size is smaller than (and
831 // equal to) 8 bytes. If not, no address will be passed into callee and
832 // callee return the result directly through R0/R1.
833 auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
834 switch (RC.getID()) {
835 case Hexagon::IntRegsRegClassID:
836 return Reg - Hexagon::R0 + 1;
837 case Hexagon::DoubleRegsRegClassID:
838 return (Reg - Hexagon::D0 + 1) * 2;
839 case Hexagon::HvxVRRegClassID:
840 return Reg - Hexagon::V0 + 1;
841 case Hexagon::HvxWRRegClassID:
842 return (Reg - Hexagon::W0 + 1) * 2;
843 }
844 llvm_unreachable("Unexpected register class");
845 };
846
847 auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
848 auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
849 HFL.FirstVarArgSavedReg = 0;
850 HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
851
852 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
853 CCValAssign &VA = ArgLocs[i];
854 ISD::ArgFlagsTy Flags = Ins[i].Flags;
855 bool ByVal = Flags.isByVal();
856
857 // Arguments passed in registers:
858 // 1. 32- and 64-bit values and HVX vectors are passed directly,
859 // 2. Large structs are passed via an address, and the address is
860 // passed in a register.
861 if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
862 llvm_unreachable("ByValSize must be bigger than 8 bytes");
863
864 bool InReg = VA.isRegLoc() &&
865 (!ByVal || (ByVal && Flags.getByValSize() > 8));
866
867 if (InReg) {
868 MVT RegVT = VA.getLocVT();
869 if (VA.getLocInfo() == CCValAssign::BCvt)
870 RegVT = VA.getValVT();
871
872 const TargetRegisterClass *RC = getRegClassFor(VT: RegVT);
873 Register VReg = MRI.createVirtualRegister(RegClass: RC);
874 SDValue Copy = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: RegVT);
875
876 // Treat values of type MVT::i1 specially: they are passed in
877 // registers of type i32, but they need to remain as values of
878 // type i1 for consistency of the argument lowering.
879 if (VA.getValVT() == MVT::i1) {
880 assert(RegVT.getSizeInBits() <= 32);
881 SDValue T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: RegVT,
882 N1: Copy, N2: DAG.getConstant(Val: 1, DL: dl, VT: RegVT));
883 Copy = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: T, RHS: DAG.getConstant(Val: 0, DL: dl, VT: RegVT),
884 Cond: ISD::SETNE);
885 } else {
886#ifndef NDEBUG
887 unsigned RegSize = RegVT.getSizeInBits();
888 assert(RegSize == 32 || RegSize == 64 ||
889 Subtarget.isHVXVectorType(RegVT));
890#endif
891 }
892 InVals.push_back(Elt: Copy);
893 MRI.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
894 HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
895 } else {
896 assert(VA.isMemLoc() && "Argument should be passed in memory");
897
898 // If it's a byval parameter, then we need to compute the
899 // "real" size, not the size of the pointer.
900 unsigned ObjSize = Flags.isByVal()
901 ? Flags.getByValSize()
902 : VA.getLocVT().getStoreSizeInBits() / 8;
903
904 // Create the frame index object for this incoming parameter.
905 int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
906 int FI = MFI.CreateFixedObject(Size: ObjSize, SPOffset: Offset, IsImmutable: true);
907 SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32);
908
909 if (Flags.isByVal()) {
910 // If it's a pass-by-value aggregate, then do not dereference the stack
911 // location. Instead, we should generate a reference to the stack
912 // location.
913 InVals.push_back(Elt: FIN);
914 } else {
915 SDValue L = DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: FIN,
916 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset: 0));
917 InVals.push_back(Elt: L);
918 }
919 }
920 }
921
922 if (IsVarArg && Subtarget.isEnvironmentMusl()) {
923 for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
924 MRI.addLiveIn(Reg: Hexagon::R0+i);
925 }
926
927 if (IsVarArg && Subtarget.isEnvironmentMusl()) {
928 HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
929 HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
930
931 // Create Frame index for the start of register saved area.
932 int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
933 bool RequiresPadding = (NumVarArgRegs & 1);
934 int RegSaveAreaSizePlusPadding = RequiresPadding
935 ? (NumVarArgRegs + 1) * 4
936 : NumVarArgRegs * 4;
937
938 if (RegSaveAreaSizePlusPadding > 0) {
939 // The offset to saved register area should be 8 byte aligned.
940 int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
941 if (!(RegAreaStart % 8))
942 RegAreaStart = (RegAreaStart + 7) & -8;
943
944 int RegSaveAreaFrameIndex =
945 MFI.CreateFixedObject(Size: RegSaveAreaSizePlusPadding, SPOffset: RegAreaStart, IsImmutable: true);
946 HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
947
948 // This will point to the next argument passed via stack.
949 int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
950 int FI = MFI.CreateFixedObject(Hexagon_PointerSize, SPOffset: Offset, IsImmutable: true);
951 HMFI.setVarArgsFrameIndex(FI);
952 } else {
953 // This will point to the next argument passed via stack, when
954 // there is no saved register area.
955 int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
956 int FI = MFI.CreateFixedObject(Hexagon_PointerSize, SPOffset: Offset, IsImmutable: true);
957 HMFI.setRegSavedAreaStartFrameIndex(FI);
958 HMFI.setVarArgsFrameIndex(FI);
959 }
960 }
961
962
963 if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
964 // This will point to the next argument passed via stack.
965 int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
966 int FI = MFI.CreateFixedObject(Hexagon_PointerSize, SPOffset: Offset, IsImmutable: true);
967 HMFI.setVarArgsFrameIndex(FI);
968 }
969
970 return Chain;
971}
972
973SDValue
974HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
975 // VASTART stores the address of the VarArgsFrameIndex slot into the
976 // memory location argument.
977 MachineFunction &MF = DAG.getMachineFunction();
978 HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
979 SDValue Addr = DAG.getFrameIndex(FI: QFI->getVarArgsFrameIndex(), VT: MVT::i32);
980 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
981
982 if (!Subtarget.isEnvironmentMusl()) {
983 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: SDLoc(Op), Val: Addr, Ptr: Op.getOperand(i: 1),
984 PtrInfo: MachinePointerInfo(SV));
985 }
986 auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
987 auto &HFL = *Subtarget.getFrameLowering();
988 SDLoc DL(Op);
989 SmallVector<SDValue, 8> MemOps;
990
991 // Get frame index of va_list.
992 SDValue FIN = Op.getOperand(i: 1);
993
994 // If first Vararg register is odd, add 4 bytes to start of
995 // saved register area to point to the first register location.
996 // This is because the saved register area has to be 8 byte aligned.
997 // In case of an odd start register, there will be 4 bytes of padding in
998 // the beginning of saved register area. If all registers area used up,
999 // the following condition will handle it correctly.
1000 SDValue SavedRegAreaStartFrameIndex =
1001 DAG.getFrameIndex(FI: FuncInfo.getRegSavedAreaStartFrameIndex(), VT: MVT::i32);
1002
1003 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1004
1005 if (HFL.FirstVarArgSavedReg & 1)
1006 SavedRegAreaStartFrameIndex =
1007 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
1008 N1: DAG.getFrameIndex(FI: FuncInfo.getRegSavedAreaStartFrameIndex(),
1009 VT: MVT::i32),
1010 N2: DAG.getIntPtrConstant(Val: 4, DL));
1011
1012 // Store the saved register area start pointer.
1013 SDValue Store =
1014 DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL,
1015 Val: SavedRegAreaStartFrameIndex,
1016 Ptr: FIN, PtrInfo: MachinePointerInfo(SV));
1017 MemOps.push_back(Elt: Store);
1018
1019 // Store saved register area end pointer.
1020 FIN = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
1021 N1: FIN, N2: DAG.getIntPtrConstant(Val: 4, DL));
1022 Store = DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL,
1023 Val: DAG.getFrameIndex(FI: FuncInfo.getVarArgsFrameIndex(),
1024 VT: PtrVT),
1025 Ptr: FIN, PtrInfo: MachinePointerInfo(SV, 4));
1026 MemOps.push_back(Elt: Store);
1027
1028 // Store overflow area pointer.
1029 FIN = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
1030 N1: FIN, N2: DAG.getIntPtrConstant(Val: 4, DL));
1031 Store = DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL,
1032 Val: DAG.getFrameIndex(FI: FuncInfo.getVarArgsFrameIndex(),
1033 VT: PtrVT),
1034 Ptr: FIN, PtrInfo: MachinePointerInfo(SV, 8));
1035 MemOps.push_back(Elt: Store);
1036
1037 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOps);
1038}
1039
1040SDValue
1041HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
1042 // Assert that the linux ABI is enabled for the current compilation.
1043 assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
1044 SDValue Chain = Op.getOperand(i: 0);
1045 SDValue DestPtr = Op.getOperand(i: 1);
1046 SDValue SrcPtr = Op.getOperand(i: 2);
1047 const Value *DestSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 3))->getValue();
1048 const Value *SrcSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 4))->getValue();
1049 SDLoc DL(Op);
1050 // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
1051 // we need to memcopy 12 bytes from va_list to another similar list.
1052 return DAG.getMemcpy(Chain, dl: DL, Dst: DestPtr, Src: SrcPtr,
1053 Size: DAG.getIntPtrConstant(Val: 12, DL), DstAlign: Align(4), SrcAlign: Align(4),
1054 /*isVolatile*/ isVol: false, AlwaysInline: false, /*CI=*/nullptr,
1055 OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(DestSV),
1056 SrcPtrInfo: MachinePointerInfo(SrcSV));
1057}
1058
1059SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1060 const SDLoc &dl(Op);
1061 SDValue LHS = Op.getOperand(i: 0);
1062 SDValue RHS = Op.getOperand(i: 1);
1063 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
1064 MVT ResTy = ty(Op);
1065 MVT OpTy = ty(Op: LHS);
1066
1067 if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
1068 MVT ElemTy = OpTy.getVectorElementType();
1069 assert(ElemTy.isScalarInteger());
1070 MVT WideTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 2*ElemTy.getSizeInBits()),
1071 NumElements: OpTy.getVectorNumElements());
1072 return DAG.getSetCC(DL: dl, VT: ResTy,
1073 LHS: DAG.getSExtOrTrunc(Op: LHS, DL: SDLoc(LHS), VT: WideTy),
1074 RHS: DAG.getSExtOrTrunc(Op: RHS, DL: SDLoc(RHS), VT: WideTy), Cond: CC);
1075 }
1076
1077 // Treat all other vector types as legal.
1078 if (ResTy.isVector())
1079 return Op;
1080
1081 // Comparisons of short integers should use sign-extend, not zero-extend,
1082 // since we can represent small negative values in the compare instructions.
1083 // The LLVM default is to use zero-extend arbitrarily in these cases.
1084 auto isSExtFree = [this](SDValue N) {
1085 switch (N.getOpcode()) {
1086 case ISD::TRUNCATE: {
1087 // A sign-extend of a truncate of a sign-extend is free.
1088 SDValue Op = N.getOperand(i: 0);
1089 if (Op.getOpcode() != ISD::AssertSext)
1090 return false;
1091 EVT OrigTy = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
1092 unsigned ThisBW = ty(Op: N).getSizeInBits();
1093 unsigned OrigBW = OrigTy.getSizeInBits();
1094 // The type that was sign-extended to get the AssertSext must be
1095 // narrower than the type of N (so that N has still the same value
1096 // as the original).
1097 return ThisBW >= OrigBW;
1098 }
1099 case ISD::LOAD:
1100 // We have sign-extended loads.
1101 return true;
1102 }
1103 return false;
1104 };
1105
1106 if (OpTy == MVT::i8 || OpTy == MVT::i16) {
1107 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS);
1108 bool IsNegative = C && C->getAPIntValue().isNegative();
1109 if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
1110 return DAG.getSetCC(DL: dl, VT: ResTy,
1111 LHS: DAG.getSExtOrTrunc(Op: LHS, DL: SDLoc(LHS), VT: MVT::i32),
1112 RHS: DAG.getSExtOrTrunc(Op: RHS, DL: SDLoc(RHS), VT: MVT::i32), Cond: CC);
1113 }
1114
1115 return SDValue();
1116}
1117
1118SDValue
1119HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
1120 SDValue PredOp = Op.getOperand(i: 0);
1121 SDValue Op1 = Op.getOperand(i: 1), Op2 = Op.getOperand(i: 2);
1122 MVT OpTy = ty(Op: Op1);
1123 const SDLoc &dl(Op);
1124
1125 if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
1126 MVT ElemTy = OpTy.getVectorElementType();
1127 assert(ElemTy.isScalarInteger());
1128 MVT WideTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 2*ElemTy.getSizeInBits()),
1129 NumElements: OpTy.getVectorNumElements());
1130 // Generate (trunc (select (_, sext, sext))).
1131 return DAG.getSExtOrTrunc(
1132 Op: DAG.getSelect(DL: dl, VT: WideTy, Cond: PredOp,
1133 LHS: DAG.getSExtOrTrunc(Op: Op1, DL: dl, VT: WideTy),
1134 RHS: DAG.getSExtOrTrunc(Op: Op2, DL: dl, VT: WideTy)),
1135 DL: dl, VT: OpTy);
1136 }
1137
1138 return SDValue();
1139}
1140
1141SDValue
1142HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
1143 EVT ValTy = Op.getValueType();
1144 ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Val&: Op);
1145 Constant *CVal = nullptr;
1146 bool isVTi1Type = false;
1147 if (auto *CV = dyn_cast<ConstantVector>(Val: CPN->getConstVal())) {
1148 if (cast<VectorType>(Val: CV->getType())->getElementType()->isIntegerTy(BitWidth: 1)) {
1149 IRBuilder<> IRB(CV->getContext());
1150 SmallVector<Constant*, 128> NewConst;
1151 unsigned VecLen = CV->getNumOperands();
1152 assert(isPowerOf2_32(VecLen) &&
1153 "conversion only supported for pow2 VectorSize");
1154 for (unsigned i = 0; i < VecLen; ++i)
1155 NewConst.push_back(Elt: IRB.getInt8(C: CV->getOperand(i_nocapture: i)->isNullValue()));
1156
1157 CVal = ConstantVector::get(V: NewConst);
1158 isVTi1Type = true;
1159 }
1160 }
1161 Align Alignment = CPN->getAlign();
1162 bool IsPositionIndependent = isPositionIndependent();
1163 unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
1164
1165 unsigned Offset = 0;
1166 SDValue T;
1167 if (CPN->isMachineConstantPoolEntry())
1168 T = DAG.getTargetConstantPool(C: CPN->getMachineCPVal(), VT: ValTy, Align: Alignment,
1169 Offset, TargetFlags: TF);
1170 else if (isVTi1Type)
1171 T = DAG.getTargetConstantPool(C: CVal, VT: ValTy, Align: Alignment, Offset, TargetFlags: TF);
1172 else
1173 T = DAG.getTargetConstantPool(C: CPN->getConstVal(), VT: ValTy, Align: Alignment, Offset,
1174 TargetFlags: TF);
1175
1176 assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
1177 "Inconsistent target flag encountered");
1178
1179 if (IsPositionIndependent)
1180 return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: SDLoc(Op), VT: ValTy, Operand: T);
1181 return DAG.getNode(Opcode: HexagonISD::CP, DL: SDLoc(Op), VT: ValTy, Operand: T);
1182}
1183
1184SDValue
1185HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1186 EVT VT = Op.getValueType();
1187 int Idx = cast<JumpTableSDNode>(Val&: Op)->getIndex();
1188 if (isPositionIndependent()) {
1189 SDValue T = DAG.getTargetJumpTable(JTI: Idx, VT, TargetFlags: HexagonII::MO_PCREL);
1190 return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: SDLoc(Op), VT, Operand: T);
1191 }
1192
1193 SDValue T = DAG.getTargetJumpTable(JTI: Idx, VT);
1194 return DAG.getNode(Opcode: HexagonISD::JT, DL: SDLoc(Op), VT, Operand: T);
1195}
1196
1197SDValue
1198HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
1199 const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1200 MachineFunction &MF = DAG.getMachineFunction();
1201 MachineFrameInfo &MFI = MF.getFrameInfo();
1202 MFI.setReturnAddressIsTaken(true);
1203
1204 EVT VT = Op.getValueType();
1205 SDLoc dl(Op);
1206 unsigned Depth = Op.getConstantOperandVal(i: 0);
1207 if (Depth) {
1208 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
1209 SDValue Offset = DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32);
1210 return DAG.getLoad(VT, dl, Chain: DAG.getEntryNode(),
1211 Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: FrameAddr, N2: Offset),
1212 PtrInfo: MachinePointerInfo());
1213 }
1214
1215 // Return LR, which contains the return address. Mark it an implicit live-in.
1216 Register Reg = MF.addLiveIn(PReg: HRI.getRARegister(), RC: getRegClassFor(VT: MVT::i32));
1217 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg, VT);
1218}
1219
1220SDValue
1221HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
1222 const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1223 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1224 MFI.setFrameAddressIsTaken(true);
1225
1226 EVT VT = Op.getValueType();
1227 SDLoc dl(Op);
1228 unsigned Depth = Op.getConstantOperandVal(i: 0);
1229 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl,
1230 Reg: HRI.getFrameRegister(), VT);
1231 while (Depth--)
1232 FrameAddr = DAG.getLoad(VT, dl, Chain: DAG.getEntryNode(), Ptr: FrameAddr,
1233 PtrInfo: MachinePointerInfo());
1234 return FrameAddr;
1235}
1236
1237SDValue
1238HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
1239 SDLoc dl(Op);
1240 return DAG.getNode(Opcode: HexagonISD::BARRIER, DL: dl, VT: MVT::Other, Operand: Op.getOperand(i: 0));
1241}
1242
1243SDValue
1244HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
1245 SDLoc dl(Op);
1246 auto *GAN = cast<GlobalAddressSDNode>(Val&: Op);
1247 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1248 auto *GV = GAN->getGlobal();
1249 int64_t Offset = GAN->getOffset();
1250
1251 auto &HLOF = *HTM.getObjFileLowering();
1252 Reloc::Model RM = HTM.getRelocationModel();
1253
1254 if (RM == Reloc::Static) {
1255 SDValue GA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: Offset);
1256 const GlobalObject *GO = GV->getAliaseeObject();
1257 if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, TM: HTM))
1258 return DAG.getNode(Opcode: HexagonISD::CONST32_GP, DL: dl, VT: PtrVT, Operand: GA);
1259 return DAG.getNode(Opcode: HexagonISD::CONST32, DL: dl, VT: PtrVT, Operand: GA);
1260 }
1261
1262 bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(GV);
1263 if (UsePCRel) {
1264 SDValue GA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: Offset,
1265 TargetFlags: HexagonII::MO_PCREL);
1266 return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: dl, VT: PtrVT, Operand: GA);
1267 }
1268
1269 // Use GOT index.
1270 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(VT: PtrVT);
1271 SDValue GA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: HexagonII::MO_GOT);
1272 SDValue Off = DAG.getConstant(Val: Offset, DL: dl, VT: MVT::i32);
1273 return DAG.getNode(Opcode: HexagonISD::AT_GOT, DL: dl, VT: PtrVT, N1: GOT, N2: GA, N3: Off);
1274}
1275
1276// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
1277SDValue
1278HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1279 const BlockAddress *BA = cast<BlockAddressSDNode>(Val&: Op)->getBlockAddress();
1280 SDLoc dl(Op);
1281 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1282
1283 Reloc::Model RM = HTM.getRelocationModel();
1284 if (RM == Reloc::Static) {
1285 SDValue A = DAG.getTargetBlockAddress(BA, VT: PtrVT);
1286 return DAG.getNode(Opcode: HexagonISD::CONST32_GP, DL: dl, VT: PtrVT, Operand: A);
1287 }
1288
1289 SDValue A = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: HexagonII::MO_PCREL);
1290 return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: dl, VT: PtrVT, Operand: A);
1291}
1292
1293SDValue
1294HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
1295 const {
1296 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1297 SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, VT: PtrVT,
1298 TargetFlags: HexagonII::MO_PCREL);
1299 return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: SDLoc(Op), VT: PtrVT, Operand: GOTSym);
1300}
1301
1302SDValue
1303HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
1304 GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
1305 unsigned char OperandFlags) const {
1306 MachineFunction &MF = DAG.getMachineFunction();
1307 MachineFrameInfo &MFI = MF.getFrameInfo();
1308 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
1309 SDLoc dl(GA);
1310 SDValue TGA = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: dl,
1311 VT: GA->getValueType(ResNo: 0),
1312 offset: GA->getOffset(),
1313 TargetFlags: OperandFlags);
1314 // Create Operands for the call.The Operands should have the following:
1315 // 1. Chain SDValue
1316 // 2. Callee which in this case is the Global address value.
1317 // 3. Registers live into the call.In this case its R0, as we
1318 // have just one argument to be passed.
1319 // 4. Glue.
1320 // Note: The order is important.
1321
1322 const auto &HRI = *Subtarget.getRegisterInfo();
1323 const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
1324 assert(Mask && "Missing call preserved mask for calling convention");
1325 SDValue Ops[] = { Chain, TGA, DAG.getRegister(Reg: Hexagon::R0, VT: PtrVT),
1326 DAG.getRegisterMask(RegMask: Mask), Glue };
1327 Chain = DAG.getNode(Opcode: HexagonISD::CALL, DL: dl, VTList: NodeTys, Ops);
1328
1329 // Inform MFI that function has calls.
1330 MFI.setAdjustsStack(true);
1331
1332 Glue = Chain.getValue(R: 1);
1333 return DAG.getCopyFromReg(Chain, dl, Reg: ReturnReg, VT: PtrVT, Glue);
1334}
1335
1336//
1337// Lower using the initial executable model for TLS addresses
1338//
1339SDValue
1340HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
1341 SelectionDAG &DAG) const {
1342 SDLoc dl(GA);
1343 int64_t Offset = GA->getOffset();
1344 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1345
1346 // Get the thread pointer.
1347 SDValue TP = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg: Hexagon::UGP, VT: PtrVT);
1348
1349 bool IsPositionIndependent = isPositionIndependent();
1350 unsigned char TF =
1351 IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
1352
1353 // First generate the TLS symbol address
1354 SDValue TGA = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: dl, VT: PtrVT,
1355 offset: Offset, TargetFlags: TF);
1356
1357 SDValue Sym = DAG.getNode(Opcode: HexagonISD::CONST32, DL: dl, VT: PtrVT, Operand: TGA);
1358
1359 if (IsPositionIndependent) {
1360 // Generate the GOT pointer in case of position independent code
1361 SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Op: Sym, DAG);
1362
1363 // Add the TLS Symbol address to GOT pointer.This gives
1364 // GOT relative relocation for the symbol.
1365 Sym = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: GOT, N2: Sym);
1366 }
1367
1368 // Load the offset value for TLS symbol.This offset is relative to
1369 // thread pointer.
1370 SDValue LoadOffset =
1371 DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(), Ptr: Sym, PtrInfo: MachinePointerInfo());
1372
1373 // Address of the thread local variable is the add of thread
1374 // pointer and the offset of the variable.
1375 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: TP, N2: LoadOffset);
1376}
1377
1378//
1379// Lower using the local executable model for TLS addresses
1380//
1381SDValue
1382HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
1383 SelectionDAG &DAG) const {
1384 SDLoc dl(GA);
1385 int64_t Offset = GA->getOffset();
1386 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1387
1388 // Get the thread pointer.
1389 SDValue TP = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg: Hexagon::UGP, VT: PtrVT);
1390 // Generate the TLS symbol address
1391 SDValue TGA = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: dl, VT: PtrVT, offset: Offset,
1392 TargetFlags: HexagonII::MO_TPREL);
1393 SDValue Sym = DAG.getNode(Opcode: HexagonISD::CONST32, DL: dl, VT: PtrVT, Operand: TGA);
1394
1395 // Address of the thread local variable is the add of thread
1396 // pointer and the offset of the variable.
1397 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: TP, N2: Sym);
1398}
1399
1400//
1401// Lower using the general dynamic model for TLS addresses
1402//
1403SDValue
1404HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1405 SelectionDAG &DAG) const {
1406 SDLoc dl(GA);
1407 int64_t Offset = GA->getOffset();
1408 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1409
1410 // First generate the TLS symbol address
1411 SDValue TGA = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: dl, VT: PtrVT, offset: Offset,
1412 TargetFlags: HexagonII::MO_GDGOT);
1413
1414 // Then, generate the GOT pointer
1415 SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Op: TGA, DAG);
1416
1417 // Add the TLS symbol and the GOT pointer
1418 SDValue Sym = DAG.getNode(Opcode: HexagonISD::CONST32, DL: dl, VT: PtrVT, Operand: TGA);
1419 SDValue Chain = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: GOT, N2: Sym);
1420
1421 // Copy over the argument to R0
1422 SDValue InGlue;
1423 Chain = DAG.getCopyToReg(Chain: DAG.getEntryNode(), dl, Reg: Hexagon::R0, N: Chain, Glue: InGlue);
1424 InGlue = Chain.getValue(R: 1);
1425
1426 unsigned Flags = DAG.getSubtarget<HexagonSubtarget>().useLongCalls()
1427 ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
1428 : HexagonII::MO_GDPLT;
1429
1430 return GetDynamicTLSAddr(DAG, Chain, GA, Glue: InGlue, PtrVT,
1431 ReturnReg: Hexagon::R0, OperandFlags: Flags);
1432}
1433
1434//
1435// Lower TLS addresses.
1436//
1437// For now for dynamic models, we only support the general dynamic model.
1438//
1439SDValue
1440HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1441 SelectionDAG &DAG) const {
1442 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
1443
1444 switch (HTM.getTLSModel(GV: GA->getGlobal())) {
1445 case TLSModel::GeneralDynamic:
1446 case TLSModel::LocalDynamic:
1447 return LowerToTLSGeneralDynamicModel(GA, DAG);
1448 case TLSModel::InitialExec:
1449 return LowerToTLSInitialExecModel(GA, DAG);
1450 case TLSModel::LocalExec:
1451 return LowerToTLSLocalExecModel(GA, DAG);
1452 }
1453 llvm_unreachable("Bogus TLS model");
1454}
1455
1456//===----------------------------------------------------------------------===//
1457// TargetLowering Implementation
1458//===----------------------------------------------------------------------===//
1459
1460HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
1461 const HexagonSubtarget &ST)
1462 : TargetLowering(TM, ST),
1463 HTM(static_cast<const HexagonTargetMachine &>(TM)), Subtarget(ST) {
1464 auto &HRI = *Subtarget.getRegisterInfo();
1465
1466 setPrefLoopAlignment(Align(16));
1467 setMinFunctionAlignment(Align(4));
1468 setPrefFunctionAlignment(Align(16));
1469 setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
1470 setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
1471 setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
1472
1473 setMaxAtomicSizeInBitsSupported(64);
1474 setMinCmpXchgSizeInBits(32);
1475
1476 if (EnableHexSDNodeSched)
1477 setSchedulingPreference(Sched::VLIW);
1478 else
1479 setSchedulingPreference(Sched::Source);
1480
1481 // Limits for inline expansion of memcpy/memmove
1482 MaxStoresPerMemcpy = 6;
1483 MaxStoresPerMemcpyOptSize = 4;
1484 MaxStoresPerMemmove = 6;
1485 MaxStoresPerMemmoveOptSize = 4;
1486 MaxStoresPerMemset = 8;
1487 MaxStoresPerMemsetOptSize = 4;
1488
1489 setTargetDAGCombine(ISD::VECREDUCE_ADD);
1490
1491 //
1492 // Set up register classes.
1493 //
1494
1495 addRegisterClass(VT: MVT::i1, RC: &Hexagon::PredRegsRegClass);
1496 addRegisterClass(VT: MVT::v2i1, RC: &Hexagon::PredRegsRegClass); // bbbbaaaa
1497 addRegisterClass(VT: MVT::v4i1, RC: &Hexagon::PredRegsRegClass); // ddccbbaa
1498 addRegisterClass(VT: MVT::v8i1, RC: &Hexagon::PredRegsRegClass); // hgfedcba
1499 addRegisterClass(VT: MVT::i32, RC: &Hexagon::IntRegsRegClass);
1500 addRegisterClass(VT: MVT::v2i16, RC: &Hexagon::IntRegsRegClass);
1501 addRegisterClass(VT: MVT::v4i8, RC: &Hexagon::IntRegsRegClass);
1502 addRegisterClass(VT: MVT::i64, RC: &Hexagon::DoubleRegsRegClass);
1503 addRegisterClass(VT: MVT::v8i8, RC: &Hexagon::DoubleRegsRegClass);
1504 addRegisterClass(VT: MVT::v4i16, RC: &Hexagon::DoubleRegsRegClass);
1505 addRegisterClass(VT: MVT::v2i32, RC: &Hexagon::DoubleRegsRegClass);
1506
1507 addRegisterClass(VT: MVT::f32, RC: &Hexagon::IntRegsRegClass);
1508 addRegisterClass(VT: MVT::f64, RC: &Hexagon::DoubleRegsRegClass);
1509
1510 //
1511 // Handling of scalar operations.
1512 //
1513 // All operations default to "legal", except:
1514 // - indexed loads and stores (pre-/post-incremented),
1515 // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
1516 // ConstantFP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
1517 // FLOG, FLOG2, FLOG10, FMAXIMUMNUM, FMINIMUMNUM, FNEARBYINT, FRINT, FROUND,
1518 // TRAP, FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG,
1519 // ZERO_EXTEND_VECTOR_INREG,
1520 // which default to "expand" for at least one type.
1521
1522 // Misc operations.
1523 setOperationAction(Op: ISD::ConstantFP, VT: MVT::f32, Action: Legal);
1524 setOperationAction(Op: ISD::ConstantFP, VT: MVT::f64, Action: Legal);
1525 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
1526 setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal);
1527 setOperationAction(Op: ISD::ConstantPool, VT: MVT::i32, Action: Custom);
1528 setOperationAction(Op: ISD::JumpTable, VT: MVT::i32, Action: Custom);
1529 setOperationAction(Op: ISD::BUILD_PAIR, VT: MVT::i64, Action: Expand);
1530 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
1531 setOperationAction(Op: ISD::INLINEASM, VT: MVT::Other, Action: Custom);
1532 setOperationAction(Op: ISD::INLINEASM_BR, VT: MVT::Other, Action: Custom);
1533 setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Custom);
1534 setOperationAction(Op: ISD::READCYCLECOUNTER, VT: MVT::i64, Action: Legal);
1535 setOperationAction(Op: ISD::READSTEADYCOUNTER, VT: MVT::i64, Action: Legal);
1536 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
1537 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
1538 setOperationAction(Op: ISD::EH_RETURN, VT: MVT::Other, Action: Custom);
1539 setOperationAction(Op: ISD::GLOBAL_OFFSET_TABLE, VT: MVT::i32, Action: Custom);
1540 setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVT::i32, Action: Custom);
1541 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
1542
1543 // Custom legalize GlobalAddress nodes into CONST32.
1544 setOperationAction(Op: ISD::GlobalAddress, VT: MVT::i32, Action: Custom);
1545 setOperationAction(Op: ISD::GlobalAddress, VT: MVT::i8, Action: Custom);
1546 setOperationAction(Op: ISD::BlockAddress, VT: MVT::i32, Action: Custom);
1547
1548 // Hexagon needs to optimize cases with negative constants.
1549 setOperationAction(Op: ISD::SETCC, VT: MVT::i8, Action: Custom);
1550 setOperationAction(Op: ISD::SETCC, VT: MVT::i16, Action: Custom);
1551 setOperationAction(Op: ISD::SETCC, VT: MVT::v4i8, Action: Custom);
1552 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i16, Action: Custom);
1553
1554 // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
1555 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
1556 setOperationAction(Op: ISD::VAEND, VT: MVT::Other, Action: Expand);
1557 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
1558 if (Subtarget.isEnvironmentMusl())
1559 setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Custom);
1560 else
1561 setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Expand);
1562
1563 setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Expand);
1564 setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Expand);
1565 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i32, Action: Custom);
1566
1567 if (EmitJumpTables)
1568 setMinimumJumpTableEntries(MinimumJumpTables);
1569 else
1570 setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
1571 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
1572
1573 for (unsigned LegalIntOp :
1574 {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
1575 setOperationAction(Op: LegalIntOp, VT: MVT::i32, Action: Legal);
1576 setOperationAction(Op: LegalIntOp, VT: MVT::i64, Action: Legal);
1577 }
1578
1579 // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
1580 // but they only operate on i64.
1581 for (MVT VT : MVT::integer_valuetypes()) {
1582 setOperationAction(Op: ISD::UADDO, VT, Action: Custom);
1583 setOperationAction(Op: ISD::USUBO, VT, Action: Custom);
1584 setOperationAction(Op: ISD::SADDO, VT, Action: Expand);
1585 setOperationAction(Op: ISD::SSUBO, VT, Action: Expand);
1586 setOperationAction(Op: ISD::UADDO_CARRY, VT, Action: Expand);
1587 setOperationAction(Op: ISD::USUBO_CARRY, VT, Action: Expand);
1588 }
1589 setOperationAction(Op: ISD::UADDO_CARRY, VT: MVT::i64, Action: Custom);
1590 setOperationAction(Op: ISD::USUBO_CARRY, VT: MVT::i64, Action: Custom);
1591
1592 setOperationAction(Op: ISD::CTLZ, VT: MVT::i8, Action: Promote);
1593 setOperationAction(Op: ISD::CTLZ, VT: MVT::i16, Action: Promote);
1594 setOperationAction(Op: ISD::CTTZ, VT: MVT::i8, Action: Promote);
1595 setOperationAction(Op: ISD::CTTZ, VT: MVT::i16, Action: Promote);
1596
1597 // Popcount can count # of 1s in i64 but returns i32.
1598 setOperationAction(Op: ISD::CTPOP, VT: MVT::i8, Action: Promote);
1599 setOperationAction(Op: ISD::CTPOP, VT: MVT::i16, Action: Promote);
1600 setOperationAction(Op: ISD::CTPOP, VT: MVT::i32, Action: Promote);
1601 setOperationAction(Op: ISD::CTPOP, VT: MVT::i64, Action: Legal);
1602
1603 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Legal);
1604 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i64, Action: Legal);
1605 setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Legal);
1606 setOperationAction(Op: ISD::BSWAP, VT: MVT::i64, Action: Legal);
1607
1608 setOperationAction(Op: ISD::FSHL, VT: MVT::i32, Action: Legal);
1609 setOperationAction(Op: ISD::FSHL, VT: MVT::i64, Action: Legal);
1610 setOperationAction(Op: ISD::FSHR, VT: MVT::i32, Action: Legal);
1611 setOperationAction(Op: ISD::FSHR, VT: MVT::i64, Action: Legal);
1612
1613 for (unsigned IntExpOp :
1614 {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
1615 ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
1616 ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
1617 ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
1618 for (MVT VT : MVT::integer_valuetypes())
1619 setOperationAction(Op: IntExpOp, VT, Action: Expand);
1620 }
1621 for (MVT VT : MVT::fp_valuetypes()) {
1622 for (unsigned FPExpOp : {ISD::FDIV, ISD::FSQRT, ISD::FSIN, ISD::FCOS,
1623 ISD::FSINCOS, ISD::FPOW, ISD::FCOPYSIGN})
1624 setOperationAction(Op: FPExpOp, VT, Action: Expand);
1625
1626 setOperationAction(Op: ISD::FREM, VT, Action: LibCall);
1627 }
1628
1629 // No extending loads from i32.
1630 for (MVT VT : MVT::integer_valuetypes()) {
1631 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i32, Action: Expand);
1632 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i32, Action: Expand);
1633 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::i32, Action: Expand);
1634 }
1635 // Turn FP truncstore into trunc + store.
1636 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
1637 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
1638 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
1639 // Turn FP extload into load/fpextend.
1640 for (MVT VT : MVT::fp_valuetypes())
1641 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::f32, Action: Expand);
1642
1643 // Expand BR_CC and SELECT_CC for all integer and fp types.
1644 for (MVT VT : MVT::integer_valuetypes()) {
1645 setOperationAction(Op: ISD::BR_CC, VT, Action: Expand);
1646 setOperationAction(Op: ISD::SELECT_CC, VT, Action: Expand);
1647 }
1648 for (MVT VT : MVT::fp_valuetypes()) {
1649 setOperationAction(Op: ISD::BR_CC, VT, Action: Expand);
1650 setOperationAction(Op: ISD::SELECT_CC, VT, Action: Expand);
1651 }
1652 setOperationAction(Op: ISD::BR_CC, VT: MVT::Other, Action: Expand);
1653
1654 //
1655 // Handling of vector operations.
1656 //
1657
1658 // Set the action for vector operations to "expand", then override it with
1659 // either "custom" or "legal" for specific cases.
1660 // clang-format off
1661 static const unsigned VectExpOps[] = {
1662 // Integer arithmetic:
1663 ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
1664 ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::SADDO,
1665 ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
1666 // Logical/bit:
1667 ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
1668 ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::BSWAP, ISD::BITREVERSE,
1669 // Floating point arithmetic/math functions:
1670 ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
1671 ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
1672 ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
1673 ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
1674 ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
1675 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
1676 ISD::FSINCOS, ISD::FLDEXP,
1677 // Misc:
1678 ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool,
1679 // Vector:
1680 ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
1681 ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
1682 ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
1683 ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE,
1684 ISD::SPLAT_VECTOR,
1685 };
1686 // clang-format on
1687
1688 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1689 for (unsigned VectExpOp : VectExpOps)
1690 setOperationAction(Op: VectExpOp, VT, Action: Expand);
1691
1692 // Expand all extending loads and truncating stores:
1693 for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
1694 if (TargetVT == VT)
1695 continue;
1696 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: TargetVT, MemVT: VT, Action: Expand);
1697 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: TargetVT, MemVT: VT, Action: Expand);
1698 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: TargetVT, MemVT: VT, Action: Expand);
1699 setTruncStoreAction(ValVT: VT, MemVT: TargetVT, Action: Expand);
1700 }
1701
1702 // Normalize all inputs to SELECT to be vectors of i32.
1703 if (VT.getVectorElementType() != MVT::i32) {
1704 MVT VT32 = MVT::getVectorVT(VT: MVT::i32, NumElements: VT.getSizeInBits()/32);
1705 setOperationAction(Op: ISD::SELECT, VT, Action: Promote);
1706 AddPromotedToType(Opc: ISD::SELECT, OrigVT: VT, DestVT: VT32);
1707 }
1708 setOperationAction(Op: ISD::SRA, VT, Action: Custom);
1709 setOperationAction(Op: ISD::SHL, VT, Action: Custom);
1710 setOperationAction(Op: ISD::SRL, VT, Action: Custom);
1711 }
1712
1713 setOperationAction(Op: ISD::SADDSAT, VT: MVT::i32, Action: Legal);
1714 setOperationAction(Op: ISD::SADDSAT, VT: MVT::i64, Action: Legal);
1715
1716 // Extending loads from (native) vectors of i8 into (native) vectors of i16
1717 // are legal.
1718 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::v2i16, MemVT: MVT::v2i8, Action: Legal);
1719 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: MVT::v2i16, MemVT: MVT::v2i8, Action: Legal);
1720 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: MVT::v2i16, MemVT: MVT::v2i8, Action: Legal);
1721 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::v4i16, MemVT: MVT::v4i8, Action: Legal);
1722 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: MVT::v4i16, MemVT: MVT::v4i8, Action: Legal);
1723 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: MVT::v4i16, MemVT: MVT::v4i8, Action: Legal);
1724
1725 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i8, Action: Legal);
1726 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i16, Action: Legal);
1727 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i32, Action: Legal);
1728
1729 // Types natively supported:
1730 for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
1731 MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1732 setOperationAction(Op: ISD::BUILD_VECTOR, VT: NativeVT, Action: Custom);
1733 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: NativeVT, Action: Custom);
1734 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: NativeVT, Action: Custom);
1735 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: NativeVT, Action: Custom);
1736 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: NativeVT, Action: Custom);
1737 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: NativeVT, Action: Custom);
1738
1739 setOperationAction(Op: ISD::ADD, VT: NativeVT, Action: Legal);
1740 setOperationAction(Op: ISD::SUB, VT: NativeVT, Action: Legal);
1741 setOperationAction(Op: ISD::MUL, VT: NativeVT, Action: Legal);
1742 setOperationAction(Op: ISD::AND, VT: NativeVT, Action: Legal);
1743 setOperationAction(Op: ISD::OR, VT: NativeVT, Action: Legal);
1744 setOperationAction(Op: ISD::XOR, VT: NativeVT, Action: Legal);
1745
1746 if (NativeVT.getVectorElementType() != MVT::i1) {
1747 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: NativeVT, Action: Legal);
1748 setOperationAction(Op: ISD::BSWAP, VT: NativeVT, Action: Legal);
1749 setOperationAction(Op: ISD::BITREVERSE, VT: NativeVT, Action: Legal);
1750 }
1751 }
1752
1753 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
1754 setOperationAction(Op: ISD::SMIN, VT, Action: Legal);
1755 setOperationAction(Op: ISD::SMAX, VT, Action: Legal);
1756 setOperationAction(Op: ISD::UMIN, VT, Action: Legal);
1757 setOperationAction(Op: ISD::UMAX, VT, Action: Legal);
1758 }
1759
1760 // Custom lower unaligned loads.
1761 // Also, for both loads and stores, verify the alignment of the address
1762 // in case it is a compile-time constant. This is a usability feature to
1763 // provide a meaningful error message to users.
1764 for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
1765 MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1766 setOperationAction(Op: ISD::LOAD, VT, Action: Custom);
1767 setOperationAction(Op: ISD::STORE, VT, Action: Custom);
1768 }
1769
1770 // Custom-lower load/stores of boolean vectors.
1771 for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
1772 setOperationAction(Op: ISD::LOAD, VT, Action: Custom);
1773 setOperationAction(Op: ISD::STORE, VT, Action: Custom);
1774 }
1775
1776 // Normalize integer compares to EQ/GT/UGT
1777 for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
1778 MVT::v2i32}) {
1779 setCondCodeAction(CCs: ISD::SETNE, VT, Action: Expand);
1780 setCondCodeAction(CCs: ISD::SETLE, VT, Action: Expand);
1781 setCondCodeAction(CCs: ISD::SETGE, VT, Action: Expand);
1782 setCondCodeAction(CCs: ISD::SETLT, VT, Action: Expand);
1783 setCondCodeAction(CCs: ISD::SETULE, VT, Action: Expand);
1784 setCondCodeAction(CCs: ISD::SETUGE, VT, Action: Expand);
1785 setCondCodeAction(CCs: ISD::SETULT, VT, Action: Expand);
1786 }
1787
1788 // Normalize boolean compares to [U]LE/[U]LT
1789 for (MVT VT : {MVT::i1, MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
1790 setCondCodeAction(CCs: ISD::SETGE, VT, Action: Expand);
1791 setCondCodeAction(CCs: ISD::SETGT, VT, Action: Expand);
1792 setCondCodeAction(CCs: ISD::SETUGE, VT, Action: Expand);
1793 setCondCodeAction(CCs: ISD::SETUGT, VT, Action: Expand);
1794 }
1795
1796 // Custom-lower bitcasts from i8 to v8i1.
1797 setOperationAction(Op: ISD::BITCAST, VT: MVT::i8, Action: Custom);
1798 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i16, Action: Custom);
1799 setOperationAction(Op: ISD::VSELECT, VT: MVT::v4i8, Action: Custom);
1800 setOperationAction(Op: ISD::VSELECT, VT: MVT::v2i16, Action: Custom);
1801 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v4i8, Action: Custom);
1802 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v4i16, Action: Custom);
1803 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v8i8, Action: Custom);
1804
1805 // V5+.
1806 setOperationAction(Op: ISD::FMA, VT: MVT::f64, Action: Expand);
1807 setOperationAction(Op: ISD::FADD, VT: MVT::f64, Action: Expand);
1808 setOperationAction(Op: ISD::FSUB, VT: MVT::f64, Action: Expand);
1809 setOperationAction(Op: ISD::FMUL, VT: MVT::f64, Action: Expand);
1810 setOperationAction(Op: ISD::FDIV, VT: MVT::f32, Action: Custom);
1811
1812 setOperationAction(Op: ISD::FMINIMUMNUM, VT: MVT::f32, Action: Legal);
1813 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: MVT::f32, Action: Legal);
1814
1815 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i1, Action: Promote);
1816 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i8, Action: Promote);
1817 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i16, Action: Promote);
1818 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i1, Action: Promote);
1819 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i8, Action: Promote);
1820 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i16, Action: Promote);
1821 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i1, Action: Promote);
1822 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i8, Action: Promote);
1823 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i16, Action: Promote);
1824 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i1, Action: Promote);
1825 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i8, Action: Promote);
1826 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i16, Action: Promote);
1827
1828 // Special handling for half-precision floating point conversions.
1829 // Lower half float conversions into library calls.
1830 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32, Action: Expand);
1831 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand);
1832 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32, Action: Expand);
1833 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64, Action: Expand);
1834 setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f32, Action: Expand);
1835 setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f64, Action: Expand);
1836 setOperationAction(Op: ISD::FP_TO_BF16, VT: MVT::f64, Action: Expand);
1837
1838 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
1839 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
1840 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
1841 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
1842
1843 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
1844 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
1845
1846 // Handling of indexed loads/stores: default is "expand".
1847 //
1848 for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
1849 MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
1850 setIndexedLoadAction(IdxModes: ISD::POST_INC, VT, Action: Legal);
1851 setIndexedStoreAction(IdxModes: ISD::POST_INC, VT, Action: Legal);
1852 }
1853
1854 // Subtarget-specific operation actions.
1855 //
1856 if (Subtarget.hasV60Ops()) {
1857 setOperationAction(Op: ISD::ROTL, VT: MVT::i32, Action: Legal);
1858 setOperationAction(Op: ISD::ROTL, VT: MVT::i64, Action: Legal);
1859 setOperationAction(Op: ISD::ROTR, VT: MVT::i32, Action: Legal);
1860 setOperationAction(Op: ISD::ROTR, VT: MVT::i64, Action: Legal);
1861 }
1862 if (Subtarget.hasV66Ops()) {
1863 setOperationAction(Op: ISD::FADD, VT: MVT::f64, Action: Legal);
1864 setOperationAction(Op: ISD::FSUB, VT: MVT::f64, Action: Legal);
1865 }
1866 if (Subtarget.hasV67Ops()) {
1867 setOperationAction(Op: ISD::FMINIMUMNUM, VT: MVT::f64, Action: Legal);
1868 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: MVT::f64, Action: Legal);
1869 setOperationAction(Op: ISD::FMUL, VT: MVT::f64, Action: Legal);
1870 }
1871
1872 setTargetDAGCombine(ISD::OR);
1873 setTargetDAGCombine(ISD::TRUNCATE);
1874 setTargetDAGCombine(ISD::VSELECT);
1875
1876 if (Subtarget.useHVXOps())
1877 initializeHVXLowering();
1878
1879 computeRegisterProperties(TRI: &HRI);
1880}
1881
1882bool
1883HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
1884 const SDLoc &dl, SelectionDAG &DAG) const {
1885 auto *CA = dyn_cast<ConstantSDNode>(Val&: Ptr);
1886 if (!CA)
1887 return true;
1888 unsigned Addr = CA->getZExtValue();
1889 Align HaveAlign =
1890 Addr != 0 ? Align(1ull << llvm::countr_zero(Val: Addr)) : NeedAlign;
1891 if (HaveAlign >= NeedAlign)
1892 return true;
1893
1894 static int DK_MisalignedTrap = llvm::getNextAvailablePluginDiagnosticKind();
1895
1896 struct DiagnosticInfoMisalignedTrap : public DiagnosticInfo {
1897 DiagnosticInfoMisalignedTrap(StringRef M)
1898 : DiagnosticInfo(DK_MisalignedTrap, DS_Remark), Msg(M) {}
1899 void print(DiagnosticPrinter &DP) const override {
1900 DP << Msg;
1901 }
1902 static bool classof(const DiagnosticInfo *DI) {
1903 return DI->getKind() == DK_MisalignedTrap;
1904 }
1905 StringRef Msg;
1906 };
1907
1908 std::string ErrMsg;
1909 raw_string_ostream O(ErrMsg);
1910 O << "Misaligned constant address: " << format_hex(N: Addr, Width: 10)
1911 << " has alignment " << HaveAlign.value()
1912 << ", but the memory access requires " << NeedAlign.value();
1913 if (DebugLoc DL = dl.getDebugLoc())
1914 DL.print(OS&: O << ", at ");
1915 O << ". The instruction has been replaced with a trap.";
1916
1917 DAG.getContext()->diagnose(DI: DiagnosticInfoMisalignedTrap(O.str()));
1918 return false;
1919}
1920
1921SDValue
1922HexagonTargetLowering::replaceMemWithUndef(SDValue Op, SelectionDAG &DAG)
1923 const {
1924 const SDLoc &dl(Op);
1925 auto *LS = cast<LSBaseSDNode>(Val: Op.getNode());
1926 assert(!LS->isIndexed() && "Not expecting indexed ops on constant address");
1927
1928 SDValue Chain = LS->getChain();
1929 SDValue Trap = DAG.getNode(Opcode: ISD::TRAP, DL: dl, VT: MVT::Other, Operand: Chain);
1930 if (LS->getOpcode() == ISD::LOAD)
1931 return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: ty(Op)), Trap}, dl);
1932 return Trap;
1933}
1934
1935// Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
1936// intrinsic.
1937static bool isBrevLdIntrinsic(const Value *Inst) {
1938 unsigned ID = cast<IntrinsicInst>(Val: Inst)->getIntrinsicID();
1939 return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
1940 ID == Intrinsic::hexagon_L2_loadri_pbr ||
1941 ID == Intrinsic::hexagon_L2_loadrh_pbr ||
1942 ID == Intrinsic::hexagon_L2_loadruh_pbr ||
1943 ID == Intrinsic::hexagon_L2_loadrb_pbr ||
1944 ID == Intrinsic::hexagon_L2_loadrub_pbr);
1945}
1946
1947// Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous
1948// instruction. So far we only handle bitcast, extract value and bit reverse
1949// load intrinsic instructions. Should we handle CGEP ?
1950static Value *getBrevLdObject(Value *V) {
1951 if (Operator::getOpcode(V) == Instruction::ExtractValue ||
1952 Operator::getOpcode(V) == Instruction::BitCast)
1953 V = cast<Operator>(Val: V)->getOperand(i: 0);
1954 else if (isa<IntrinsicInst>(Val: V) && isBrevLdIntrinsic(Inst: V))
1955 V = cast<Instruction>(Val: V)->getOperand(i: 0);
1956 return V;
1957}
1958
1959// Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
1960// a back edge. If the back edge comes from the intrinsic itself, the incoming
1961// edge is returned.
1962static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
1963 const BasicBlock *Parent = PN->getParent();
1964 int Idx = -1;
1965 for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
1966 BasicBlock *Blk = PN->getIncomingBlock(i);
1967 // Determine if the back edge is originated from intrinsic.
1968 if (Blk == Parent) {
1969 Value *BackEdgeVal = PN->getIncomingValue(i);
1970 Value *BaseVal;
1971 // Loop over till we return the same Value or we hit the IntrBaseVal.
1972 do {
1973 BaseVal = BackEdgeVal;
1974 BackEdgeVal = getBrevLdObject(V: BackEdgeVal);
1975 } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
1976 // If the getBrevLdObject returns IntrBaseVal, we should return the
1977 // incoming edge.
1978 if (IntrBaseVal == BackEdgeVal)
1979 continue;
1980 Idx = i;
1981 break;
1982 } else // Set the node to incoming edge.
1983 Idx = i;
1984 }
1985 assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
1986 return PN->getIncomingValue(i: Idx);
1987}
1988
1989// Bit-reverse Load Intrinsic: Figure out the underlying object the base
1990// pointer points to, for the bit-reverse load intrinsic. Setting this to
1991// memoperand might help alias analysis to figure out the dependencies.
1992static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
1993 Value *IntrBaseVal = V;
1994 Value *BaseVal;
1995 // Loop over till we return the same Value, implies we either figure out
1996 // the object or we hit a PHI
1997 do {
1998 BaseVal = V;
1999 V = getBrevLdObject(V);
2000 } while (BaseVal != V);
2001
2002 // Identify the object from PHINode.
2003 if (const PHINode *PN = dyn_cast<PHINode>(Val: V))
2004 return returnEdge(PN, IntrBaseVal);
2005 // For non PHI nodes, the object is the last value returned by getBrevLdObject
2006 else
2007 return V;
2008}
2009
2010/// Given an intrinsic, checks if on the target the intrinsic will need to map
2011/// to a MemIntrinsicNode (touches memory). If this is the case, it stores
2012/// the intrinsic information into the Infos vector.
2013void HexagonTargetLowering::getTgtMemIntrinsic(
2014 SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
2015 MachineFunction &MF, unsigned Intrinsic) const {
2016 IntrinsicInfo Info;
2017 switch (Intrinsic) {
2018 case Intrinsic::hexagon_L2_loadrd_pbr:
2019 case Intrinsic::hexagon_L2_loadri_pbr:
2020 case Intrinsic::hexagon_L2_loadrh_pbr:
2021 case Intrinsic::hexagon_L2_loadruh_pbr:
2022 case Intrinsic::hexagon_L2_loadrb_pbr:
2023 case Intrinsic::hexagon_L2_loadrub_pbr: {
2024 Info.opc = ISD::INTRINSIC_W_CHAIN;
2025 auto &DL = I.getDataLayout();
2026 auto &Cont = I.getCalledFunction()->getParent()->getContext();
2027 // The intrinsic function call is of the form { ElTy, i8* }
2028 // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
2029 // should be derived from ElTy.
2030 Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(N: 0);
2031 Info.memVT = MVT::getVT(Ty: ElTy);
2032 llvm::Value *BasePtrVal = I.getOperand(i_nocapture: 0);
2033 Info.ptrVal = getUnderLyingObjectForBrevLdIntr(V: BasePtrVal);
2034 // The offset value comes through Modifier register. For now, assume the
2035 // offset is 0.
2036 Info.offset = 0;
2037 Info.align = DL.getABITypeAlign(Ty: Info.memVT.getTypeForEVT(Context&: Cont));
2038 Info.flags = MachineMemOperand::MOLoad;
2039 Infos.push_back(Elt: Info);
2040 return;
2041 }
2042 case Intrinsic::hexagon_V6_vgathermw:
2043 case Intrinsic::hexagon_V6_vgathermw_128B:
2044 case Intrinsic::hexagon_V6_vgathermh:
2045 case Intrinsic::hexagon_V6_vgathermh_128B:
2046 case Intrinsic::hexagon_V6_vgathermhw:
2047 case Intrinsic::hexagon_V6_vgathermhw_128B:
2048 case Intrinsic::hexagon_V6_vgathermwq:
2049 case Intrinsic::hexagon_V6_vgathermwq_128B:
2050 case Intrinsic::hexagon_V6_vgathermhq:
2051 case Intrinsic::hexagon_V6_vgathermhq_128B:
2052 case Intrinsic::hexagon_V6_vgathermhwq:
2053 case Intrinsic::hexagon_V6_vgathermhwq_128B:
2054 case Intrinsic::hexagon_V6_vgather_vscattermh:
2055 case Intrinsic::hexagon_V6_vgather_vscattermh_128B: {
2056 const Module &M = *I.getParent()->getParent()->getParent();
2057 Info.opc = ISD::INTRINSIC_W_CHAIN;
2058 Type *VecTy = I.getArgOperand(i: I.arg_size() - 1)->getType();
2059 assert(VecTy->isVectorTy() && "Expected vector operand for vgather");
2060 Info.memVT = MVT::getVT(Ty: VecTy);
2061 Info.ptrVal = I.getArgOperand(i: 0);
2062 Info.offset = 0;
2063 Info.align =
2064 MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(Ty: VecTy) / 8);
2065 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
2066 MachineMemOperand::MOVolatile;
2067 Infos.push_back(Elt: Info);
2068 return;
2069 }
2070 default:
2071 break;
2072 }
2073}
2074
2075bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2076 return X.getValueType().isScalarInteger(); // 'tstbit'
2077}
2078
2079bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
2080 return isTruncateFree(VT1: EVT::getEVT(Ty: Ty1), VT2: EVT::getEVT(Ty: Ty2));
2081}
2082
2083bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
2084 if (!VT1.isSimple() || !VT2.isSimple())
2085 return false;
2086 return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
2087}
2088
2089bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
2090 const MachineFunction &MF, EVT VT) const {
2091 return isOperationLegalOrCustom(Op: ISD::FMA, VT);
2092}
2093
2094// Should we expand the build vector with shuffles?
2095bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
2096 unsigned DefinedValues) const {
2097 return false;
2098}
2099
2100bool HexagonTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2101 unsigned Index) const {
2102 assert(ResVT.getVectorElementType() == SrcVT.getVectorElementType());
2103 if (!ResVT.isSimple() || !SrcVT.isSimple())
2104 return false;
2105
2106 MVT ResTy = ResVT.getSimpleVT(), SrcTy = SrcVT.getSimpleVT();
2107 if (ResTy.getVectorElementType() != MVT::i1)
2108 return true;
2109
2110 // Non-HVX bool vectors are relatively cheap.
2111 return SrcTy.getVectorNumElements() <= 8;
2112}
2113
2114bool HexagonTargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
2115 return Op.getOpcode() == ISD::CONCAT_VECTORS ||
2116 TargetLowering::isTargetCanonicalConstantNode(Op);
2117}
2118
2119bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
2120 EVT VT) const {
2121 return true;
2122}
2123
2124TargetLoweringBase::LegalizeTypeAction
2125HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
2126 unsigned VecLen = VT.getVectorMinNumElements();
2127 MVT ElemTy = VT.getVectorElementType();
2128
2129 if (VecLen == 1 || VT.isScalableVector())
2130 return TargetLoweringBase::TypeScalarizeVector;
2131
2132 if (Subtarget.useHVXOps()) {
2133 unsigned Action = getPreferredHvxVectorAction(VecTy: VT);
2134 if (Action != ~0u)
2135 return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
2136 }
2137
2138 // Always widen (remaining) vectors of i1.
2139 if (ElemTy == MVT::i1)
2140 return TargetLoweringBase::TypeWidenVector;
2141 // Widen non-power-of-2 vectors. Such types cannot be split right now,
2142 // and computeRegisterProperties will override "split" with "widen",
2143 // which can cause other issues.
2144 if (!isPowerOf2_32(Value: VecLen))
2145 return TargetLoweringBase::TypeWidenVector;
2146
2147 return TargetLoweringBase::TypeSplitVector;
2148}
2149
2150TargetLoweringBase::LegalizeAction
2151HexagonTargetLowering::getCustomOperationAction(SDNode &Op) const {
2152 if (Subtarget.useHVXOps()) {
2153 unsigned Action = getCustomHvxOperationAction(Op);
2154 if (Action != ~0u)
2155 return static_cast<TargetLoweringBase::LegalizeAction>(Action);
2156 }
2157 return TargetLoweringBase::Legal;
2158}
2159
2160std::pair<SDValue, int>
2161HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
2162 if (Addr.getOpcode() == ISD::ADD) {
2163 SDValue Op1 = Addr.getOperand(i: 1);
2164 if (auto *CN = dyn_cast<const ConstantSDNode>(Val: Op1.getNode()))
2165 return { Addr.getOperand(i: 0), CN->getSExtValue() };
2166 }
2167 return { Addr, 0 };
2168}
2169
2170// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
2171// to select data from, V3 is the permutation.
2172SDValue
2173HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
2174 const {
2175 const auto *SVN = cast<ShuffleVectorSDNode>(Val&: Op);
2176 ArrayRef<int> AM = SVN->getMask();
2177 assert(AM.size() <= 8 && "Unexpected shuffle mask");
2178 unsigned VecLen = AM.size();
2179
2180 MVT VecTy = ty(Op);
2181 assert(!Subtarget.isHVXVectorType(VecTy, true) &&
2182 "HVX shuffles should be legal");
2183 assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
2184
2185 SDValue Op0 = Op.getOperand(i: 0);
2186 SDValue Op1 = Op.getOperand(i: 1);
2187 const SDLoc &dl(Op);
2188
2189 // If the inputs are not the same as the output, bail. This is not an
2190 // error situation, but complicates the handling and the default expansion
2191 // (into BUILD_VECTOR) should be adequate.
2192 if (ty(Op: Op0) != VecTy || ty(Op: Op1) != VecTy)
2193 return SDValue();
2194
2195 // Normalize the mask so that the first non-negative index comes from
2196 // the first operand.
2197 SmallVector<int, 8> Mask(AM);
2198 unsigned F = llvm::find_if(Range&: AM, P: [](int M) { return M >= 0; }) - AM.data();
2199 if (F == AM.size())
2200 return DAG.getUNDEF(VT: VecTy);
2201 if (AM[F] >= int(VecLen)) {
2202 ShuffleVectorSDNode::commuteMask(Mask);
2203 std::swap(a&: Op0, b&: Op1);
2204 }
2205
2206 // Express the shuffle mask in terms of bytes.
2207 SmallVector<int,8> ByteMask;
2208 unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
2209 for (int M : Mask) {
2210 if (M < 0) {
2211 for (unsigned j = 0; j != ElemBytes; ++j)
2212 ByteMask.push_back(Elt: -1);
2213 } else {
2214 for (unsigned j = 0; j != ElemBytes; ++j)
2215 ByteMask.push_back(Elt: M*ElemBytes + j);
2216 }
2217 }
2218 assert(ByteMask.size() <= 8);
2219
2220 // All non-undef (non-negative) indexes are well within [0..127], so they
2221 // fit in a single byte. Build two 64-bit words:
2222 // - MaskIdx where each byte is the corresponding index (for non-negative
2223 // indexes), and 0xFF for negative indexes, and
2224 // - MaskUnd that has 0xFF for each negative index.
2225 uint64_t MaskIdx = 0;
2226 uint64_t MaskUnd = 0;
2227 for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
2228 unsigned S = 8*i;
2229 uint64_t M = ByteMask[i] & 0xFF;
2230 if (M == 0xFF)
2231 MaskUnd |= M << S;
2232 MaskIdx |= M << S;
2233 }
2234
2235 if (ByteMask.size() == 4) {
2236 // Identity.
2237 if (MaskIdx == (0x03020100 | MaskUnd))
2238 return Op0;
2239 // Byte swap.
2240 if (MaskIdx == (0x00010203 | MaskUnd)) {
2241 SDValue T0 = DAG.getBitcast(VT: MVT::i32, V: Op0);
2242 SDValue T1 = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: T0);
2243 return DAG.getBitcast(VT: VecTy, V: T1);
2244 }
2245
2246 // Byte packs.
2247 SDValue Concat10 =
2248 getCombine(Hi: Op1, Lo: Op0, dl, ResTy: typeJoin(Tys: {ty(Op: Op1), ty(Op: Op0)}), DAG);
2249 if (MaskIdx == (0x06040200 | MaskUnd))
2250 return getInstr(MachineOpc: Hexagon::S2_vtrunehb, dl, Ty: VecTy, Ops: {Concat10}, DAG);
2251 if (MaskIdx == (0x07050301 | MaskUnd))
2252 return getInstr(MachineOpc: Hexagon::S2_vtrunohb, dl, Ty: VecTy, Ops: {Concat10}, DAG);
2253
2254 SDValue Concat01 =
2255 getCombine(Hi: Op0, Lo: Op1, dl, ResTy: typeJoin(Tys: {ty(Op: Op0), ty(Op: Op1)}), DAG);
2256 if (MaskIdx == (0x02000604 | MaskUnd))
2257 return getInstr(MachineOpc: Hexagon::S2_vtrunehb, dl, Ty: VecTy, Ops: {Concat01}, DAG);
2258 if (MaskIdx == (0x03010705 | MaskUnd))
2259 return getInstr(MachineOpc: Hexagon::S2_vtrunohb, dl, Ty: VecTy, Ops: {Concat01}, DAG);
2260 }
2261
2262 if (ByteMask.size() == 8) {
2263 // Identity.
2264 if (MaskIdx == (0x0706050403020100ull | MaskUnd))
2265 return Op0;
2266 // Byte swap.
2267 if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
2268 SDValue T0 = DAG.getBitcast(VT: MVT::i64, V: Op0);
2269 SDValue T1 = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i64, Operand: T0);
2270 return DAG.getBitcast(VT: VecTy, V: T1);
2271 }
2272
2273 // Halfword picks.
2274 if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
2275 return getInstr(MachineOpc: Hexagon::S2_shuffeh, dl, Ty: VecTy, Ops: {Op1, Op0}, DAG);
2276 if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
2277 return getInstr(MachineOpc: Hexagon::S2_shuffoh, dl, Ty: VecTy, Ops: {Op1, Op0}, DAG);
2278 if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
2279 return getInstr(MachineOpc: Hexagon::S2_vtrunewh, dl, Ty: VecTy, Ops: {Op1, Op0}, DAG);
2280 if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
2281 return getInstr(MachineOpc: Hexagon::S2_vtrunowh, dl, Ty: VecTy, Ops: {Op1, Op0}, DAG);
2282 if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
2283 VectorPair P = opSplit(Vec: Op0, dl, DAG);
2284 return getInstr(MachineOpc: Hexagon::S2_packhl, dl, Ty: VecTy, Ops: {P.second, P.first}, DAG);
2285 }
2286
2287 // Byte packs.
2288 if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
2289 return getInstr(MachineOpc: Hexagon::S2_shuffeb, dl, Ty: VecTy, Ops: {Op1, Op0}, DAG);
2290 if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
2291 return getInstr(MachineOpc: Hexagon::S2_shuffob, dl, Ty: VecTy, Ops: {Op1, Op0}, DAG);
2292 }
2293
2294 return SDValue();
2295}
2296
2297SDValue
2298HexagonTargetLowering::getSplatValue(SDValue Op, SelectionDAG &DAG) const {
2299 switch (Op.getOpcode()) {
2300 case ISD::BUILD_VECTOR:
2301 if (SDValue S = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue())
2302 return S;
2303 break;
2304 case ISD::SPLAT_VECTOR:
2305 return Op.getOperand(i: 0);
2306 }
2307 return SDValue();
2308}
2309
2310// Create a Hexagon-specific node for shifting a vector by an integer.
2311SDValue
2312HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
2313 const {
2314 unsigned NewOpc;
2315 switch (Op.getOpcode()) {
2316 case ISD::SHL:
2317 NewOpc = HexagonISD::VASL;
2318 break;
2319 case ISD::SRA:
2320 NewOpc = HexagonISD::VASR;
2321 break;
2322 case ISD::SRL:
2323 NewOpc = HexagonISD::VLSR;
2324 break;
2325 default:
2326 llvm_unreachable("Unexpected shift opcode");
2327 }
2328 if (SDValue Sp = getSplatValue(Op: Op.getOperand(i: 1), DAG)) {
2329 const SDLoc dl(Op);
2330 // Canonicalize shift amount to i32 as required.
2331 SDValue Sh = Sp;
2332 if (Sh.getValueType() != MVT::i32)
2333 Sh = DAG.getZExtOrTrunc(Op: Sh, DL: dl, VT: MVT::i32);
2334
2335 assert(Sh.getValueType() == MVT::i32 &&
2336 "Hexagon vector shift-by-int must use i32 shift operand");
2337 return DAG.getNode(Opcode: NewOpc, DL: dl, VT: ty(Op), N1: Op.getOperand(i: 0), N2: Sh);
2338 }
2339
2340 return SDValue();
2341}
2342
2343SDValue
2344HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
2345 const SDLoc &dl(Op);
2346
2347 // First try to convert the shift (by vector) to a shift by a scalar.
2348 // If we first split the shift, the shift amount will become 'extract
2349 // subvector', and will no longer be recognized as scalar.
2350 SDValue Res = Op;
2351 if (SDValue S = getVectorShiftByInt(Op, DAG))
2352 Res = S;
2353
2354 unsigned Opc = Res.getOpcode();
2355 switch (Opc) {
2356 case HexagonISD::VASR:
2357 case HexagonISD::VLSR:
2358 case HexagonISD::VASL:
2359 break;
2360 default:
2361 // No instructions for shifts by non-scalars.
2362 return SDValue();
2363 }
2364
2365 MVT ResTy = ty(Op: Res);
2366 if (ResTy.getVectorElementType() != MVT::i8)
2367 return Res;
2368
2369 // For shifts of i8, extend the inputs to i16, then truncate back to i8.
2370 assert(ResTy.getVectorElementType() == MVT::i8);
2371 SDValue Val = Res.getOperand(i: 0), Amt = Res.getOperand(i: 1);
2372
2373 auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
2374 MVT Ty = ty(Op: V);
2375 MVT ExtTy = MVT::getVectorVT(VT: MVT::i16, NumElements: Ty.getVectorNumElements());
2376 SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(Op: V, DL: dl, VT: ExtTy)
2377 : DAG.getZExtOrTrunc(Op: V, DL: dl, VT: ExtTy);
2378 SDValue ExtS = DAG.getNode(Opcode: Opc, DL: dl, VT: ExtTy, Ops: {ExtV, A});
2379 return DAG.getZExtOrTrunc(Op: ExtS, DL: dl, VT: Ty);
2380 };
2381
2382 if (ResTy.getSizeInBits() == 32)
2383 return ShiftPartI8(Opc, Val, Amt);
2384
2385 auto [LoV, HiV] = opSplit(Vec: Val, dl, DAG);
2386 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: ResTy,
2387 Ops: {ShiftPartI8(Opc, LoV, Amt), ShiftPartI8(Opc, HiV, Amt)});
2388}
2389
2390SDValue
2391HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
2392 if (isa<ConstantSDNode>(Val: Op.getOperand(i: 1).getNode()))
2393 return Op;
2394 return SDValue();
2395}
2396
2397SDValue
2398HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
2399 MVT ResTy = ty(Op);
2400 SDValue InpV = Op.getOperand(i: 0);
2401 MVT InpTy = ty(Op: InpV);
2402 assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
2403 const SDLoc &dl(Op);
2404
2405 // Handle conversion from i8 to v8i1.
2406 if (InpTy == MVT::i8) {
2407 if (ResTy == MVT::v8i1) {
2408 SDValue Sc = DAG.getBitcast(VT: tyScalar(Ty: InpTy), V: InpV);
2409 SDValue Ext = DAG.getZExtOrTrunc(Op: Sc, DL: dl, VT: MVT::i32);
2410 return getInstr(MachineOpc: Hexagon::C2_tfrrp, dl, Ty: ResTy, Ops: Ext, DAG);
2411 }
2412 return SDValue();
2413 }
2414
2415 return Op;
2416}
2417
2418bool
2419HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
2420 MVT VecTy, SelectionDAG &DAG,
2421 MutableArrayRef<ConstantInt*> Consts) const {
2422 MVT ElemTy = VecTy.getVectorElementType();
2423 unsigned ElemWidth = ElemTy.getSizeInBits();
2424 IntegerType *IntTy = IntegerType::get(C&: *DAG.getContext(), NumBits: ElemWidth);
2425 bool AllConst = true;
2426
2427 for (unsigned i = 0, e = Values.size(); i != e; ++i) {
2428 SDValue V = Values[i];
2429 if (V.isUndef()) {
2430 Consts[i] = ConstantInt::get(Ty: IntTy, V: 0);
2431 continue;
2432 }
2433 // Make sure to always cast to IntTy.
2434 if (auto *CN = dyn_cast<ConstantSDNode>(Val: V.getNode())) {
2435 const ConstantInt *CI = CN->getConstantIntValue();
2436 Consts[i] = cast<ConstantInt>(
2437 Val: ConstantInt::get(Ty: IntTy, V: CI->getValue().trunc(width: ElemWidth)));
2438 } else if (auto *CN = dyn_cast<ConstantFPSDNode>(Val: V.getNode())) {
2439 const ConstantFP *CF = CN->getConstantFPValue();
2440 APInt A = CF->getValueAPF().bitcastToAPInt();
2441 Consts[i] = ConstantInt::get(Ty: IntTy, V: A.getZExtValue());
2442 } else {
2443 AllConst = false;
2444 }
2445 }
2446 return AllConst;
2447}
2448
2449SDValue
2450HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
2451 MVT VecTy, SelectionDAG &DAG) const {
2452 MVT ElemTy = VecTy.getVectorElementType();
2453 assert(VecTy.getVectorNumElements() == Elem.size());
2454
2455 SmallVector<ConstantInt*,4> Consts(Elem.size());
2456 bool AllConst = getBuildVectorConstInts(Values: Elem, VecTy, DAG, Consts);
2457
2458 unsigned First, Num = Elem.size();
2459 for (First = 0; First != Num; ++First) {
2460 if (!isUndef(Op: Elem[First]))
2461 break;
2462 }
2463 if (First == Num)
2464 return DAG.getUNDEF(VT: VecTy);
2465
2466 if (AllConst &&
2467 llvm::all_of(Range&: Consts, P: [](ConstantInt *CI) { return CI->isZero(); }))
2468 return getZero(dl, Ty: VecTy, DAG);
2469
2470 if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
2471 assert(Elem.size() == 2);
2472 if (AllConst) {
2473 // The 'Consts' array will have all values as integers regardless
2474 // of the vector element type.
2475 uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
2476 Consts[1]->getZExtValue() << 16;
2477 return DAG.getBitcast(VT: VecTy, V: DAG.getConstant(Val: V, DL: dl, VT: MVT::i32));
2478 }
2479 SDValue E0, E1;
2480 if (ElemTy == MVT::f16) {
2481 E0 = DAG.getZExtOrTrunc(Op: DAG.getBitcast(VT: MVT::i16, V: Elem[0]), DL: dl, VT: MVT::i32);
2482 E1 = DAG.getZExtOrTrunc(Op: DAG.getBitcast(VT: MVT::i16, V: Elem[1]), DL: dl, VT: MVT::i32);
2483 } else {
2484 E0 = Elem[0];
2485 E1 = Elem[1];
2486 }
2487 SDValue N = getInstr(MachineOpc: Hexagon::A2_combine_ll, dl, Ty: MVT::i32, Ops: {E1, E0}, DAG);
2488 return DAG.getBitcast(VT: VecTy, V: N);
2489 }
2490
2491 if (ElemTy == MVT::i8) {
2492 // First try generating a constant.
2493 if (AllConst) {
2494 uint32_t V = (Consts[0]->getZExtValue() & 0xFF) |
2495 (Consts[1]->getZExtValue() & 0xFF) << 8 |
2496 (Consts[2]->getZExtValue() & 0xFF) << 16 |
2497 Consts[3]->getZExtValue() << 24;
2498 return DAG.getBitcast(VT: MVT::v4i8, V: DAG.getConstant(Val: V, DL: dl, VT: MVT::i32));
2499 }
2500
2501 // Then try splat.
2502 bool IsSplat = true;
2503 for (unsigned i = First+1; i != Num; ++i) {
2504 if (Elem[i] == Elem[First] || isUndef(Op: Elem[i]))
2505 continue;
2506 IsSplat = false;
2507 break;
2508 }
2509 if (IsSplat) {
2510 // Legalize the operand of SPLAT_VECTOR.
2511 SDValue Ext = DAG.getZExtOrTrunc(Op: Elem[First], DL: dl, VT: MVT::i32);
2512 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Ext);
2513 }
2514
2515 // Generate
2516 // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
2517 // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
2518 assert(Elem.size() == 4);
2519 SDValue Vs[4];
2520 for (unsigned i = 0; i != 4; ++i) {
2521 Vs[i] = DAG.getZExtOrTrunc(Op: Elem[i], DL: dl, VT: MVT::i32);
2522 Vs[i] = DAG.getZeroExtendInReg(Op: Vs[i], DL: dl, VT: MVT::i8);
2523 }
2524 SDValue S8 = DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32);
2525 SDValue T0 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i32, Ops: {Vs[1], S8});
2526 SDValue T1 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i32, Ops: {Vs[3], S8});
2527 SDValue B0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i32, Ops: {Vs[0], T0});
2528 SDValue B1 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i32, Ops: {Vs[2], T1});
2529
2530 SDValue R = getInstr(MachineOpc: Hexagon::A2_combine_ll, dl, Ty: MVT::i32, Ops: {B1, B0}, DAG);
2531 return DAG.getBitcast(VT: MVT::v4i8, V: R);
2532 }
2533
2534#ifndef NDEBUG
2535 dbgs() << "VecTy: " << VecTy << '\n';
2536#endif
2537 llvm_unreachable("Unexpected vector element type");
2538}
2539
2540SDValue
2541HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
2542 MVT VecTy, SelectionDAG &DAG) const {
2543 MVT ElemTy = VecTy.getVectorElementType();
2544 assert(VecTy.getVectorNumElements() == Elem.size());
2545
2546 SmallVector<ConstantInt*,8> Consts(Elem.size());
2547 bool AllConst = getBuildVectorConstInts(Values: Elem, VecTy, DAG, Consts);
2548
2549 unsigned First, Num = Elem.size();
2550 for (First = 0; First != Num; ++First) {
2551 if (!isUndef(Op: Elem[First]))
2552 break;
2553 }
2554 if (First == Num)
2555 return DAG.getUNDEF(VT: VecTy);
2556
2557 if (AllConst &&
2558 llvm::all_of(Range&: Consts, P: [](ConstantInt *CI) { return CI->isZero(); }))
2559 return getZero(dl, Ty: VecTy, DAG);
2560
2561 // First try splat if possible.
2562 if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
2563 bool IsSplat = true;
2564 for (unsigned i = First+1; i != Num; ++i) {
2565 if (Elem[i] == Elem[First] || isUndef(Op: Elem[i]))
2566 continue;
2567 IsSplat = false;
2568 break;
2569 }
2570 if (IsSplat) {
2571 // Legalize the operand of SPLAT_VECTOR
2572 SDValue S = ElemTy == MVT::f16 ? DAG.getBitcast(VT: MVT::i16, V: Elem[First])
2573 : Elem[First];
2574 SDValue Ext = DAG.getZExtOrTrunc(Op: S, DL: dl, VT: MVT::i32);
2575 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Ext);
2576 }
2577 }
2578
2579 // Then try constant.
2580 if (AllConst) {
2581 uint64_t Val = 0;
2582 unsigned W = ElemTy.getSizeInBits();
2583 uint64_t Mask = (1ull << W) - 1;
2584 for (unsigned i = 0; i != Num; ++i)
2585 Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
2586 SDValue V0 = DAG.getConstant(Val, DL: dl, VT: MVT::i64);
2587 return DAG.getBitcast(VT: VecTy, V: V0);
2588 }
2589
2590 // Build two 32-bit vectors and concatenate.
2591 MVT HalfTy = MVT::getVectorVT(VT: ElemTy, NumElements: Num/2);
2592 SDValue L = (ElemTy == MVT::i32)
2593 ? Elem[0]
2594 : buildVector32(Elem: Elem.take_front(N: Num/2), dl, VecTy: HalfTy, DAG);
2595 SDValue H = (ElemTy == MVT::i32)
2596 ? Elem[1]
2597 : buildVector32(Elem: Elem.drop_front(N: Num/2), dl, VecTy: HalfTy, DAG);
2598 return getCombine(Hi: H, Lo: L, dl, ResTy: VecTy, DAG);
2599}
2600
2601SDValue
2602HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
2603 const SDLoc &dl, MVT ValTy, MVT ResTy,
2604 SelectionDAG &DAG) const {
2605 MVT VecTy = ty(Op: VecV);
2606 assert(!ValTy.isVector() ||
2607 VecTy.getVectorElementType() == ValTy.getVectorElementType());
2608 if (VecTy.getVectorElementType() == MVT::i1)
2609 return extractVectorPred(VecV, IdxV, dl, ValTy, ResTy, DAG);
2610
2611 unsigned VecWidth = VecTy.getSizeInBits();
2612 unsigned ValWidth = ValTy.getSizeInBits();
2613 unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
2614 assert((VecWidth % ElemWidth) == 0);
2615 assert(VecWidth == 32 || VecWidth == 64);
2616
2617 // Cast everything to scalar integer types.
2618 MVT ScalarTy = tyScalar(Ty: VecTy);
2619 VecV = DAG.getBitcast(VT: ScalarTy, V: VecV);
2620
2621 SDValue WidthV = DAG.getConstant(Val: ValWidth, DL: dl, VT: MVT::i32);
2622 SDValue ExtV;
2623
2624 if (auto *IdxN = dyn_cast<ConstantSDNode>(Val&: IdxV)) {
2625 unsigned Off = IdxN->getZExtValue() * ElemWidth;
2626 if (VecWidth == 64 && ValWidth == 32) {
2627 assert(Off == 0 || Off == 32);
2628 ExtV = Off == 0 ? LoHalf(V: VecV, DAG) : HiHalf(V: VecV, DAG);
2629 } else if (Off == 0 && (ValWidth % 8) == 0) {
2630 ExtV = DAG.getZeroExtendInReg(Op: VecV, DL: dl, VT: tyScalar(Ty: ValTy));
2631 } else {
2632 SDValue OffV = DAG.getConstant(Val: Off, DL: dl, VT: MVT::i32);
2633 // The return type of EXTRACTU must be the same as the type of the
2634 // input vector.
2635 ExtV = DAG.getNode(Opcode: HexagonISD::EXTRACTU, DL: dl, VT: ScalarTy,
2636 Ops: {VecV, WidthV, OffV});
2637 }
2638 } else {
2639 if (ty(Op: IdxV) != MVT::i32)
2640 IdxV = DAG.getZExtOrTrunc(Op: IdxV, DL: dl, VT: MVT::i32);
2641 SDValue OffV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
2642 N2: DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32));
2643 ExtV = DAG.getNode(Opcode: HexagonISD::EXTRACTU, DL: dl, VT: ScalarTy,
2644 Ops: {VecV, WidthV, OffV});
2645 }
2646
2647 // Cast ExtV to the requested result type.
2648 ExtV = DAG.getZExtOrTrunc(Op: ExtV, DL: dl, VT: tyScalar(Ty: ResTy));
2649 ExtV = DAG.getBitcast(VT: ResTy, V: ExtV);
2650 return ExtV;
2651}
2652
2653SDValue
2654HexagonTargetLowering::extractVectorPred(SDValue VecV, SDValue IdxV,
2655 const SDLoc &dl, MVT ValTy, MVT ResTy,
2656 SelectionDAG &DAG) const {
2657 // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2658 // without any coprocessors).
2659 MVT VecTy = ty(Op: VecV);
2660 unsigned VecWidth = VecTy.getSizeInBits();
2661 unsigned ValWidth = ValTy.getSizeInBits();
2662 assert(VecWidth == VecTy.getVectorNumElements() &&
2663 "Vector elements should equal vector width size");
2664 assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
2665
2666 // Check if this is an extract of the lowest bit.
2667 if (isNullConstant(V: IdxV) && ValTy.getSizeInBits() == 1) {
2668 // Extracting the lowest bit is a no-op, but it changes the type,
2669 // so it must be kept as an operation to avoid errors related to
2670 // type mismatches.
2671 return DAG.getNode(Opcode: HexagonISD::TYPECAST, DL: dl, VT: MVT::i1, Operand: VecV);
2672 }
2673
2674 // If the value extracted is a single bit, use tstbit.
2675 if (ValWidth == 1) {
2676 SDValue A0 = getInstr(MachineOpc: Hexagon::C2_tfrpr, dl, Ty: MVT::i32, Ops: {VecV}, DAG);
2677 SDValue M0 = DAG.getConstant(Val: 8 / VecWidth, DL: dl, VT: MVT::i32);
2678 SDValue I0 = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: M0);
2679 return DAG.getNode(Opcode: HexagonISD::TSTBIT, DL: dl, VT: MVT::i1, N1: A0, N2: I0);
2680 }
2681
2682 // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2683 // a predicate register. The elements of the vector are repeated
2684 // in the register (if necessary) so that the total number is 8.
2685 // The extracted subvector will need to be expanded in such a way.
2686 unsigned Scale = VecWidth / ValWidth;
2687
2688 // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
2689 // position 0.
2690 assert(ty(IdxV) == MVT::i32);
2691 unsigned VecRep = 8 / VecWidth;
2692 SDValue S0 = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
2693 N2: DAG.getConstant(Val: 8*VecRep, DL: dl, VT: MVT::i32));
2694 SDValue T0 = DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: VecV);
2695 SDValue T1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: T0, N2: S0);
2696 while (Scale > 1) {
2697 // The longest possible subvector is at most 32 bits, so it is always
2698 // contained in the low subregister.
2699 T1 = LoHalf(V: T1, DAG);
2700 T1 = expandPredicate(Vec32: T1, dl, DAG);
2701 Scale /= 2;
2702 }
2703
2704 return DAG.getNode(Opcode: HexagonISD::D2P, DL: dl, VT: ResTy, Operand: T1);
2705}
2706
2707SDValue
2708HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
2709 const SDLoc &dl, MVT ValTy,
2710 SelectionDAG &DAG) const {
2711 MVT VecTy = ty(Op: VecV);
2712 if (VecTy.getVectorElementType() == MVT::i1)
2713 return insertVectorPred(VecV, ValV, IdxV, dl, ValTy, DAG);
2714
2715 unsigned VecWidth = VecTy.getSizeInBits();
2716 unsigned ValWidth = ValTy.getSizeInBits();
2717 assert(VecWidth == 32 || VecWidth == 64);
2718 assert((VecWidth % ValWidth) == 0);
2719
2720 // Cast everything to scalar integer types.
2721 MVT ScalarTy = MVT::getIntegerVT(BitWidth: VecWidth);
2722 // The actual type of ValV may be different than ValTy (which is related
2723 // to the vector type).
2724 unsigned VW = ty(Op: ValV).getSizeInBits();
2725 ValV = DAG.getBitcast(VT: MVT::getIntegerVT(BitWidth: VW), V: ValV);
2726 VecV = DAG.getBitcast(VT: ScalarTy, V: VecV);
2727 if (VW != VecWidth)
2728 ValV = DAG.getAnyExtOrTrunc(Op: ValV, DL: dl, VT: ScalarTy);
2729
2730 SDValue WidthV = DAG.getConstant(Val: ValWidth, DL: dl, VT: MVT::i32);
2731 SDValue InsV;
2732
2733 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: IdxV)) {
2734 unsigned W = C->getZExtValue() * ValWidth;
2735 SDValue OffV = DAG.getConstant(Val: W, DL: dl, VT: MVT::i32);
2736 InsV = DAG.getNode(Opcode: HexagonISD::INSERT, DL: dl, VT: ScalarTy,
2737 Ops: {VecV, ValV, WidthV, OffV});
2738 } else {
2739 if (ty(Op: IdxV) != MVT::i32)
2740 IdxV = DAG.getZExtOrTrunc(Op: IdxV, DL: dl, VT: MVT::i32);
2741 SDValue OffV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: WidthV);
2742 InsV = DAG.getNode(Opcode: HexagonISD::INSERT, DL: dl, VT: ScalarTy,
2743 Ops: {VecV, ValV, WidthV, OffV});
2744 }
2745
2746 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: VecTy, Operand: InsV);
2747}
2748
2749SDValue
2750HexagonTargetLowering::insertVectorPred(SDValue VecV, SDValue ValV,
2751 SDValue IdxV, const SDLoc &dl,
2752 MVT ValTy, SelectionDAG &DAG) const {
2753 MVT VecTy = ty(Op: VecV);
2754 unsigned VecLen = VecTy.getVectorNumElements();
2755
2756 if (ValTy == MVT::i1) {
2757 SDValue ToReg = getInstr(MachineOpc: Hexagon::C2_tfrpr, dl, Ty: MVT::i32, Ops: {VecV}, DAG);
2758 SDValue Ext = DAG.getSExtOrTrunc(Op: ValV, DL: dl, VT: MVT::i32);
2759 SDValue Width = DAG.getConstant(Val: 8 / VecLen, DL: dl, VT: MVT::i32);
2760 SDValue Idx = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: Width);
2761 SDValue Ins =
2762 DAG.getNode(Opcode: HexagonISD::INSERT, DL: dl, VT: MVT::i32, Ops: {ToReg, Ext, Width, Idx});
2763 return getInstr(MachineOpc: Hexagon::C2_tfrrp, dl, Ty: VecTy, Ops: {Ins}, DAG);
2764 }
2765
2766 assert(ValTy.getVectorElementType() == MVT::i1);
2767 SDValue ValR = ValTy.isVector()
2768 ? DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: ValV)
2769 : DAG.getSExtOrTrunc(Op: ValV, DL: dl, VT: MVT::i64);
2770
2771 unsigned Scale = VecLen / ValTy.getVectorNumElements();
2772 assert(Scale > 1);
2773
2774 for (unsigned R = Scale; R > 1; R /= 2) {
2775 ValR = contractPredicate(Vec64: ValR, dl, DAG);
2776 ValR = getCombine(Hi: DAG.getUNDEF(VT: MVT::i32), Lo: ValR, dl, ResTy: MVT::i64, DAG);
2777 }
2778
2779 SDValue Width = DAG.getConstant(Val: 64 / Scale, DL: dl, VT: MVT::i32);
2780 SDValue Idx = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: Width);
2781 SDValue VecR = DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: VecV);
2782 SDValue Ins =
2783 DAG.getNode(Opcode: HexagonISD::INSERT, DL: dl, VT: MVT::i64, Ops: {VecR, ValR, Width, Idx});
2784 return DAG.getNode(Opcode: HexagonISD::D2P, DL: dl, VT: VecTy, Operand: Ins);
2785}
2786
2787SDValue
2788HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
2789 SelectionDAG &DAG) const {
2790 assert(ty(Vec32).getSizeInBits() == 32);
2791 if (isUndef(Op: Vec32))
2792 return DAG.getUNDEF(VT: MVT::i64);
2793 SDValue P = DAG.getBitcast(VT: MVT::v4i8, V: Vec32);
2794 SDValue X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::v4i16, Operand: P);
2795 return DAG.getBitcast(VT: MVT::i64, V: X);
2796}
2797
2798SDValue
2799HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
2800 SelectionDAG &DAG) const {
2801 assert(ty(Vec64).getSizeInBits() == 64);
2802 if (isUndef(Op: Vec64))
2803 return DAG.getUNDEF(VT: MVT::i32);
2804 // Collect even bytes:
2805 SDValue A = DAG.getBitcast(VT: MVT::v8i8, V: Vec64);
2806 SDValue S = DAG.getVectorShuffle(VT: MVT::v8i8, dl, N1: A, N2: DAG.getUNDEF(VT: MVT::v8i8),
2807 Mask: {0, 2, 4, 6, 1, 3, 5, 7});
2808 return extractVector(VecV: S, IdxV: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32), dl, ValTy: MVT::v4i8,
2809 ResTy: MVT::i32, DAG);
2810}
2811
2812SDValue
2813HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
2814 const {
2815 if (Ty.isVector()) {
2816 unsigned W = Ty.getSizeInBits();
2817 if (W <= 64)
2818 return DAG.getBitcast(VT: Ty, V: DAG.getConstant(Val: 0, DL: dl, VT: MVT::getIntegerVT(BitWidth: W)));
2819 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: Ty, Operand: getZero(dl, Ty: MVT::i32, DAG));
2820 }
2821
2822 if (Ty.isInteger())
2823 return DAG.getConstant(Val: 0, DL: dl, VT: Ty);
2824 if (Ty.isFloatingPoint())
2825 return DAG.getConstantFP(Val: 0.0, DL: dl, VT: Ty);
2826 llvm_unreachable("Invalid type for zero");
2827}
2828
2829SDValue
2830HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
2831 const {
2832 MVT ValTy = ty(Op: Val);
2833 assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
2834
2835 unsigned ValLen = ValTy.getVectorNumElements();
2836 unsigned ResLen = ResTy.getVectorNumElements();
2837 if (ValLen == ResLen)
2838 return Val;
2839
2840 const SDLoc &dl(Val);
2841 assert(ValLen < ResLen);
2842 assert(ResLen % ValLen == 0);
2843
2844 SmallVector<SDValue, 4> Concats = {Val};
2845 for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
2846 Concats.push_back(Elt: DAG.getUNDEF(VT: ValTy));
2847
2848 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: ResTy, Ops: Concats);
2849}
2850
2851SDValue
2852HexagonTargetLowering::getCombine(SDValue Hi, SDValue Lo, const SDLoc &dl,
2853 MVT ResTy, SelectionDAG &DAG) const {
2854 MVT ElemTy = ty(Op: Hi);
2855 assert(ElemTy == ty(Lo));
2856
2857 if (!ElemTy.isVector()) {
2858 assert(ElemTy.isScalarInteger());
2859 MVT PairTy = MVT::getIntegerVT(BitWidth: 2 * ElemTy.getSizeInBits());
2860 SDValue Pair = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: PairTy, N1: Lo, N2: Hi);
2861 return DAG.getBitcast(VT: ResTy, V: Pair);
2862 }
2863
2864 unsigned Width = ElemTy.getSizeInBits();
2865 MVT IntTy = MVT::getIntegerVT(BitWidth: Width);
2866 MVT PairTy = MVT::getIntegerVT(BitWidth: 2 * Width);
2867 SDValue Pair =
2868 DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: PairTy,
2869 Ops: {DAG.getBitcast(VT: IntTy, V: Lo), DAG.getBitcast(VT: IntTy, V: Hi)});
2870 return DAG.getBitcast(VT: ResTy, V: Pair);
2871}
2872
2873SDValue
2874HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2875 MVT VecTy = ty(Op);
2876 unsigned BW = VecTy.getSizeInBits();
2877 const SDLoc &dl(Op);
2878 SmallVector<SDValue,8> Ops;
2879 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
2880 Ops.push_back(Elt: Op.getOperand(i));
2881
2882 if (BW == 32)
2883 return buildVector32(Elem: Ops, dl, VecTy, DAG);
2884 if (BW == 64)
2885 return buildVector64(Elem: Ops, dl, VecTy, DAG);
2886
2887 if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
2888 // Check if this is a special case or all-0 or all-1.
2889 bool All0 = true, All1 = true;
2890 for (SDValue P : Ops) {
2891 auto *CN = dyn_cast<ConstantSDNode>(Val: P.getNode());
2892 if (CN == nullptr) {
2893 All0 = All1 = false;
2894 break;
2895 }
2896 uint32_t C = CN->getZExtValue();
2897 All0 &= (C == 0);
2898 All1 &= (C == 1);
2899 }
2900 if (All0)
2901 return DAG.getNode(Opcode: HexagonISD::PFALSE, DL: dl, VT: VecTy);
2902 if (All1)
2903 return DAG.getNode(Opcode: HexagonISD::PTRUE, DL: dl, VT: VecTy);
2904
2905 // For each i1 element in the resulting predicate register, put 1
2906 // shifted by the index of the element into a general-purpose register,
2907 // then or them together and transfer it back into a predicate register.
2908 SDValue Rs[8];
2909 SDValue Z = getZero(dl, Ty: MVT::i32, DAG);
2910 // Always produce 8 bits, repeat inputs if necessary.
2911 unsigned Rep = 8 / VecTy.getVectorNumElements();
2912 for (unsigned i = 0; i != 8; ++i) {
2913 SDValue S = DAG.getConstant(Val: 1ull << i, DL: dl, VT: MVT::i32);
2914 Rs[i] = DAG.getSelect(DL: dl, VT: MVT::i32, Cond: Ops[i/Rep], LHS: S, RHS: Z);
2915 }
2916 for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(N: A.size()/2)) {
2917 for (unsigned i = 0, e = A.size()/2; i != e; ++i)
2918 Rs[i] = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i32, N1: Rs[2*i], N2: Rs[2*i+1]);
2919 }
2920 // Move the value directly to a predicate register.
2921 return getInstr(MachineOpc: Hexagon::C2_tfrrp, dl, Ty: VecTy, Ops: {Rs[0]}, DAG);
2922 }
2923
2924 return SDValue();
2925}
2926
2927SDValue
2928HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
2929 SelectionDAG &DAG) const {
2930 MVT VecTy = ty(Op);
2931 const SDLoc &dl(Op);
2932 if (VecTy.getSizeInBits() == 64) {
2933 assert(Op.getNumOperands() == 2);
2934 return getCombine(Hi: Op.getOperand(i: 1), Lo: Op.getOperand(i: 0), dl, ResTy: VecTy, DAG);
2935 }
2936
2937 MVT ElemTy = VecTy.getVectorElementType();
2938 if (ElemTy == MVT::i1) {
2939 assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
2940 MVT OpTy = ty(Op: Op.getOperand(i: 0));
2941 // Scale is how many times the operands need to be contracted to match
2942 // the representation in the target register.
2943 unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
2944 assert(Scale == Op.getNumOperands() && Scale > 1);
2945
2946 // First, convert all bool vectors to integers, then generate pairwise
2947 // inserts to form values of doubled length. Up until there are only
2948 // two values left to concatenate, all of these values will fit in a
2949 // 32-bit integer, so keep them as i32 to use 32-bit inserts.
2950 SmallVector<SDValue,4> Words[2];
2951 unsigned IdxW = 0;
2952
2953 for (SDValue P : Op.getNode()->op_values()) {
2954 SDValue W = DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: P);
2955 for (unsigned R = Scale; R > 1; R /= 2) {
2956 W = contractPredicate(Vec64: W, dl, DAG);
2957 W = getCombine(Hi: DAG.getUNDEF(VT: MVT::i32), Lo: W, dl, ResTy: MVT::i64, DAG);
2958 }
2959 W = LoHalf(V: W, DAG);
2960 Words[IdxW].push_back(Elt: W);
2961 }
2962
2963 while (Scale > 2) {
2964 SDValue WidthV = DAG.getConstant(Val: 64 / Scale, DL: dl, VT: MVT::i32);
2965 Words[IdxW ^ 1].clear();
2966
2967 for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
2968 SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
2969 // Insert W1 into W0 right next to the significant bits of W0.
2970 SDValue T = DAG.getNode(Opcode: HexagonISD::INSERT, DL: dl, VT: MVT::i32,
2971 Ops: {W0, W1, WidthV, WidthV});
2972 Words[IdxW ^ 1].push_back(Elt: T);
2973 }
2974 IdxW ^= 1;
2975 Scale /= 2;
2976 }
2977
2978 // At this point there should only be two words left, and Scale should be 2.
2979 assert(Scale == 2 && Words[IdxW].size() == 2);
2980
2981 SDValue WW = getCombine(Hi: Words[IdxW][1], Lo: Words[IdxW][0], dl, ResTy: MVT::i64, DAG);
2982 return DAG.getNode(Opcode: HexagonISD::D2P, DL: dl, VT: VecTy, Operand: WW);
2983 }
2984
2985 return SDValue();
2986}
2987
2988SDValue
2989HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
2990 SelectionDAG &DAG) const {
2991 SDValue Vec = Op.getOperand(i: 0);
2992 MVT ElemTy = ty(Op: Vec).getVectorElementType();
2993 return extractVector(VecV: Vec, IdxV: Op.getOperand(i: 1), dl: SDLoc(Op), ValTy: ElemTy, ResTy: ty(Op), DAG);
2994}
2995
2996SDValue
2997HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
2998 SelectionDAG &DAG) const {
2999 return extractVector(VecV: Op.getOperand(i: 0), IdxV: Op.getOperand(i: 1), dl: SDLoc(Op),
3000 ValTy: ty(Op), ResTy: ty(Op), DAG);
3001}
3002
3003SDValue
3004HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
3005 SelectionDAG &DAG) const {
3006 return insertVector(VecV: Op.getOperand(i: 0), ValV: Op.getOperand(i: 1), IdxV: Op.getOperand(i: 2),
3007 dl: SDLoc(Op), ValTy: ty(Op).getVectorElementType(), DAG);
3008}
3009
3010SDValue
3011HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
3012 SelectionDAG &DAG) const {
3013 SDValue ValV = Op.getOperand(i: 1);
3014 return insertVector(VecV: Op.getOperand(i: 0), ValV, IdxV: Op.getOperand(i: 2),
3015 dl: SDLoc(Op), ValTy: ty(Op: ValV), DAG);
3016}
3017
3018bool
3019HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
3020 // Assuming the caller does not have either a signext or zeroext modifier, and
3021 // only one value is accepted, any reasonable truncation is allowed.
3022 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
3023 return false;
3024
3025 // FIXME: in principle up to 64-bit could be made safe, but it would be very
3026 // fragile at the moment: any support for multiple value returns would be
3027 // liable to disallow tail calls involving i64 -> iN truncation in many cases.
3028 return Ty1->getPrimitiveSizeInBits() <= 32;
3029}
3030
3031SDValue
3032HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
3033 MVT Ty = ty(Op);
3034 const SDLoc &dl(Op);
3035 LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
3036 MVT MemTy = LN->getMemoryVT().getSimpleVT();
3037 ISD::LoadExtType ET = LN->getExtensionType();
3038
3039 bool LoadPred = MemTy == MVT::v2i1 || MemTy == MVT::v4i1 || MemTy == MVT::v8i1;
3040 if (LoadPred) {
3041 SDValue NL = DAG.getLoad(
3042 AM: LN->getAddressingMode(), ExtType: ISD::ZEXTLOAD, VT: MVT::i32, dl, Chain: LN->getChain(),
3043 Ptr: LN->getBasePtr(), Offset: LN->getOffset(), PtrInfo: LN->getPointerInfo(),
3044 /*MemoryVT*/ MemVT: MVT::i8, Alignment: LN->getAlign(), MMOFlags: LN->getMemOperand()->getFlags(),
3045 AAInfo: LN->getAAInfo(), Ranges: LN->getRanges());
3046 LN = cast<LoadSDNode>(Val: NL.getNode());
3047 }
3048
3049 Align ClaimAlign = LN->getAlign();
3050 if (!validateConstPtrAlignment(Ptr: LN->getBasePtr(), NeedAlign: ClaimAlign, dl, DAG))
3051 return replaceMemWithUndef(Op, DAG);
3052
3053 // Call LowerUnalignedLoad for all loads, it recognizes loads that
3054 // don't need extra aligning.
3055 SDValue LU = LowerUnalignedLoad(Op: SDValue(LN, 0), DAG);
3056 if (LoadPred) {
3057 SDValue TP = getInstr(MachineOpc: Hexagon::C2_tfrrp, dl, Ty: MemTy, Ops: {LU}, DAG);
3058 if (ET == ISD::SEXTLOAD) {
3059 TP = DAG.getSExtOrTrunc(Op: TP, DL: dl, VT: Ty);
3060 } else if (ET != ISD::NON_EXTLOAD) {
3061 TP = DAG.getZExtOrTrunc(Op: TP, DL: dl, VT: Ty);
3062 }
3063 SDValue Ch = cast<LoadSDNode>(Val: LU.getNode())->getChain();
3064 return DAG.getMergeValues(Ops: {TP, Ch}, dl);
3065 }
3066 return LU;
3067}
3068
3069SDValue
3070HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
3071 const SDLoc &dl(Op);
3072 StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
3073 SDValue Val = SN->getValue();
3074 MVT Ty = ty(Op: Val);
3075
3076 if (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1) {
3077 // Store the exact predicate (all bits).
3078 SDValue TR = getInstr(MachineOpc: Hexagon::C2_tfrpr, dl, Ty: MVT::i32, Ops: {Val}, DAG);
3079 SDValue NS = DAG.getTruncStore(Chain: SN->getChain(), dl, Val: TR, Ptr: SN->getBasePtr(),
3080 SVT: MVT::i8, MMO: SN->getMemOperand());
3081 if (SN->isIndexed()) {
3082 NS = DAG.getIndexedStore(OrigStore: NS, dl, Base: SN->getBasePtr(), Offset: SN->getOffset(),
3083 AM: SN->getAddressingMode());
3084 }
3085 SN = cast<StoreSDNode>(Val: NS.getNode());
3086 }
3087
3088 Align ClaimAlign = SN->getAlign();
3089 if (!validateConstPtrAlignment(Ptr: SN->getBasePtr(), NeedAlign: ClaimAlign, dl, DAG))
3090 return replaceMemWithUndef(Op, DAG);
3091
3092 MVT StoreTy = SN->getMemoryVT().getSimpleVT();
3093 Align NeedAlign = Subtarget.getTypeAlignment(Ty: StoreTy);
3094 if (ClaimAlign < NeedAlign)
3095 return expandUnalignedStore(ST: SN, DAG);
3096 return SDValue(SN, 0);
3097}
3098
3099SDValue
3100HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
3101 const {
3102 LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
3103 MVT LoadTy = ty(Op);
3104 unsigned NeedAlign = Subtarget.getTypeAlignment(Ty: LoadTy).value();
3105 unsigned HaveAlign = LN->getAlign().value();
3106 if (HaveAlign >= NeedAlign)
3107 return Op;
3108
3109 const SDLoc &dl(Op);
3110 const DataLayout &DL = DAG.getDataLayout();
3111 LLVMContext &Ctx = *DAG.getContext();
3112
3113 // If the load aligning is disabled or the load can be broken up into two
3114 // smaller legal loads, do the default (target-independent) expansion.
3115 bool DoDefault = false;
3116 // Handle it in the default way if this is an indexed load.
3117 if (!LN->isUnindexed())
3118 DoDefault = true;
3119
3120 if (!AlignLoads) {
3121 if (allowsMemoryAccessForAlignment(Context&: Ctx, DL, VT: LN->getMemoryVT(),
3122 MMO: *LN->getMemOperand()))
3123 return Op;
3124 DoDefault = true;
3125 }
3126 if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
3127 // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
3128 MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(BitWidth: 8 * HaveAlign)
3129 : MVT::getVectorVT(VT: MVT::i8, NumElements: HaveAlign);
3130 DoDefault =
3131 allowsMemoryAccessForAlignment(Context&: Ctx, DL, VT: PartTy, MMO: *LN->getMemOperand());
3132 }
3133 if (DoDefault) {
3134 std::pair<SDValue, SDValue> P = expandUnalignedLoad(LD: LN, DAG);
3135 return DAG.getMergeValues(Ops: {P.first, P.second}, dl);
3136 }
3137
3138 // The code below generates two loads, both aligned as NeedAlign, and
3139 // with the distance of NeedAlign between them. For that to cover the
3140 // bits that need to be loaded (and without overlapping), the size of
3141 // the loads should be equal to NeedAlign. This is true for all loadable
3142 // types, but add an assertion in case something changes in the future.
3143 assert(LoadTy.getSizeInBits() == 8*NeedAlign);
3144
3145 unsigned LoadLen = NeedAlign;
3146 SDValue Base = LN->getBasePtr();
3147 SDValue Chain = LN->getChain();
3148 auto BO = getBaseAndOffset(Addr: Base);
3149 unsigned BaseOpc = BO.first.getOpcode();
3150 if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
3151 return Op;
3152
3153 if (BO.second % LoadLen != 0) {
3154 BO.first = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: BO.first,
3155 N2: DAG.getConstant(Val: BO.second % LoadLen, DL: dl, VT: MVT::i32));
3156 BO.second -= BO.second % LoadLen;
3157 }
3158 SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
3159 ? DAG.getNode(Opcode: HexagonISD::VALIGNADDR, DL: dl, VT: MVT::i32, N1: BO.first,
3160 N2: DAG.getConstant(Val: NeedAlign, DL: dl, VT: MVT::i32))
3161 : BO.first;
3162 SDValue Base0 =
3163 DAG.getMemBasePlusOffset(Base: BaseNoOff, Offset: TypeSize::getFixed(ExactSize: BO.second), DL: dl);
3164 SDValue Base1 = DAG.getMemBasePlusOffset(
3165 Base: BaseNoOff, Offset: TypeSize::getFixed(ExactSize: BO.second + LoadLen), DL: dl);
3166
3167 MachineMemOperand *WideMMO = nullptr;
3168 if (MachineMemOperand *MMO = LN->getMemOperand()) {
3169 MachineFunction &MF = DAG.getMachineFunction();
3170 WideMMO = MF.getMachineMemOperand(
3171 PtrInfo: MMO->getPointerInfo(), F: MMO->getFlags(), Size: 2 * LoadLen, BaseAlignment: Align(LoadLen),
3172 AAInfo: MMO->getAAInfo(), Ranges: MMO->getRanges(), SSID: MMO->getSyncScopeID(),
3173 Ordering: MMO->getSuccessOrdering(), FailureOrdering: MMO->getFailureOrdering());
3174 }
3175
3176 SDValue Load0 = DAG.getLoad(VT: LoadTy, dl, Chain, Ptr: Base0, MMO: WideMMO);
3177 SDValue Load1 = DAG.getLoad(VT: LoadTy, dl, Chain, Ptr: Base1, MMO: WideMMO);
3178
3179 SDValue Aligned = DAG.getNode(Opcode: HexagonISD::VALIGN, DL: dl, VT: LoadTy,
3180 Ops: {Load1, Load0, BaseNoOff.getOperand(i: 0)});
3181 SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
3182 N1: Load0.getValue(R: 1), N2: Load1.getValue(R: 1));
3183 SDValue M = DAG.getMergeValues(Ops: {Aligned, NewChain}, dl);
3184 return M;
3185}
3186
3187SDValue
3188HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
3189 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
3190 auto *CY = dyn_cast<ConstantSDNode>(Val&: Y);
3191 if (!CY)
3192 return SDValue();
3193
3194 const SDLoc &dl(Op);
3195 SDVTList VTs = Op.getNode()->getVTList();
3196 assert(VTs.NumVTs == 2);
3197 assert(VTs.VTs[1] == MVT::i1);
3198 unsigned Opc = Op.getOpcode();
3199
3200 if (CY) {
3201 uint64_t VY = CY->getZExtValue();
3202 assert(VY != 0 && "This should have been folded");
3203 // X +/- 1
3204 if (VY != 1)
3205 return SDValue();
3206
3207 if (Opc == ISD::UADDO) {
3208 SDValue Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VTs.VTs[0], Ops: {X, Y});
3209 SDValue Ov = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: Op, RHS: getZero(dl, Ty: ty(Op), DAG),
3210 Cond: ISD::SETEQ);
3211 return DAG.getMergeValues(Ops: {Op, Ov}, dl);
3212 }
3213 if (Opc == ISD::USUBO) {
3214 SDValue Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: VTs.VTs[0], Ops: {X, Y});
3215 SDValue Ov = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: Op,
3216 RHS: DAG.getAllOnesConstant(DL: dl, VT: ty(Op)), Cond: ISD::SETEQ);
3217 return DAG.getMergeValues(Ops: {Op, Ov}, dl);
3218 }
3219 }
3220
3221 return SDValue();
3222}
3223
3224SDValue HexagonTargetLowering::LowerUAddSubOCarry(SDValue Op,
3225 SelectionDAG &DAG) const {
3226 const SDLoc &dl(Op);
3227 unsigned Opc = Op.getOpcode();
3228 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1), C = Op.getOperand(i: 2);
3229
3230 if (Opc == ISD::UADDO_CARRY)
3231 return DAG.getNode(Opcode: HexagonISD::ADDC, DL: dl, VTList: Op.getNode()->getVTList(),
3232 Ops: { X, Y, C });
3233
3234 EVT CarryTy = C.getValueType();
3235 SDValue SubC = DAG.getNode(Opcode: HexagonISD::SUBC, DL: dl, VTList: Op.getNode()->getVTList(),
3236 Ops: { X, Y, DAG.getLogicalNOT(DL: dl, Val: C, VT: CarryTy) });
3237 SDValue Out[] = { SubC.getValue(R: 0),
3238 DAG.getLogicalNOT(DL: dl, Val: SubC.getValue(R: 1), VT: CarryTy) };
3239 return DAG.getMergeValues(Ops: Out, dl);
3240}
3241
3242SDValue
3243HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
3244 SDValue Chain = Op.getOperand(i: 0);
3245 SDValue Offset = Op.getOperand(i: 1);
3246 SDValue Handler = Op.getOperand(i: 2);
3247 SDLoc dl(Op);
3248 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
3249
3250 // Mark function as containing a call to EH_RETURN.
3251 HexagonMachineFunctionInfo *FuncInfo =
3252 DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
3253 FuncInfo->setHasEHReturn();
3254
3255 unsigned OffsetReg = Hexagon::R28;
3256
3257 SDValue StoreAddr =
3258 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: DAG.getRegister(Reg: Hexagon::R30, VT: PtrVT),
3259 N2: DAG.getIntPtrConstant(Val: 4, DL: dl));
3260 Chain = DAG.getStore(Chain, dl, Val: Handler, Ptr: StoreAddr, PtrInfo: MachinePointerInfo());
3261 Chain = DAG.getCopyToReg(Chain, dl, Reg: OffsetReg, N: Offset);
3262
3263 // Not needed we already use it as explicit input to EH_RETURN.
3264 // MF.getRegInfo().addLiveOut(OffsetReg);
3265
3266 return DAG.getNode(Opcode: HexagonISD::EH_RETURN, DL: dl, VT: MVT::Other, Operand: Chain);
3267}
3268
3269SDValue
3270HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3271 unsigned Opc = Op.getOpcode();
3272 // Handle INLINEASM first.
3273 if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
3274 return LowerINLINEASM(Op, DAG);
3275
3276 if (isHvxOperation(N: Op.getNode(), DAG)) {
3277 // If HVX lowering returns nothing, try the default lowering.
3278 if (SDValue V = LowerHvxOperation(Op, DAG))
3279 return V;
3280 }
3281
3282 switch (Opc) {
3283 default:
3284#ifndef NDEBUG
3285 Op.getNode()->dumpr(&DAG);
3286#endif
3287 llvm_unreachable("Should not custom lower this!");
3288
3289 case ISD::FDIV:
3290 return LowerFDIV(Op, DAG);
3291 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3292 case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
3293 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
3294 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
3295 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3296 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
3297 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3298 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
3299 case ISD::LOAD: return LowerLoad(Op, DAG);
3300 case ISD::STORE: return LowerStore(Op, DAG);
3301 case ISD::UADDO:
3302 case ISD::USUBO: return LowerUAddSubO(Op, DAG);
3303 case ISD::UADDO_CARRY:
3304 case ISD::USUBO_CARRY: return LowerUAddSubOCarry(Op, DAG);
3305 case ISD::SRA:
3306 case ISD::SHL:
3307 case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
3308 case ISD::ROTL: return LowerROTL(Op, DAG);
3309 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
3310 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
3311 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
3312 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
3313 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
3314 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
3315 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
3316 case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
3317 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
3318 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3319 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
3320 case ISD::VASTART: return LowerVASTART(Op, DAG);
3321 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
3322 case ISD::SETCC: return LowerSETCC(Op, DAG);
3323 case ISD::VSELECT: return LowerVSELECT(Op, DAG);
3324 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3325 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
3326 case ISD::PREFETCH:
3327 return LowerPREFETCH(Op, DAG);
3328 break;
3329 }
3330
3331 return SDValue();
3332}
3333
3334void
3335HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
3336 SmallVectorImpl<SDValue> &Results,
3337 SelectionDAG &DAG) const {
3338 if (isHvxOperation(N, DAG)) {
3339 LowerHvxOperationWrapper(N, Results, DAG);
3340 if (!Results.empty())
3341 return;
3342 }
3343
3344 SDValue Op(N, 0);
3345 unsigned Opc = N->getOpcode();
3346
3347 switch (Opc) {
3348 case HexagonISD::SSAT:
3349 case HexagonISD::USAT:
3350 Results.push_back(Elt: opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG));
3351 break;
3352 case ISD::STORE:
3353 // We are only custom-lowering stores to verify the alignment of the
3354 // address if it is a compile-time constant. Since a store can be
3355 // modified during type-legalization (the value being stored may need
3356 // legalization), return empty Results here to indicate that we don't
3357 // really make any changes in the custom lowering.
3358 return;
3359 default:
3360 TargetLowering::LowerOperationWrapper(N, Results, DAG);
3361 break;
3362 }
3363}
3364
3365void
3366HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
3367 SmallVectorImpl<SDValue> &Results,
3368 SelectionDAG &DAG) const {
3369 if (isHvxOperation(N, DAG)) {
3370 ReplaceHvxNodeResults(N, Results, DAG);
3371 if (!Results.empty())
3372 return;
3373 }
3374
3375 const SDLoc &dl(N);
3376 switch (N->getOpcode()) {
3377 case ISD::SRL:
3378 case ISD::SRA:
3379 case ISD::SHL:
3380 return;
3381 case ISD::BITCAST:
3382 // Handle a bitcast from v8i1 to i8.
3383 if (N->getValueType(ResNo: 0) == MVT::i8) {
3384 if (N->getOperand(Num: 0).getValueType() == MVT::v8i1) {
3385 SDValue P = getInstr(MachineOpc: Hexagon::C2_tfrpr, dl, Ty: MVT::i32,
3386 Ops: N->getOperand(Num: 0), DAG);
3387 SDValue T = DAG.getAnyExtOrTrunc(Op: P, DL: dl, VT: MVT::i8);
3388 Results.push_back(Elt: T);
3389 }
3390 }
3391 break;
3392 }
3393}
3394
3395SDValue
3396HexagonTargetLowering::PerformDAGCombine(SDNode *N,
3397 DAGCombinerInfo &DCI) const {
3398 SDValue Op(N, 0);
3399 const SDLoc &dl(Op);
3400 unsigned Opc = Op.getOpcode();
3401
3402 // Combining transformations applicable for arbitrary vector sizes.
3403 if (DCI.isBeforeLegalizeOps()) {
3404 switch (Opc) {
3405 case ISD::VECREDUCE_ADD:
3406 if (SDValue V = splitVecReduceAdd(N, DAG&: DCI.DAG))
3407 return V;
3408 if (SDValue V = expandVecReduceAdd(N, DAG&: DCI.DAG))
3409 return V;
3410 return SDValue();
3411 case ISD::PARTIAL_REDUCE_SMLA:
3412 case ISD::PARTIAL_REDUCE_UMLA:
3413 case ISD::PARTIAL_REDUCE_SUMLA:
3414 if (SDValue V = splitExtendingPartialReduceMLA(N, DAG&: DCI.DAG))
3415 return V;
3416 return SDValue();
3417 }
3418 } else {
3419 switch (Opc) {
3420 case ISD::VSELECT: {
3421 // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
3422 SDValue Cond = Op.getOperand(i: 0);
3423 if (Cond->getOpcode() == ISD::XOR) {
3424 SDValue C0 = Cond.getOperand(i: 0), C1 = Cond.getOperand(i: 1);
3425 if (C1->getOpcode() == HexagonISD::PTRUE) {
3426 SDValue VSel = DCI.DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ty(Op), N1: C0,
3427 N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 1));
3428 return VSel;
3429 }
3430 }
3431 return SDValue();
3432 }
3433 }
3434 }
3435
3436 if (isHvxOperation(N, DAG&: DCI.DAG)) {
3437 if (SDValue V = PerformHvxDAGCombine(N, DCI))
3438 return V;
3439 return SDValue();
3440 }
3441
3442 if (Opc == ISD::TRUNCATE) {
3443 SDValue Op0 = Op.getOperand(i: 0);
3444 // fold (truncate (build pair x, y)) -> (truncate x) or x
3445 if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3446 EVT TruncTy = Op.getValueType();
3447 SDValue Elem0 = Op0.getOperand(i: 0);
3448 // if we match the low element of the pair, just return it.
3449 if (Elem0.getValueType() == TruncTy)
3450 return Elem0;
3451 // otherwise, if the low part is still too large, apply the truncate.
3452 if (Elem0.getValueType().bitsGT(VT: TruncTy))
3453 return DCI.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: TruncTy, Operand: Elem0);
3454 }
3455 }
3456
3457 if (DCI.isBeforeLegalizeOps())
3458 return SDValue();
3459
3460 switch (Opc) {
3461 case HexagonISD::P2D: {
3462 SDValue P = Op.getOperand(i: 0);
3463 switch (P.getOpcode()) {
3464 case HexagonISD::PTRUE:
3465 return DCI.DAG.getAllOnesConstant(DL: dl, VT: ty(Op));
3466 case HexagonISD::PFALSE:
3467 return getZero(dl, Ty: ty(Op), DAG&: DCI.DAG);
3468 default:
3469 break;
3470 }
3471 break;
3472 }
3473 case ISD::TRUNCATE: {
3474 SDValue Op0 = Op.getOperand(i: 0);
3475 // fold (truncate (build pair x, y)) -> (truncate x) or x
3476 if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3477 MVT TruncTy = ty(Op);
3478 SDValue Elem0 = Op0.getOperand(i: 0);
3479 // if we match the low element of the pair, just return it.
3480 if (ty(Op: Elem0) == TruncTy)
3481 return Elem0;
3482 // otherwise, if the low part is still too large, apply the truncate.
3483 if (ty(Op: Elem0).bitsGT(VT: TruncTy))
3484 return DCI.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: TruncTy, Operand: Elem0);
3485 }
3486 break;
3487 }
3488 case ISD::OR: {
3489 // fold (or (shl xx, s), (zext y)) -> (COMBINE (shl xx, s-32), y)
3490 // if s >= 32
3491 auto fold0 = [&, this](SDValue Op) {
3492 if (ty(Op) != MVT::i64)
3493 return SDValue();
3494 SDValue Shl = Op.getOperand(i: 0);
3495 SDValue Zxt = Op.getOperand(i: 1);
3496 if (Shl.getOpcode() != ISD::SHL)
3497 std::swap(a&: Shl, b&: Zxt);
3498
3499 if (Shl.getOpcode() != ISD::SHL || Zxt.getOpcode() != ISD::ZERO_EXTEND)
3500 return SDValue();
3501
3502 SDValue Z = Zxt.getOperand(i: 0);
3503 auto *Amt = dyn_cast<ConstantSDNode>(Val: Shl.getOperand(i: 1));
3504 if (Amt && Amt->getZExtValue() >= 32 && ty(Op: Z).getSizeInBits() <= 32) {
3505 unsigned A = Amt->getZExtValue();
3506 SDValue S = Shl.getOperand(i: 0);
3507 SDValue T0 = DCI.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ty(Op: S), N1: S,
3508 N2: DCI.DAG.getConstant(Val: A - 32, DL: dl, VT: MVT::i32));
3509 SDValue T1 = DCI.DAG.getZExtOrTrunc(Op: T0, DL: dl, VT: MVT::i32);
3510 SDValue T2 = DCI.DAG.getZExtOrTrunc(Op: Z, DL: dl, VT: MVT::i32);
3511 return DCI.DAG.getNode(Opcode: HexagonISD::COMBINE, DL: dl, VT: MVT::i64, Ops: {T1, T2});
3512 }
3513 return SDValue();
3514 };
3515
3516 if (SDValue R = fold0(Op))
3517 return R;
3518 break;
3519 }
3520 }
3521
3522 return SDValue();
3523}
3524
3525/// Returns relocation base for the given PIC jumptable.
3526SDValue
3527HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3528 SelectionDAG &DAG) const {
3529 int Idx = cast<JumpTableSDNode>(Val&: Table)->getIndex();
3530 EVT VT = Table.getValueType();
3531 SDValue T = DAG.getTargetJumpTable(JTI: Idx, VT, TargetFlags: HexagonII::MO_PCREL);
3532 return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: SDLoc(Table), VT, Operand: T);
3533}
3534
3535//===----------------------------------------------------------------------===//
3536// Inline Assembly Support
3537//===----------------------------------------------------------------------===//
3538
3539TargetLowering::ConstraintType
3540HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
3541 if (Constraint.size() == 1) {
3542 switch (Constraint[0]) {
3543 case 'q':
3544 case 'v':
3545 if (Subtarget.useHVXOps())
3546 return C_RegisterClass;
3547 break;
3548 case 'a':
3549 return C_RegisterClass;
3550 default:
3551 break;
3552 }
3553 }
3554 return TargetLowering::getConstraintType(Constraint);
3555}
3556
3557std::pair<unsigned, const TargetRegisterClass*>
3558HexagonTargetLowering::getRegForInlineAsmConstraint(
3559 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
3560
3561 if (Constraint.size() == 1) {
3562 switch (Constraint[0]) {
3563 case 'r': // R0-R31
3564 switch (VT.SimpleTy) {
3565 default:
3566 return {0u, nullptr};
3567 case MVT::i1:
3568 case MVT::i8:
3569 case MVT::i16:
3570 case MVT::i32:
3571 case MVT::f32:
3572 return {0u, &Hexagon::IntRegsRegClass};
3573 case MVT::i64:
3574 case MVT::f64:
3575 return {0u, &Hexagon::DoubleRegsRegClass};
3576 }
3577 break;
3578 case 'a': // M0-M1
3579 if (VT != MVT::i32)
3580 return {0u, nullptr};
3581 return {0u, &Hexagon::ModRegsRegClass};
3582 case 'q': // q0-q3
3583 switch (VT.getSizeInBits()) {
3584 default:
3585 return {0u, nullptr};
3586 case 64:
3587 case 128:
3588 return {0u, &Hexagon::HvxQRRegClass};
3589 }
3590 break;
3591 case 'v': // V0-V31
3592 switch (VT.getSizeInBits()) {
3593 default:
3594 return {0u, nullptr};
3595 case 512:
3596 return {0u, &Hexagon::HvxVRRegClass};
3597 case 1024:
3598 if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
3599 return {0u, &Hexagon::HvxVRRegClass};
3600 return {0u, &Hexagon::HvxWRRegClass};
3601 case 2048:
3602 return {0u, &Hexagon::HvxWRRegClass};
3603 }
3604 break;
3605 default:
3606 return {0u, nullptr};
3607 }
3608 }
3609
3610 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3611}
3612
3613/// isFPImmLegal - Returns true if the target can instruction select the
3614/// specified FP immediate natively. If false, the legalizer will
3615/// materialize the FP immediate as a load from a constant pool.
3616bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
3617 bool ForCodeSize) const {
3618 return true;
3619}
3620
3621/// Returns true if it is beneficial to convert a load of a constant
3622/// to just the constant itself.
3623bool HexagonTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
3624 Type *Ty) const {
3625 if (!ConstantLoadsToImm)
3626 return false;
3627
3628 assert(Ty->isIntegerTy());
3629 unsigned BitSize = Ty->getPrimitiveSizeInBits();
3630 return (BitSize > 0 && BitSize <= 64);
3631}
3632
3633/// isLegalAddressingMode - Return true if the addressing mode represented by
3634/// AM is legal for this target, for a load/store of the specified type.
3635bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3636 const AddrMode &AM, Type *Ty,
3637 unsigned AS, Instruction *I) const {
3638 if (Ty->isSized()) {
3639 // When LSR detects uses of the same base address to access different
3640 // types (e.g. unions), it will assume a conservative type for these
3641 // uses:
3642 // LSR Use: Kind=Address of void in addrspace(4294967295), ...
3643 // The type Ty passed here would then be "void". Skip the alignment
3644 // checks, but do not return false right away, since that confuses
3645 // LSR into crashing.
3646 Align A = DL.getABITypeAlign(Ty);
3647 // The base offset must be a multiple of the alignment.
3648 if (!isAligned(Lhs: A, SizeInBytes: AM.BaseOffs))
3649 return false;
3650 // The shifted offset must fit in 11 bits.
3651 if (!isInt<11>(x: AM.BaseOffs >> Log2(A)))
3652 return false;
3653 }
3654
3655 // No global is ever allowed as a base.
3656 if (AM.BaseGV)
3657 return false;
3658
3659 int Scale = AM.Scale;
3660 if (Scale < 0)
3661 Scale = -Scale;
3662 switch (Scale) {
3663 case 0: // No scale reg, "r+i", "r", or just "i".
3664 break;
3665 default: // No scaled addressing mode.
3666 return false;
3667 }
3668 return true;
3669}
3670
3671/// Return true if folding a constant offset with the given GlobalAddress is
3672/// legal. It is frequently not legal in PIC relocation models.
3673bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
3674 const {
3675 return HTM.getRelocationModel() == Reloc::Static;
3676}
3677
3678/// isLegalICmpImmediate - Return true if the specified immediate is legal
3679/// icmp immediate, that is the target has icmp instructions which can compare
3680/// a register against the immediate without having to materialize the
3681/// immediate into a register.
3682bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3683 return Imm >= -512 && Imm <= 511;
3684}
3685
3686/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3687/// for tail call optimization. Targets which want to do tail call
3688/// optimization should implement this function.
3689bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
3690 SDValue Callee,
3691 CallingConv::ID CalleeCC,
3692 bool IsVarArg,
3693 bool IsCalleeStructRet,
3694 bool IsCallerStructRet,
3695 const SmallVectorImpl<ISD::OutputArg> &Outs,
3696 const SmallVectorImpl<SDValue> &OutVals,
3697 const SmallVectorImpl<ISD::InputArg> &Ins,
3698 SelectionDAG& DAG) const {
3699 const Function &CallerF = DAG.getMachineFunction().getFunction();
3700 CallingConv::ID CallerCC = CallerF.getCallingConv();
3701 bool CCMatch = CallerCC == CalleeCC;
3702
3703 // ***************************************************************************
3704 // Look for obvious safe cases to perform tail call optimization that do not
3705 // require ABI changes.
3706 // ***************************************************************************
3707
3708 // If this is a tail call via a function pointer, then don't do it!
3709 if (!isa<GlobalAddressSDNode>(Val: Callee) &&
3710 !isa<ExternalSymbolSDNode>(Val: Callee)) {
3711 return false;
3712 }
3713
3714 // Do not optimize if the calling conventions do not match and the conventions
3715 // used are not C or Fast.
3716 if (!CCMatch) {
3717 bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
3718 bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
3719 // If R & E, then ok.
3720 if (!R || !E)
3721 return false;
3722 }
3723
3724 // Do not tail call optimize vararg calls.
3725 if (IsVarArg)
3726 return false;
3727
3728 // Also avoid tail call optimization if either caller or callee uses struct
3729 // return semantics.
3730 if (IsCalleeStructRet || IsCallerStructRet)
3731 return false;
3732
3733 // In addition to the cases above, we also disable Tail Call Optimization if
3734 // the calling convention code that at least one outgoing argument needs to
3735 // go on the stack. We cannot check that here because at this point that
3736 // information is not available.
3737 return true;
3738}
3739
3740/// Returns the target specific optimal type for load and store operations as
3741/// a result of memset, memcpy, and memmove lowering.
3742///
3743/// If DstAlign is zero that means it's safe to destination alignment can
3744/// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
3745/// a need to check it against alignment requirement, probably because the
3746/// source does not need to be loaded. If 'IsMemset' is true, that means it's
3747/// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
3748/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
3749/// does not need to be loaded. It returns EVT::Other if the type should be
3750/// determined using generic target-independent logic.
3751EVT HexagonTargetLowering::getOptimalMemOpType(
3752 LLVMContext &Context, const MemOp &Op,
3753 const AttributeList &FuncAttributes) const {
3754 if (Op.size() >= 8 && Op.isAligned(AlignCheck: Align(8)))
3755 return MVT::i64;
3756 if (Op.size() >= 4 && Op.isAligned(AlignCheck: Align(4)))
3757 return MVT::i32;
3758 if (Op.size() >= 2 && Op.isAligned(AlignCheck: Align(2)))
3759 return MVT::i16;
3760 return MVT::Other;
3761}
3762
3763// The helpers below are versions of llvm::getShuffleReduction and
3764// llvm::getOrderedReduction, adapted to use during DAG passes and simplified as
3765// follows:
3766// - ICmp and FCmp are not handled;
3767// - in every step in getShuffleReduction, the input is split into halves (not
3768// pairwise).
3769
3770static SDValue getOrderedReduction(SDValue Vec, unsigned Op,
3771 SelectionDAG &DAG) {
3772 assert(Op != Instruction::ICmp && Op != Instruction::FCmp);
3773
3774 EVT VT = Vec.getValueType();
3775 EVT EltT = VT.getVectorElementType();
3776 unsigned VF = VT.getVectorNumElements();
3777 assert(VF > 0 &&
3778 "Reduction emission only supported for non-zero length vectors!");
3779
3780 SDLoc DL(Vec);
3781 SDValue Result = DAG.getExtractVectorElt(DL, VT: EltT, Vec, Idx: 0);
3782 for (unsigned ExtractIdx = 1; ExtractIdx < VF; ++ExtractIdx) {
3783 SDValue Ext = DAG.getExtractVectorElt(DL, VT: EltT, Vec, Idx: ExtractIdx);
3784 Result = DAG.getNode(Opcode: Op, DL, VT: EltT, Ops: {Result, Ext});
3785 }
3786
3787 return Result;
3788}
3789
3790static SDValue getShuffleReduction(SDValue Vec, unsigned Op,
3791 SelectionDAG &DAG) {
3792 assert(Op != Instruction::ICmp && Op != Instruction::FCmp);
3793
3794 EVT VT = Vec.getValueType();
3795 unsigned VF = VT.getVectorNumElements();
3796 if (VF == 0)
3797 llvm_unreachable("Vector must be non-zero length");
3798 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
3799 // and vector ops, reducing the set of values being computed by half each
3800 // round.
3801 assert(isPowerOf2_32(VF) &&
3802 "Reduction emission only supported for pow2 vectors!");
3803
3804 SDLoc DL(Vec);
3805 // TODO: Is it correct to create double-vector shuffle and fill 3/4 of it with
3806 // undefs?
3807 SmallVector<int, 32> ShuffleMask(VF);
3808 for (unsigned i = VF; i > 1; i >>= 1) {
3809 // Move the upper half of the vector to the lower half.
3810 for (unsigned j = 0; j != i / 2; ++j)
3811 ShuffleMask[j] = i / 2 + j;
3812 // Fill the rest of the mask with undef.
3813 std::fill(first: &ShuffleMask[i / 2], last: ShuffleMask.end(), value: -1);
3814
3815 SDValue Shuf =
3816 DAG.getVectorShuffle(VT, dl: DL, N1: Vec, N2: DAG.getUNDEF(VT), Mask: ShuffleMask);
3817
3818 Vec = DAG.getNode(Opcode: Op, DL, VT, Ops: {Vec, Shuf});
3819 }
3820 // The result is in the first element of the vector.
3821 return DAG.getExtractVectorElt(DL, VT: VT.getVectorElementType(), Vec, Idx: 0);
3822}
3823
3824SDValue HexagonTargetLowering::expandVecReduceAdd(SDNode *N,
3825 SelectionDAG &DAG) const {
3826 // Since we disabled automatic reduction expansion, generate log2 ladder code
3827 // if the vector is of a power-of-two length.
3828 SDValue Input = N->getOperand(Num: 0);
3829 if (isPowerOf2_32(Value: Input.getValueType().getVectorNumElements()))
3830 return getShuffleReduction(Vec: Input, Op: ISD::ADD, DAG);
3831 // Otherwise, reduction will be scalarized.
3832 return getOrderedReduction(Vec: Input, Op: ISD::ADD, DAG);
3833}
3834
3835bool HexagonTargetLowering::allowsMemoryAccess(
3836 LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
3837 Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
3838 if (!VT.isSimple())
3839 return false;
3840 MVT SVT = VT.getSimpleVT();
3841 if (Subtarget.isHVXVectorType(VecTy: SVT, IncludeBool: true))
3842 return allowsHvxMemoryAccess(VecTy: SVT, Flags, Fast);
3843 return TargetLoweringBase::allowsMemoryAccess(
3844 Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
3845}
3846
3847bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
3848 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3849 unsigned *Fast) const {
3850 if (!VT.isSimple())
3851 return false;
3852 MVT SVT = VT.getSimpleVT();
3853 if (Subtarget.isHVXVectorType(VecTy: SVT, IncludeBool: true))
3854 return allowsHvxMisalignedMemoryAccesses(VecTy: SVT, Flags, Fast);
3855 if (Fast)
3856 *Fast = 0;
3857 return false;
3858}
3859
3860std::pair<const TargetRegisterClass*, uint8_t>
3861HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
3862 MVT VT) const {
3863 if (Subtarget.isHVXVectorType(VecTy: VT, IncludeBool: true)) {
3864 unsigned BitWidth = VT.getSizeInBits();
3865 unsigned VecWidth = Subtarget.getVectorLength() * 8;
3866
3867 if (VT.getVectorElementType() == MVT::i1)
3868 return std::make_pair(x: &Hexagon::HvxQRRegClass, y: 1);
3869 if (BitWidth == VecWidth)
3870 return std::make_pair(x: &Hexagon::HvxVRRegClass, y: 1);
3871 assert(BitWidth == 2 * VecWidth);
3872 return std::make_pair(x: &Hexagon::HvxWRRegClass, y: 1);
3873 }
3874
3875 return TargetLowering::findRepresentativeClass(TRI, VT);
3876}
3877
3878bool HexagonTargetLowering::shouldReduceLoadWidth(
3879 SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
3880 std::optional<unsigned> ByteOffset) const {
3881 // TODO: This may be worth removing. Check regression tests for diffs.
3882 if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT,
3883 ByteOffset))
3884 return false;
3885
3886 auto *L = cast<LoadSDNode>(Val: Load);
3887 std::pair<SDValue, int> BO = getBaseAndOffset(Addr: L->getBasePtr());
3888 // Small-data object, do not shrink.
3889 if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
3890 return false;
3891 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: BO.first)) {
3892 auto &HTM = static_cast<const HexagonTargetMachine &>(getTargetMachine());
3893 const auto *GO = dyn_cast_or_null<const GlobalObject>(Val: GA->getGlobal());
3894 return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, TM: HTM);
3895 }
3896 return true;
3897}
3898
3899void HexagonTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
3900 SDNode *Node) const {
3901 AdjustHvxInstrPostInstrSelection(MI, Node);
3902}
3903
3904Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
3905 Type *ValueTy, Value *Addr,
3906 AtomicOrdering Ord) const {
3907 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
3908 assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
3909 Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
3910 : Intrinsic::hexagon_L4_loadd_locked;
3911
3912 Value *Call =
3913 Builder.CreateIntrinsic(ID: IntID, Args: Addr, /*FMFSource=*/nullptr, Name: "larx");
3914
3915 return Builder.CreateBitCast(V: Call, DestTy: ValueTy);
3916}
3917
3918/// Perform a store-conditional operation to Addr. Return the status of the
3919/// store. This should be 0 if the store succeeded, non-zero otherwise.
3920Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
3921 Value *Val, Value *Addr,
3922 AtomicOrdering Ord) const {
3923 BasicBlock *BB = Builder.GetInsertBlock();
3924 Module *M = BB->getParent()->getParent();
3925 Type *Ty = Val->getType();
3926 unsigned SZ = Ty->getPrimitiveSizeInBits();
3927
3928 Type *CastTy = Builder.getIntNTy(N: SZ);
3929 assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
3930 Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
3931 : Intrinsic::hexagon_S4_stored_locked;
3932
3933 Val = Builder.CreateBitCast(V: Val, DestTy: CastTy);
3934
3935 Value *Call = Builder.CreateIntrinsic(ID: IntID, Args: {Addr, Val},
3936 /*FMFSource=*/nullptr, Name: "stcx");
3937 Value *Cmp = Builder.CreateICmpEQ(LHS: Call, RHS: Builder.getInt32(C: 0), Name: "");
3938 Value *Ext = Builder.CreateZExt(V: Cmp, DestTy: Type::getInt32Ty(C&: M->getContext()));
3939 return Ext;
3940}
3941
3942TargetLowering::AtomicExpansionKind
3943HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
3944 // Do not expand loads and stores that don't exceed 64 bits.
3945 return LI->getType()->getPrimitiveSizeInBits() > 64
3946 ? AtomicExpansionKind::LLOnly
3947 : AtomicExpansionKind::None;
3948}
3949
3950TargetLowering::AtomicExpansionKind
3951HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
3952 // Do not expand loads and stores that don't exceed 64 bits.
3953 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64
3954 ? AtomicExpansionKind::Expand
3955 : AtomicExpansionKind::None;
3956}
3957
3958TargetLowering::AtomicExpansionKind
3959HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3960 const AtomicCmpXchgInst *AI) const {
3961 return AtomicExpansionKind::LLSC;
3962}
3963
3964MachineBasicBlock *HexagonTargetLowering::EmitInstrWithCustomInserter(
3965 MachineInstr &MI, MachineBasicBlock *BB) const {
3966 switch (MI.getOpcode()) {
3967 case TargetOpcode::PATCHABLE_EVENT_CALL:
3968 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
3969 // These are lowered in the AsmPrinter.
3970 return BB;
3971 default:
3972 llvm_unreachable("Unexpected instruction with custom inserter");
3973 }
3974}
3975
3976MachineInstr *
3977HexagonTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
3978 MachineBasicBlock::instr_iterator &MBBI,
3979 const TargetInstrInfo *TII) const {
3980 assert(MBBI->isCall() && MBBI->getCFIType() &&
3981 "Invalid call instruction for a KCFI check");
3982
3983 switch (MBBI->getOpcode()) {
3984 case Hexagon::J2_callr:
3985 case Hexagon::PS_callr_nr:
3986 break;
3987 default:
3988 llvm_unreachable("Unexpected CFI call opcode");
3989 }
3990
3991 MachineOperand &Target = MBBI->getOperand(i: 0);
3992 assert(Target.isReg() && "Invalid target operand for an indirect call");
3993 Target.setIsRenamable(false);
3994
3995 return BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII->get(Opcode: Hexagon::KCFI_CHECK))
3996 .addReg(RegNo: Target.getReg())
3997 .addImm(Val: MBBI->getCFIType())
3998 .getInstr();
3999}
4000
4001bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
4002 const Instruction &AndI) const {
4003 // Only sink 'and' mask to cmp use block if it is masking a single bit since
4004 // this will fold the and/cmp/br into a single tstbit instruction.
4005 ConstantInt *Mask = dyn_cast<ConstantInt>(Val: AndI.getOperand(i: 1));
4006 if (!Mask)
4007 return false;
4008 return Mask->getValue().isPowerOf2();
4009}
4010
4011// Check if the result of the node is only used as a return value, as
4012// otherwise we can't perform a tail-call.
4013bool HexagonTargetLowering::isUsedByReturnOnly(SDNode *N,
4014 SDValue &Chain) const {
4015 if (N->getNumValues() != 1)
4016 return false;
4017 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
4018 return false;
4019
4020 SDNode *Copy = *N->user_begin();
4021
4022 if (Copy->getOpcode() == ISD::BITCAST) {
4023 return isUsedByReturnOnly(N: Copy, Chain);
4024 }
4025
4026 if (Copy->getOpcode() != ISD::CopyToReg) {
4027 return false;
4028 }
4029
4030 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
4031 // isn't safe to perform a tail call.
4032 if (Copy->getOperand(Num: Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
4033 return false;
4034
4035 // The copy must be used by a HexagonISD::RET_GLUE, and nothing else.
4036 bool HasRet = false;
4037 for (SDNode *Node : Copy->users()) {
4038 if (Node->getOpcode() != HexagonISD::RET_GLUE)
4039 return false;
4040 HasRet = true;
4041 }
4042 if (!HasRet)
4043 return false;
4044
4045 Chain = Copy->getOperand(Num: 0);
4046 return true;
4047}
4048
4049bool HexagonTargetLowering::hasInlineStackProbe(
4050 const MachineFunction &MF) const {
4051 if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
4052 return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
4053 "inline-asm";
4054 return false;
4055}
4056
4057unsigned HexagonTargetLowering::getStackProbeSize(const MachineFunction &MF,
4058 Align StackAlign) const {
4059 const Function &Fn = MF.getFunction();
4060 unsigned StackProbeSize =
4061 Fn.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: 4096);
4062 // Round down to the stack alignment.
4063 StackProbeSize = alignDown(Value: StackProbeSize, Align: StackAlign.value());
4064 return StackProbeSize ? StackProbeSize : StackAlign.value();
4065}
4066