| 1 | //===-- VEISelLowering.h - VE DAG Lowering Interface ------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the interfaces that VE uses to lower LLVM code into a |
| 10 | // selection DAG. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef LLVM_LIB_TARGET_VE_VEISELLOWERING_H |
| 15 | #define LLVM_LIB_TARGET_VE_VEISELLOWERING_H |
| 16 | |
| 17 | #include "VE.h" |
| 18 | #include "llvm/CodeGen/TargetLowering.h" |
| 19 | |
| 20 | namespace llvm { |
| 21 | class VESubtarget; |
| 22 | |
| 23 | namespace VEISD { |
| 24 | enum NodeType : unsigned { |
| 25 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
| 26 | |
| 27 | CMPI, // Compare between two signed integer values. |
| 28 | CMPU, // Compare between two unsigned integer values. |
| 29 | CMPF, // Compare between two floating-point values. |
| 30 | CMPQ, // Compare between two quad floating-point values. |
| 31 | CMOV, // Select between two values using the result of comparison. |
| 32 | |
| 33 | CALL, // A call instruction. |
| 34 | EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. |
| 35 | EH_SJLJ_SETJMP, // SjLj exception handling setjmp. |
| 36 | EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. |
| 37 | GETFUNPLT, // Load function address through %plt insturction. |
| 38 | GETTLSADDR, // Load address for TLS access. |
| 39 | GETSTACKTOP, // Retrieve address of stack top (first address of |
| 40 | // locals and temporaries). |
| 41 | GLOBAL_BASE_REG, // Global base reg for PIC. |
| 42 | Hi, // Hi/Lo operations, typically on a global address. |
| 43 | Lo, // Hi/Lo operations, typically on a global address. |
| 44 | RET_GLUE, // Return with a flag operand. |
| 45 | TS1AM, // A TS1AM instruction used for 1/2 bytes swap. |
| 46 | VEC_UNPACK_LO, // unpack the lo v256 slice of a packed v512 vector. |
| 47 | VEC_UNPACK_HI, // unpack the hi v256 slice of a packed v512 vector. |
| 48 | // 0: v512 vector, 1: AVL |
| 49 | VEC_PACK, // pack a lo and a hi vector into one v512 vector |
| 50 | // 0: v256 lo vector, 1: v256 hi vector, 2: AVL |
| 51 | |
| 52 | VEC_BROADCAST, // A vector broadcast instruction. |
| 53 | // 0: scalar value, 1: VL |
| 54 | REPL_I32, |
| 55 | REPL_F32, // Replicate subregister to other half. |
| 56 | |
| 57 | // Annotation as a wrapper. LEGALAVL(VL) means that VL refers to 64bit of |
| 58 | // data, whereas the raw EVL coming in from VP nodes always refers to number |
| 59 | // of elements, regardless of their size. |
| 60 | LEGALAVL, |
| 61 | |
| 62 | // VVP_* nodes. |
| 63 | #define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME, |
| 64 | #include "VVPNodes.def" |
| 65 | }; |
| 66 | } |
| 67 | |
| 68 | /// Convert a DAG integer condition code to a VE ICC condition. |
| 69 | inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) { |
| 70 | switch (CC) { |
| 71 | default: |
| 72 | llvm_unreachable("Unknown integer condition code!" ); |
| 73 | case ISD::SETEQ: |
| 74 | return VECC::CC_IEQ; |
| 75 | case ISD::SETNE: |
| 76 | return VECC::CC_INE; |
| 77 | case ISD::SETLT: |
| 78 | return VECC::CC_IL; |
| 79 | case ISD::SETGT: |
| 80 | return VECC::CC_IG; |
| 81 | case ISD::SETLE: |
| 82 | return VECC::CC_ILE; |
| 83 | case ISD::SETGE: |
| 84 | return VECC::CC_IGE; |
| 85 | case ISD::SETULT: |
| 86 | return VECC::CC_IL; |
| 87 | case ISD::SETULE: |
| 88 | return VECC::CC_ILE; |
| 89 | case ISD::SETUGT: |
| 90 | return VECC::CC_IG; |
| 91 | case ISD::SETUGE: |
| 92 | return VECC::CC_IGE; |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | /// Convert a DAG floating point condition code to a VE FCC condition. |
| 97 | inline static VECC::CondCode fpCondCode2Fcc(ISD::CondCode CC) { |
| 98 | switch (CC) { |
| 99 | default: |
| 100 | llvm_unreachable("Unknown fp condition code!" ); |
| 101 | case ISD::SETFALSE: |
| 102 | return VECC::CC_AF; |
| 103 | case ISD::SETEQ: |
| 104 | case ISD::SETOEQ: |
| 105 | return VECC::CC_EQ; |
| 106 | case ISD::SETNE: |
| 107 | case ISD::SETONE: |
| 108 | return VECC::CC_NE; |
| 109 | case ISD::SETLT: |
| 110 | case ISD::SETOLT: |
| 111 | return VECC::CC_L; |
| 112 | case ISD::SETGT: |
| 113 | case ISD::SETOGT: |
| 114 | return VECC::CC_G; |
| 115 | case ISD::SETLE: |
| 116 | case ISD::SETOLE: |
| 117 | return VECC::CC_LE; |
| 118 | case ISD::SETGE: |
| 119 | case ISD::SETOGE: |
| 120 | return VECC::CC_GE; |
| 121 | case ISD::SETO: |
| 122 | return VECC::CC_NUM; |
| 123 | case ISD::SETUO: |
| 124 | return VECC::CC_NAN; |
| 125 | case ISD::SETUEQ: |
| 126 | return VECC::CC_EQNAN; |
| 127 | case ISD::SETUNE: |
| 128 | return VECC::CC_NENAN; |
| 129 | case ISD::SETULT: |
| 130 | return VECC::CC_LNAN; |
| 131 | case ISD::SETUGT: |
| 132 | return VECC::CC_GNAN; |
| 133 | case ISD::SETULE: |
| 134 | return VECC::CC_LENAN; |
| 135 | case ISD::SETUGE: |
| 136 | return VECC::CC_GENAN; |
| 137 | case ISD::SETTRUE: |
| 138 | return VECC::CC_AT; |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | /// getImmVal - get immediate representation of integer value |
| 143 | inline static uint64_t getImmVal(const ConstantSDNode *N) { |
| 144 | return N->getSExtValue(); |
| 145 | } |
| 146 | |
| 147 | /// getFpImmVal - get immediate representation of floating point value |
| 148 | inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) { |
| 149 | const APInt &Imm = N->getValueAPF().bitcastToAPInt(); |
| 150 | uint64_t Val = Imm.getZExtValue(); |
| 151 | if (Imm.getBitWidth() == 32) { |
| 152 | // Immediate value of float place places at higher bits on VE. |
| 153 | Val <<= 32; |
| 154 | } |
| 155 | return Val; |
| 156 | } |
| 157 | |
| 158 | class VECustomDAG; |
| 159 | |
| 160 | class VETargetLowering : public TargetLowering { |
| 161 | const VESubtarget *Subtarget; |
| 162 | |
| 163 | void initRegisterClasses(); |
| 164 | void initSPUActions(); |
| 165 | void initVPUActions(); |
| 166 | |
| 167 | public: |
| 168 | VETargetLowering(const TargetMachine &TM, const VESubtarget &STI); |
| 169 | |
| 170 | const char *getTargetNodeName(unsigned Opcode) const override; |
| 171 | MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { |
| 172 | return MVT::i32; |
| 173 | } |
| 174 | |
| 175 | Register getRegisterByName(const char *RegName, LLT VT, |
| 176 | const MachineFunction &MF) const override; |
| 177 | |
| 178 | /// getSetCCResultType - Return the ISD::SETCC ValueType |
| 179 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
| 180 | EVT VT) const override; |
| 181 | |
| 182 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
| 183 | bool isVarArg, |
| 184 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 185 | const SDLoc &dl, SelectionDAG &DAG, |
| 186 | SmallVectorImpl<SDValue> &InVals) const override; |
| 187 | |
| 188 | SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, |
| 189 | SmallVectorImpl<SDValue> &InVals) const override; |
| 190 | |
| 191 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
| 192 | bool isVarArg, |
| 193 | const SmallVectorImpl<ISD::OutputArg> &ArgsFlags, |
| 194 | LLVMContext &Context, |
| 195 | const Type *RetTy) const override; |
| 196 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| 197 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 198 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, |
| 199 | SelectionDAG &DAG) const override; |
| 200 | |
| 201 | /// Helper functions for atomic operations. |
| 202 | bool shouldInsertFencesForAtomic(const Instruction *I) const override { |
| 203 | // VE uses release consistency, so need fence for each atomics. |
| 204 | return true; |
| 205 | } |
| 206 | Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, |
| 207 | AtomicOrdering Ord) const override; |
| 208 | Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, |
| 209 | AtomicOrdering Ord) const override; |
| 210 | TargetLoweringBase::AtomicExpansionKind |
| 211 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
| 212 | ISD::NodeType getExtendForAtomicOps() const override { |
| 213 | return ISD::ANY_EXTEND; |
| 214 | } |
| 215 | |
| 216 | /// Custom Lower { |
| 217 | TargetLoweringBase::LegalizeAction |
| 218 | getCustomOperationAction(SDNode &) const override; |
| 219 | |
| 220 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
| 221 | unsigned getJumpTableEncoding() const override; |
| 222 | const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
| 223 | const MachineBasicBlock *MBB, |
| 224 | unsigned Uid, |
| 225 | MCContext &Ctx) const override; |
| 226 | SDValue getPICJumpTableRelocBase(SDValue Table, |
| 227 | SelectionDAG &DAG) const override; |
| 228 | // VE doesn't need getPICJumpTableRelocBaseExpr since it is used for only |
| 229 | // EK_LabelDifference32. |
| 230 | |
| 231 | SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; |
| 232 | SDValue lowerATOMIC_SWAP(SDValue Op, SelectionDAG &DAG) const; |
| 233 | SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
| 234 | SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
| 235 | SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
| 236 | SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; |
| 237 | SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; |
| 238 | SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; |
| 239 | SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
| 240 | SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
| 241 | SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
| 242 | SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
| 243 | SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
| 244 | SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
| 245 | SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; |
| 246 | SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
| 247 | SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
| 248 | |
| 249 | SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
| 250 | SDValue (SDValue Op, SelectionDAG &DAG) const; |
| 251 | SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
| 252 | /// } Custom Lower |
| 253 | |
| 254 | /// Replace the results of node with an illegal result |
| 255 | /// type with new values built out of custom code. |
| 256 | /// |
| 257 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
| 258 | SelectionDAG &DAG) const override; |
| 259 | |
| 260 | /// Custom Inserter { |
| 261 | MachineBasicBlock * |
| 262 | EmitInstrWithCustomInserter(MachineInstr &MI, |
| 263 | MachineBasicBlock *MBB) const override; |
| 264 | MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, |
| 265 | MachineBasicBlock *MBB) const; |
| 266 | MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, |
| 267 | MachineBasicBlock *MBB) const; |
| 268 | MachineBasicBlock *emitSjLjDispatchBlock(MachineInstr &MI, |
| 269 | MachineBasicBlock *BB) const; |
| 270 | |
| 271 | void setupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, |
| 272 | MachineBasicBlock *DispatchBB, int FI, |
| 273 | int Offset) const; |
| 274 | // Setup basic block address. |
| 275 | Register prepareMBB(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
| 276 | MachineBasicBlock *TargetBB, const DebugLoc &DL) const; |
| 277 | // Prepare function/variable address. |
| 278 | Register prepareSymbol(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
| 279 | StringRef Symbol, const DebugLoc &DL, bool IsLocal, |
| 280 | bool IsCall) const; |
| 281 | /// } Custom Inserter |
| 282 | |
| 283 | /// VVP Lowering { |
| 284 | SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const; |
| 285 | SDValue lowerVVP_LOAD_STORE(SDValue Op, VECustomDAG &) const; |
| 286 | SDValue lowerVVP_GATHER_SCATTER(SDValue Op, VECustomDAG &) const; |
| 287 | |
| 288 | SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const; |
| 289 | SDValue legalizeInternalLoadStoreOp(SDValue Op, VECustomDAG &CDAG) const; |
| 290 | SDValue splitVectorOp(SDValue Op, VECustomDAG &CDAG) const; |
| 291 | SDValue splitPackedLoadStore(SDValue Op, VECustomDAG &CDAG) const; |
| 292 | SDValue legalizePackedAVL(SDValue Op, VECustomDAG &CDAG) const; |
| 293 | SDValue splitMaskArithmetic(SDValue Op, SelectionDAG &DAG) const; |
| 294 | /// } VVPLowering |
| 295 | |
| 296 | /// Custom DAGCombine { |
| 297 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
| 298 | |
| 299 | SDValue combineSelect(SDNode *N, DAGCombinerInfo &DCI) const; |
| 300 | SDValue combineSelectCC(SDNode *N, DAGCombinerInfo &DCI) const; |
| 301 | SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; |
| 302 | /// } Custom DAGCombine |
| 303 | |
| 304 | SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const; |
| 305 | SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, |
| 306 | SelectionDAG &DAG) const; |
| 307 | SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; |
| 308 | |
| 309 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
| 310 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
| 311 | bool ForCodeSize) const override; |
| 312 | /// Returns true if the target allows unaligned memory accesses of the |
| 313 | /// specified type. |
| 314 | bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align A, |
| 315 | MachineMemOperand::Flags Flags, |
| 316 | unsigned *Fast) const override; |
| 317 | |
| 318 | /// Inline Assembly { |
| 319 | |
| 320 | ConstraintType getConstraintType(StringRef Constraint) const override; |
| 321 | std::pair<unsigned, const TargetRegisterClass *> |
| 322 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
| 323 | StringRef Constraint, MVT VT) const override; |
| 324 | |
| 325 | /// } Inline Assembly |
| 326 | |
| 327 | /// Target Optimization { |
| 328 | |
| 329 | // Return lower limit for number of blocks in a jump table. |
| 330 | unsigned getMinimumJumpTableEntries() const override; |
| 331 | |
| 332 | // SX-Aurora VE's s/udiv is 5-9 times slower than multiply. |
| 333 | bool isIntDivCheap(EVT, AttributeList) const override { return false; } |
| 334 | // VE doesn't have rem. |
| 335 | bool hasStandaloneRem(EVT) const override { return false; } |
| 336 | // VE LDZ instruction returns 64 if the input is zero. |
| 337 | bool isCheapToSpeculateCtlz(Type *) const override { return true; } |
| 338 | // VE LDZ instruction is fast. |
| 339 | bool isCtlzFast() const override { return true; } |
| 340 | // VE has NND instruction. |
| 341 | bool hasAndNot(SDValue Y) const override; |
| 342 | |
| 343 | /// } Target Optimization |
| 344 | }; |
| 345 | } // namespace llvm |
| 346 | |
| 347 | #endif // LLVM_LIB_TARGET_VE_VEISELLOWERING_H |
| 348 | |