| 1 | //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // This file defines a DAG pattern matching instruction selector for X86, | 
|---|
| 10 | // converting from a legalized dag to a X86 dag. | 
|---|
| 11 | // | 
|---|
| 12 | //===----------------------------------------------------------------------===// | 
|---|
| 13 |  | 
|---|
| 14 | #include "X86ISelDAGToDAG.h" | 
|---|
| 15 | #include "X86.h" | 
|---|
| 16 | #include "X86MachineFunctionInfo.h" | 
|---|
| 17 | #include "X86Subtarget.h" | 
|---|
| 18 | #include "X86TargetMachine.h" | 
|---|
| 19 | #include "llvm/ADT/Statistic.h" | 
|---|
| 20 | #include "llvm/CodeGen/MachineModuleInfo.h" | 
|---|
| 21 | #include "llvm/CodeGen/SelectionDAGISel.h" | 
|---|
| 22 | #include "llvm/Config/llvm-config.h" | 
|---|
| 23 | #include "llvm/IR/ConstantRange.h" | 
|---|
| 24 | #include "llvm/IR/Function.h" | 
|---|
| 25 | #include "llvm/IR/Instructions.h" | 
|---|
| 26 | #include "llvm/IR/Intrinsics.h" | 
|---|
| 27 | #include "llvm/IR/IntrinsicsX86.h" | 
|---|
| 28 | #include "llvm/IR/Module.h" | 
|---|
| 29 | #include "llvm/IR/Type.h" | 
|---|
| 30 | #include "llvm/Support/Debug.h" | 
|---|
| 31 | #include "llvm/Support/ErrorHandling.h" | 
|---|
| 32 | #include "llvm/Support/KnownBits.h" | 
|---|
| 33 | #include "llvm/Support/MathExtras.h" | 
|---|
| 34 | #include <cstdint> | 
|---|
| 35 |  | 
|---|
| 36 | using namespace llvm; | 
|---|
| 37 |  | 
|---|
| 38 | #define DEBUG_TYPE "x86-isel" | 
|---|
| 39 | #define PASS_NAME "X86 DAG->DAG Instruction Selection" | 
|---|
| 40 |  | 
|---|
| 41 | STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); | 
|---|
| 42 |  | 
|---|
| 43 | static cl::opt<bool> AndImmShrink( "x86-and-imm-shrink", cl::init(Val: true), | 
|---|
| 44 | cl::desc( "Enable setting constant bits to reduce size of mask immediates"), | 
|---|
| 45 | cl::Hidden); | 
|---|
| 46 |  | 
|---|
| 47 | static cl::opt<bool> EnablePromoteAnyextLoad( | 
|---|
| 48 | "x86-promote-anyext-load", cl::init(Val: true), | 
|---|
| 49 | cl::desc( "Enable promoting aligned anyext load to wider load"), cl::Hidden); | 
|---|
| 50 |  | 
|---|
| 51 | extern cl::opt<bool> IndirectBranchTracking; | 
|---|
| 52 |  | 
|---|
| 53 | //===----------------------------------------------------------------------===// | 
|---|
| 54 | //                      Pattern Matcher Implementation | 
|---|
| 55 | //===----------------------------------------------------------------------===// | 
|---|
| 56 |  | 
|---|
| 57 | namespace { | 
|---|
| 58 | /// This corresponds to X86AddressMode, but uses SDValue's instead of register | 
|---|
| 59 | /// numbers for the leaves of the matched tree. | 
|---|
| 60 | struct X86ISelAddressMode { | 
|---|
| 61 | enum { | 
|---|
| 62 | RegBase, | 
|---|
| 63 | FrameIndexBase | 
|---|
| 64 | } BaseType = RegBase; | 
|---|
| 65 |  | 
|---|
| 66 | // This is really a union, discriminated by BaseType! | 
|---|
| 67 | SDValue Base_Reg; | 
|---|
| 68 | int Base_FrameIndex = 0; | 
|---|
| 69 |  | 
|---|
| 70 | unsigned Scale = 1; | 
|---|
| 71 | SDValue IndexReg; | 
|---|
| 72 | int32_t Disp = 0; | 
|---|
| 73 | SDValue Segment; | 
|---|
| 74 | const GlobalValue *GV = nullptr; | 
|---|
| 75 | const Constant *CP = nullptr; | 
|---|
| 76 | const BlockAddress *BlockAddr = nullptr; | 
|---|
| 77 | const char *ES = nullptr; | 
|---|
| 78 | MCSymbol *MCSym = nullptr; | 
|---|
| 79 | int JT = -1; | 
|---|
| 80 | Align Alignment;            // CP alignment. | 
|---|
| 81 | unsigned char SymbolFlags = X86II::MO_NO_FLAG;  // X86II::MO_* | 
|---|
| 82 | bool NegateIndex = false; | 
|---|
| 83 |  | 
|---|
| 84 | X86ISelAddressMode() = default; | 
|---|
| 85 |  | 
|---|
| 86 | bool hasSymbolicDisplacement() const { | 
|---|
| 87 | return GV != nullptr || CP != nullptr || ES != nullptr || | 
|---|
| 88 | MCSym != nullptr || JT != -1 || BlockAddr != nullptr; | 
|---|
| 89 | } | 
|---|
| 90 |  | 
|---|
| 91 | bool hasBaseOrIndexReg() const { | 
|---|
| 92 | return BaseType == FrameIndexBase || | 
|---|
| 93 | IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 | /// Return true if this addressing mode is already RIP-relative. | 
|---|
| 97 | bool isRIPRelative() const { | 
|---|
| 98 | if (BaseType != RegBase) return false; | 
|---|
| 99 | if (RegisterSDNode *RegNode = | 
|---|
| 100 | dyn_cast_or_null<RegisterSDNode>(Val: Base_Reg.getNode())) | 
|---|
| 101 | return RegNode->getReg() == X86::RIP; | 
|---|
| 102 | return false; | 
|---|
| 103 | } | 
|---|
| 104 |  | 
|---|
| 105 | void setBaseReg(SDValue Reg) { | 
|---|
| 106 | BaseType = RegBase; | 
|---|
| 107 | Base_Reg = Reg; | 
|---|
| 108 | } | 
|---|
| 109 |  | 
|---|
| 110 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | 
|---|
| 111 | void dump(SelectionDAG *DAG = nullptr) { | 
|---|
| 112 | dbgs() << "X86ISelAddressMode "<< this << '\n'; | 
|---|
| 113 | dbgs() << "Base_Reg "; | 
|---|
| 114 | if (Base_Reg.getNode()) | 
|---|
| 115 | Base_Reg.getNode()->dump(DAG); | 
|---|
| 116 | else | 
|---|
| 117 | dbgs() << "nul\n"; | 
|---|
| 118 | if (BaseType == FrameIndexBase) | 
|---|
| 119 | dbgs() << " Base.FrameIndex "<< Base_FrameIndex << '\n'; | 
|---|
| 120 | dbgs() << " Scale "<< Scale << '\n' | 
|---|
| 121 | << "IndexReg "; | 
|---|
| 122 | if (NegateIndex) | 
|---|
| 123 | dbgs() << "negate "; | 
|---|
| 124 | if (IndexReg.getNode()) | 
|---|
| 125 | IndexReg.getNode()->dump(DAG); | 
|---|
| 126 | else | 
|---|
| 127 | dbgs() << "nul\n"; | 
|---|
| 128 | dbgs() << " Disp "<< Disp << '\n' | 
|---|
| 129 | << "GV "; | 
|---|
| 130 | if (GV) | 
|---|
| 131 | GV->dump(); | 
|---|
| 132 | else | 
|---|
| 133 | dbgs() << "nul"; | 
|---|
| 134 | dbgs() << " CP "; | 
|---|
| 135 | if (CP) | 
|---|
| 136 | CP->dump(); | 
|---|
| 137 | else | 
|---|
| 138 | dbgs() << "nul"; | 
|---|
| 139 | dbgs() << '\n' | 
|---|
| 140 | << "ES "; | 
|---|
| 141 | if (ES) | 
|---|
| 142 | dbgs() << ES; | 
|---|
| 143 | else | 
|---|
| 144 | dbgs() << "nul"; | 
|---|
| 145 | dbgs() << " MCSym "; | 
|---|
| 146 | if (MCSym) | 
|---|
| 147 | dbgs() << MCSym; | 
|---|
| 148 | else | 
|---|
| 149 | dbgs() << "nul"; | 
|---|
| 150 | dbgs() << " JT"<< JT << " Align"<< Alignment.value() << '\n'; | 
|---|
| 151 | } | 
|---|
| 152 | #endif | 
|---|
| 153 | }; | 
|---|
| 154 | } | 
|---|
| 155 |  | 
|---|
| 156 | namespace { | 
|---|
| 157 | //===--------------------------------------------------------------------===// | 
|---|
| 158 | /// ISel - X86-specific code to select X86 machine instructions for | 
|---|
| 159 | /// SelectionDAG operations. | 
|---|
| 160 | /// | 
|---|
| 161 | class X86DAGToDAGISel final : public SelectionDAGISel { | 
|---|
| 162 | /// Keep a pointer to the X86Subtarget around so that we can | 
|---|
| 163 | /// make the right decision when generating code for different targets. | 
|---|
| 164 | const X86Subtarget *Subtarget; | 
|---|
| 165 |  | 
|---|
| 166 | /// If true, selector should try to optimize for minimum code size. | 
|---|
| 167 | bool OptForMinSize; | 
|---|
| 168 |  | 
|---|
| 169 | /// Disable direct TLS access through segment registers. | 
|---|
| 170 | bool IndirectTlsSegRefs; | 
|---|
| 171 |  | 
|---|
| 172 | public: | 
|---|
| 173 | X86DAGToDAGISel() = delete; | 
|---|
| 174 |  | 
|---|
| 175 | explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOptLevel OptLevel) | 
|---|
| 176 | : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), | 
|---|
| 177 | OptForMinSize(false), IndirectTlsSegRefs(false) {} | 
|---|
| 178 |  | 
|---|
| 179 | bool runOnMachineFunction(MachineFunction &MF) override { | 
|---|
| 180 | // Reset the subtarget each time through. | 
|---|
| 181 | Subtarget = &MF.getSubtarget<X86Subtarget>(); | 
|---|
| 182 | IndirectTlsSegRefs = MF.getFunction().hasFnAttribute( | 
|---|
| 183 | Kind: "indirect-tls-seg-refs"); | 
|---|
| 184 |  | 
|---|
| 185 | // OptFor[Min]Size are used in pattern predicates that isel is matching. | 
|---|
| 186 | OptForMinSize = MF.getFunction().hasMinSize(); | 
|---|
| 187 | return SelectionDAGISel::runOnMachineFunction(mf&: MF); | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | void emitFunctionEntryCode() override; | 
|---|
| 191 |  | 
|---|
| 192 | bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; | 
|---|
| 193 |  | 
|---|
| 194 | void PreprocessISelDAG() override; | 
|---|
| 195 | void PostprocessISelDAG() override; | 
|---|
| 196 |  | 
|---|
| 197 | // Include the pieces autogenerated from the target description. | 
|---|
| 198 | #include "X86GenDAGISel.inc" | 
|---|
| 199 |  | 
|---|
| 200 | private: | 
|---|
| 201 | void Select(SDNode *N) override; | 
|---|
| 202 |  | 
|---|
| 203 | bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); | 
|---|
| 204 | bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, | 
|---|
| 205 | bool AllowSegmentRegForX32 = false); | 
|---|
| 206 | bool matchWrapper(SDValue N, X86ISelAddressMode &AM); | 
|---|
| 207 | bool matchAddress(SDValue N, X86ISelAddressMode &AM); | 
|---|
| 208 | bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM); | 
|---|
| 209 | bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth); | 
|---|
| 210 | SDValue matchIndexRecursively(SDValue N, X86ISelAddressMode &AM, | 
|---|
| 211 | unsigned Depth); | 
|---|
| 212 | bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, | 
|---|
| 213 | unsigned Depth); | 
|---|
| 214 | bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM, | 
|---|
| 215 | unsigned Depth); | 
|---|
| 216 | bool matchAddressBase(SDValue N, X86ISelAddressMode &AM); | 
|---|
| 217 | bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base, | 
|---|
| 218 | SDValue &Scale, SDValue &Index, SDValue &Disp, | 
|---|
| 219 | SDValue &Segment); | 
|---|
| 220 | bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp, | 
|---|
| 221 | SDValue ScaleOp, SDValue &Base, SDValue &Scale, | 
|---|
| 222 | SDValue &Index, SDValue &Disp, SDValue &Segment); | 
|---|
| 223 | bool selectMOV64Imm32(SDValue N, SDValue &Imm); | 
|---|
| 224 | bool selectLEAAddr(SDValue N, SDValue &Base, | 
|---|
| 225 | SDValue &Scale, SDValue &Index, SDValue &Disp, | 
|---|
| 226 | SDValue &Segment); | 
|---|
| 227 | bool selectLEA64_Addr(SDValue N, SDValue &Base, SDValue &Scale, | 
|---|
| 228 | SDValue &Index, SDValue &Disp, SDValue &Segment); | 
|---|
| 229 | bool selectTLSADDRAddr(SDValue N, SDValue &Base, | 
|---|
| 230 | SDValue &Scale, SDValue &Index, SDValue &Disp, | 
|---|
| 231 | SDValue &Segment); | 
|---|
| 232 | bool selectRelocImm(SDValue N, SDValue &Op); | 
|---|
| 233 |  | 
|---|
| 234 | bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, | 
|---|
| 235 | SDValue &Base, SDValue &Scale, | 
|---|
| 236 | SDValue &Index, SDValue &Disp, | 
|---|
| 237 | SDValue &Segment); | 
|---|
| 238 |  | 
|---|
| 239 | // Convenience method where P is also root. | 
|---|
| 240 | bool tryFoldLoad(SDNode *P, SDValue N, | 
|---|
| 241 | SDValue &Base, SDValue &Scale, | 
|---|
| 242 | SDValue &Index, SDValue &Disp, | 
|---|
| 243 | SDValue &Segment) { | 
|---|
| 244 | return tryFoldLoad(Root: P, P, N, Base, Scale, Index, Disp, Segment); | 
|---|
| 245 | } | 
|---|
| 246 |  | 
|---|
| 247 | bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, | 
|---|
| 248 | SDValue &Base, SDValue &Scale, | 
|---|
| 249 | SDValue &Index, SDValue &Disp, | 
|---|
| 250 | SDValue &Segment); | 
|---|
| 251 |  | 
|---|
| 252 | bool isProfitableToFormMaskedOp(SDNode *N) const; | 
|---|
| 253 |  | 
|---|
| 254 | /// Implement addressing mode selection for inline asm expressions. | 
|---|
| 255 | bool SelectInlineAsmMemoryOperand(const SDValue &Op, | 
|---|
| 256 | InlineAsm::ConstraintCode ConstraintID, | 
|---|
| 257 | std::vector<SDValue> &OutOps) override; | 
|---|
| 258 |  | 
|---|
| 259 | void emitSpecialCodeForMain(); | 
|---|
| 260 |  | 
|---|
| 261 | inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL, | 
|---|
| 262 | MVT VT, SDValue &Base, SDValue &Scale, | 
|---|
| 263 | SDValue &Index, SDValue &Disp, | 
|---|
| 264 | SDValue &Segment) { | 
|---|
| 265 | if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) | 
|---|
| 266 | Base = CurDAG->getTargetFrameIndex( | 
|---|
| 267 | FI: AM.Base_FrameIndex, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout())); | 
|---|
| 268 | else if (AM.Base_Reg.getNode()) | 
|---|
| 269 | Base = AM.Base_Reg; | 
|---|
| 270 | else | 
|---|
| 271 | Base = CurDAG->getRegister(Reg: 0, VT); | 
|---|
| 272 |  | 
|---|
| 273 | Scale = getI8Imm(Imm: AM.Scale, DL); | 
|---|
| 274 |  | 
|---|
| 275 | #define GET_ND_IF_ENABLED(OPC) (Subtarget->hasNDD() ? OPC##_ND : OPC) | 
|---|
| 276 | // Negate the index if needed. | 
|---|
| 277 | if (AM.NegateIndex) { | 
|---|
| 278 | unsigned NegOpc; | 
|---|
| 279 | switch (VT.SimpleTy) { | 
|---|
| 280 | default: | 
|---|
| 281 | llvm_unreachable( "Unsupported VT!"); | 
|---|
| 282 | case MVT::i64: | 
|---|
| 283 | NegOpc = GET_ND_IF_ENABLED(X86::NEG64r); | 
|---|
| 284 | break; | 
|---|
| 285 | case MVT::i32: | 
|---|
| 286 | NegOpc = GET_ND_IF_ENABLED(X86::NEG32r); | 
|---|
| 287 | break; | 
|---|
| 288 | case MVT::i16: | 
|---|
| 289 | NegOpc = GET_ND_IF_ENABLED(X86::NEG16r); | 
|---|
| 290 | break; | 
|---|
| 291 | case MVT::i8: | 
|---|
| 292 | NegOpc = GET_ND_IF_ENABLED(X86::NEG8r); | 
|---|
| 293 | break; | 
|---|
| 294 | } | 
|---|
| 295 | SDValue Neg = SDValue(CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT1: VT, VT2: MVT::i32, | 
|---|
| 296 | Ops: AM.IndexReg), 0); | 
|---|
| 297 | AM.IndexReg = Neg; | 
|---|
| 298 | } | 
|---|
| 299 |  | 
|---|
| 300 | if (AM.IndexReg.getNode()) | 
|---|
| 301 | Index = AM.IndexReg; | 
|---|
| 302 | else | 
|---|
| 303 | Index = CurDAG->getRegister(Reg: 0, VT); | 
|---|
| 304 |  | 
|---|
| 305 | // These are 32-bit even in 64-bit mode since RIP-relative offset | 
|---|
| 306 | // is 32-bit. | 
|---|
| 307 | if (AM.GV) | 
|---|
| 308 | Disp = CurDAG->getTargetGlobalAddress(GV: AM.GV, DL: SDLoc(), | 
|---|
| 309 | VT: MVT::i32, offset: AM.Disp, | 
|---|
| 310 | TargetFlags: AM.SymbolFlags); | 
|---|
| 311 | else if (AM.CP) | 
|---|
| 312 | Disp = CurDAG->getTargetConstantPool(C: AM.CP, VT: MVT::i32, Align: AM.Alignment, | 
|---|
| 313 | Offset: AM.Disp, TargetFlags: AM.SymbolFlags); | 
|---|
| 314 | else if (AM.ES) { | 
|---|
| 315 | assert(!AM.Disp && "Non-zero displacement is ignored with ES."); | 
|---|
| 316 | Disp = CurDAG->getTargetExternalSymbol(Sym: AM.ES, VT: MVT::i32, TargetFlags: AM.SymbolFlags); | 
|---|
| 317 | } else if (AM.MCSym) { | 
|---|
| 318 | assert(!AM.Disp && "Non-zero displacement is ignored with MCSym."); | 
|---|
| 319 | assert(AM.SymbolFlags == 0 && "oo"); | 
|---|
| 320 | Disp = CurDAG->getMCSymbol(Sym: AM.MCSym, VT: MVT::i32); | 
|---|
| 321 | } else if (AM.JT != -1) { | 
|---|
| 322 | assert(!AM.Disp && "Non-zero displacement is ignored with JT."); | 
|---|
| 323 | Disp = CurDAG->getTargetJumpTable(JTI: AM.JT, VT: MVT::i32, TargetFlags: AM.SymbolFlags); | 
|---|
| 324 | } else if (AM.BlockAddr) | 
|---|
| 325 | Disp = CurDAG->getTargetBlockAddress(BA: AM.BlockAddr, VT: MVT::i32, Offset: AM.Disp, | 
|---|
| 326 | TargetFlags: AM.SymbolFlags); | 
|---|
| 327 | else | 
|---|
| 328 | Disp = CurDAG->getSignedTargetConstant(Val: AM.Disp, DL, VT: MVT::i32); | 
|---|
| 329 |  | 
|---|
| 330 | if (AM.Segment.getNode()) | 
|---|
| 331 | Segment = AM.Segment; | 
|---|
| 332 | else | 
|---|
| 333 | Segment = CurDAG->getRegister(Reg: 0, VT: MVT::i16); | 
|---|
| 334 | } | 
|---|
| 335 |  | 
|---|
| 336 | // Utility function to determine whether it is AMX SDNode right after | 
|---|
| 337 | // lowering but before ISEL. | 
|---|
| 338 | bool isAMXSDNode(SDNode *N) const { | 
|---|
| 339 | // Check if N is AMX SDNode: | 
|---|
| 340 | // 1. check specific opcode since these carry MVT::Untyped instead of | 
|---|
| 341 | // x86amx_type; | 
|---|
| 342 | // 2. check result type; | 
|---|
| 343 | // 3. check operand type; | 
|---|
| 344 | switch (N->getOpcode()) { | 
|---|
| 345 | default: | 
|---|
| 346 | break; | 
|---|
| 347 | case X86::PT2RPNTLVWZ0V: | 
|---|
| 348 | case X86::PT2RPNTLVWZ0T1V: | 
|---|
| 349 | case X86::PT2RPNTLVWZ1V: | 
|---|
| 350 | case X86::PT2RPNTLVWZ1T1V: | 
|---|
| 351 | case X86::PT2RPNTLVWZ0RSV: | 
|---|
| 352 | case X86::PT2RPNTLVWZ0RST1V: | 
|---|
| 353 | case X86::PT2RPNTLVWZ1RSV: | 
|---|
| 354 | case X86::PT2RPNTLVWZ1RST1V: | 
|---|
| 355 | return true; | 
|---|
| 356 | } | 
|---|
| 357 | for (unsigned Idx = 0, E = N->getNumValues(); Idx != E; ++Idx) { | 
|---|
| 358 | if (N->getValueType(ResNo: Idx) == MVT::x86amx) | 
|---|
| 359 | return true; | 
|---|
| 360 | } | 
|---|
| 361 | for (unsigned Idx = 0, E = N->getNumOperands(); Idx != E; ++Idx) { | 
|---|
| 362 | SDValue Op = N->getOperand(Num: Idx); | 
|---|
| 363 | if (Op.getValueType() == MVT::x86amx) | 
|---|
| 364 | return true; | 
|---|
| 365 | } | 
|---|
| 366 | return false; | 
|---|
| 367 | } | 
|---|
| 368 |  | 
|---|
| 369 | // Utility function to determine whether we should avoid selecting | 
|---|
| 370 | // immediate forms of instructions for better code size or not. | 
|---|
| 371 | // At a high level, we'd like to avoid such instructions when | 
|---|
| 372 | // we have similar constants used within the same basic block | 
|---|
| 373 | // that can be kept in a register. | 
|---|
| 374 | // | 
|---|
| 375 | bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { | 
|---|
| 376 | uint32_t UseCount = 0; | 
|---|
| 377 |  | 
|---|
| 378 | // Do not want to hoist if we're not optimizing for size. | 
|---|
| 379 | // TODO: We'd like to remove this restriction. | 
|---|
| 380 | // See the comment in X86InstrInfo.td for more info. | 
|---|
| 381 | if (!CurDAG->shouldOptForSize()) | 
|---|
| 382 | return false; | 
|---|
| 383 |  | 
|---|
| 384 | // Walk all the users of the immediate. | 
|---|
| 385 | for (const SDNode *User : N->users()) { | 
|---|
| 386 | if (UseCount >= 2) | 
|---|
| 387 | break; | 
|---|
| 388 |  | 
|---|
| 389 | // This user is already selected. Count it as a legitimate use and | 
|---|
| 390 | // move on. | 
|---|
| 391 | if (User->isMachineOpcode()) { | 
|---|
| 392 | UseCount++; | 
|---|
| 393 | continue; | 
|---|
| 394 | } | 
|---|
| 395 |  | 
|---|
| 396 | // We want to count stores of immediates as real uses. | 
|---|
| 397 | if (User->getOpcode() == ISD::STORE && | 
|---|
| 398 | User->getOperand(Num: 1).getNode() == N) { | 
|---|
| 399 | UseCount++; | 
|---|
| 400 | continue; | 
|---|
| 401 | } | 
|---|
| 402 |  | 
|---|
| 403 | // We don't currently match users that have > 2 operands (except | 
|---|
| 404 | // for stores, which are handled above) | 
|---|
| 405 | // Those instruction won't match in ISEL, for now, and would | 
|---|
| 406 | // be counted incorrectly. | 
|---|
| 407 | // This may change in the future as we add additional instruction | 
|---|
| 408 | // types. | 
|---|
| 409 | if (User->getNumOperands() != 2) | 
|---|
| 410 | continue; | 
|---|
| 411 |  | 
|---|
| 412 | // If this is a sign-extended 8-bit integer immediate used in an ALU | 
|---|
| 413 | // instruction, there is probably an opcode encoding to save space. | 
|---|
| 414 | auto *C = dyn_cast<ConstantSDNode>(Val: N); | 
|---|
| 415 | if (C && isInt<8>(x: C->getSExtValue())) | 
|---|
| 416 | continue; | 
|---|
| 417 |  | 
|---|
| 418 | // Immediates that are used for offsets as part of stack | 
|---|
| 419 | // manipulation should be left alone. These are typically | 
|---|
| 420 | // used to indicate SP offsets for argument passing and | 
|---|
| 421 | // will get pulled into stores/pushes (implicitly). | 
|---|
| 422 | if (User->getOpcode() == X86ISD::ADD || | 
|---|
| 423 | User->getOpcode() == ISD::ADD    || | 
|---|
| 424 | User->getOpcode() == X86ISD::SUB || | 
|---|
| 425 | User->getOpcode() == ISD::SUB) { | 
|---|
| 426 |  | 
|---|
| 427 | // Find the other operand of the add/sub. | 
|---|
| 428 | SDValue OtherOp = User->getOperand(Num: 0); | 
|---|
| 429 | if (OtherOp.getNode() == N) | 
|---|
| 430 | OtherOp = User->getOperand(Num: 1); | 
|---|
| 431 |  | 
|---|
| 432 | // Don't count if the other operand is SP. | 
|---|
| 433 | RegisterSDNode *RegNode; | 
|---|
| 434 | if (OtherOp->getOpcode() == ISD::CopyFromReg && | 
|---|
| 435 | (RegNode = dyn_cast_or_null<RegisterSDNode>( | 
|---|
| 436 | Val: OtherOp->getOperand(Num: 1).getNode()))) | 
|---|
| 437 | if ((RegNode->getReg() == X86::ESP) || | 
|---|
| 438 | (RegNode->getReg() == X86::RSP)) | 
|---|
| 439 | continue; | 
|---|
| 440 | } | 
|---|
| 441 |  | 
|---|
| 442 | // ... otherwise, count this and move on. | 
|---|
| 443 | UseCount++; | 
|---|
| 444 | } | 
|---|
| 445 |  | 
|---|
| 446 | // If we have more than 1 use, then recommend for hoisting. | 
|---|
| 447 | return (UseCount > 1); | 
|---|
| 448 | } | 
|---|
| 449 |  | 
|---|
| 450 | /// Return a target constant with the specified value of type i8. | 
|---|
| 451 | inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) { | 
|---|
| 452 | return CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i8); | 
|---|
| 453 | } | 
|---|
| 454 |  | 
|---|
| 455 | /// Return a target constant with the specified value, of type i32. | 
|---|
| 456 | inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) { | 
|---|
| 457 | return CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32); | 
|---|
| 458 | } | 
|---|
| 459 |  | 
|---|
| 460 | /// Return a target constant with the specified value, of type i64. | 
|---|
| 461 | inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) { | 
|---|
| 462 | return CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i64); | 
|---|
| 463 | } | 
|---|
| 464 |  | 
|---|
| 465 | SDValue (SDNode *N, unsigned VecWidth, | 
|---|
| 466 | const SDLoc &DL) { | 
|---|
| 467 | assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); | 
|---|
| 468 | uint64_t Index = N->getConstantOperandVal(Num: 1); | 
|---|
| 469 | MVT VecVT = N->getOperand(Num: 0).getSimpleValueType(); | 
|---|
| 470 | return getI8Imm(Imm: (Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); | 
|---|
| 471 | } | 
|---|
| 472 |  | 
|---|
| 473 | SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth, | 
|---|
| 474 | const SDLoc &DL) { | 
|---|
| 475 | assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); | 
|---|
| 476 | uint64_t Index = N->getConstantOperandVal(Num: 2); | 
|---|
| 477 | MVT VecVT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 478 | return getI8Imm(Imm: (Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); | 
|---|
| 479 | } | 
|---|
| 480 |  | 
|---|
| 481 | SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth, | 
|---|
| 482 | const SDLoc &DL) { | 
|---|
| 483 | assert(VecWidth == 128 && "Unexpected vector width"); | 
|---|
| 484 | uint64_t Index = N->getConstantOperandVal(Num: 2); | 
|---|
| 485 | MVT VecVT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 486 | uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth; | 
|---|
| 487 | assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index"); | 
|---|
| 488 | // vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub) | 
|---|
| 489 | // vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub) | 
|---|
| 490 | return getI8Imm(Imm: InsertIdx ? 0x02 : 0x30, DL); | 
|---|
| 491 | } | 
|---|
| 492 |  | 
|---|
| 493 | SDValue getSBBZero(SDNode *N) { | 
|---|
| 494 | SDLoc dl(N); | 
|---|
| 495 | MVT VT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 496 |  | 
|---|
| 497 | // Create zero. | 
|---|
| 498 | SDVTList VTs = CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::i32); | 
|---|
| 499 | SDValue Zero = | 
|---|
| 500 | SDValue(CurDAG->getMachineNode(Opcode: X86::MOV32r0, dl, VTs, Ops: {}), 0); | 
|---|
| 501 | if (VT == MVT::i64) { | 
|---|
| 502 | Zero = SDValue( | 
|---|
| 503 | CurDAG->getMachineNode( | 
|---|
| 504 | Opcode: TargetOpcode::SUBREG_TO_REG, dl, VT: MVT::i64, | 
|---|
| 505 | Op1: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64), Op2: Zero, | 
|---|
| 506 | Op3: CurDAG->getTargetConstant(Val: X86::sub_32bit, DL: dl, VT: MVT::i32)), | 
|---|
| 507 | 0); | 
|---|
| 508 | } | 
|---|
| 509 |  | 
|---|
| 510 | // Copy flags to the EFLAGS register and glue it to next node. | 
|---|
| 511 | unsigned Opcode = N->getOpcode(); | 
|---|
| 512 | assert((Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY) && | 
|---|
| 513 | "Unexpected opcode for SBB materialization"); | 
|---|
| 514 | unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1; | 
|---|
| 515 | SDValue EFLAGS = | 
|---|
| 516 | CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, Reg: X86::EFLAGS, | 
|---|
| 517 | N: N->getOperand(Num: FlagOpIndex), Glue: SDValue()); | 
|---|
| 518 |  | 
|---|
| 519 | // Create a 64-bit instruction if the result is 64-bits otherwise use the | 
|---|
| 520 | // 32-bit version. | 
|---|
| 521 | unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr; | 
|---|
| 522 | MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; | 
|---|
| 523 | VTs = CurDAG->getVTList(VT1: SBBVT, VT2: MVT::i32); | 
|---|
| 524 | return SDValue( | 
|---|
| 525 | CurDAG->getMachineNode(Opcode: Opc, dl, VTs, | 
|---|
| 526 | Ops: {Zero, Zero, EFLAGS, EFLAGS.getValue(R: 1)}), | 
|---|
| 527 | 0); | 
|---|
| 528 | } | 
|---|
| 529 |  | 
|---|
| 530 | // Helper to detect unneeded and instructions on shift amounts. Called | 
|---|
| 531 | // from PatFrags in tablegen. | 
|---|
| 532 | bool isUnneededShiftMask(SDNode *N, unsigned Width) const { | 
|---|
| 533 | assert(N->getOpcode() == ISD::AND && "Unexpected opcode"); | 
|---|
| 534 | const APInt &Val = N->getConstantOperandAPInt(Num: 1); | 
|---|
| 535 |  | 
|---|
| 536 | if (Val.countr_one() >= Width) | 
|---|
| 537 | return true; | 
|---|
| 538 |  | 
|---|
| 539 | APInt Mask = Val | CurDAG->computeKnownBits(Op: N->getOperand(Num: 0)).Zero; | 
|---|
| 540 | return Mask.countr_one() >= Width; | 
|---|
| 541 | } | 
|---|
| 542 |  | 
|---|
| 543 | /// Return an SDNode that returns the value of the global base register. | 
|---|
| 544 | /// Output instructions required to initialize the global base register, | 
|---|
| 545 | /// if necessary. | 
|---|
| 546 | SDNode *getGlobalBaseReg(); | 
|---|
| 547 |  | 
|---|
| 548 | /// Return a reference to the TargetMachine, casted to the target-specific | 
|---|
| 549 | /// type. | 
|---|
| 550 | const X86TargetMachine &getTargetMachine() const { | 
|---|
| 551 | return static_cast<const X86TargetMachine &>(TM); | 
|---|
| 552 | } | 
|---|
| 553 |  | 
|---|
| 554 | /// Return a reference to the TargetInstrInfo, casted to the target-specific | 
|---|
| 555 | /// type. | 
|---|
| 556 | const X86InstrInfo *getInstrInfo() const { | 
|---|
| 557 | return Subtarget->getInstrInfo(); | 
|---|
| 558 | } | 
|---|
| 559 |  | 
|---|
| 560 | /// Return a condition code of the given SDNode | 
|---|
| 561 | X86::CondCode getCondFromNode(SDNode *N) const; | 
|---|
| 562 |  | 
|---|
| 563 | /// Address-mode matching performs shift-of-and to and-of-shift | 
|---|
| 564 | /// reassociation in order to expose more scaled addressing | 
|---|
| 565 | /// opportunities. | 
|---|
| 566 | bool ComplexPatternFuncMutatesDAG() const override { | 
|---|
| 567 | return true; | 
|---|
| 568 | } | 
|---|
| 569 |  | 
|---|
| 570 | bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const; | 
|---|
| 571 |  | 
|---|
| 572 | // Indicates we should prefer to use a non-temporal load for this load. | 
|---|
| 573 | bool useNonTemporalLoad(LoadSDNode *N) const { | 
|---|
| 574 | if (!N->isNonTemporal()) | 
|---|
| 575 | return false; | 
|---|
| 576 |  | 
|---|
| 577 | unsigned StoreSize = N->getMemoryVT().getStoreSize(); | 
|---|
| 578 |  | 
|---|
| 579 | if (N->getAlign().value() < StoreSize) | 
|---|
| 580 | return false; | 
|---|
| 581 |  | 
|---|
| 582 | switch (StoreSize) { | 
|---|
| 583 | default: llvm_unreachable( "Unsupported store size"); | 
|---|
| 584 | case 4: | 
|---|
| 585 | case 8: | 
|---|
| 586 | return false; | 
|---|
| 587 | case 16: | 
|---|
| 588 | return Subtarget->hasSSE41(); | 
|---|
| 589 | case 32: | 
|---|
| 590 | return Subtarget->hasAVX2(); | 
|---|
| 591 | case 64: | 
|---|
| 592 | return Subtarget->hasAVX512(); | 
|---|
| 593 | } | 
|---|
| 594 | } | 
|---|
| 595 |  | 
|---|
| 596 | bool foldLoadStoreIntoMemOperand(SDNode *Node); | 
|---|
| 597 | MachineSDNode *matchBEXTRFromAndImm(SDNode *Node); | 
|---|
| 598 | bool matchBitExtract(SDNode *Node); | 
|---|
| 599 | bool shrinkAndImmediate(SDNode *N); | 
|---|
| 600 | bool isMaskZeroExtended(SDNode *N) const; | 
|---|
| 601 | bool tryShiftAmountMod(SDNode *N); | 
|---|
| 602 | bool tryShrinkShlLogicImm(SDNode *N); | 
|---|
| 603 | bool tryVPTERNLOG(SDNode *N); | 
|---|
| 604 | bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentB, | 
|---|
| 605 | SDNode *ParentC, SDValue A, SDValue B, SDValue C, | 
|---|
| 606 | uint8_t Imm); | 
|---|
| 607 | bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); | 
|---|
| 608 | bool tryMatchBitSelect(SDNode *N); | 
|---|
| 609 |  | 
|---|
| 610 | MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, | 
|---|
| 611 | const SDLoc &dl, MVT VT, SDNode *Node); | 
|---|
| 612 | MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, | 
|---|
| 613 | const SDLoc &dl, MVT VT, SDNode *Node, | 
|---|
| 614 | SDValue &InGlue); | 
|---|
| 615 |  | 
|---|
| 616 | bool tryOptimizeRem8Extend(SDNode *N); | 
|---|
| 617 |  | 
|---|
| 618 | bool onlyUsesZeroFlag(SDValue Flags) const; | 
|---|
| 619 | bool hasNoSignFlagUses(SDValue Flags) const; | 
|---|
| 620 | bool hasNoCarryFlagUses(SDValue Flags) const; | 
|---|
| 621 | }; | 
|---|
| 622 |  | 
|---|
| 623 | class X86DAGToDAGISelLegacy : public SelectionDAGISelLegacy { | 
|---|
| 624 | public: | 
|---|
| 625 | static char ID; | 
|---|
| 626 | explicit X86DAGToDAGISelLegacy(X86TargetMachine &tm, | 
|---|
| 627 | CodeGenOptLevel OptLevel) | 
|---|
| 628 | : SelectionDAGISelLegacy( | 
|---|
| 629 | ID, std::make_unique<X86DAGToDAGISel>(args&: tm, args&: OptLevel)) {} | 
|---|
| 630 | }; | 
|---|
| 631 | } | 
|---|
| 632 |  | 
|---|
| 633 | char X86DAGToDAGISelLegacy::ID = 0; | 
|---|
| 634 |  | 
|---|
| 635 | INITIALIZE_PASS(X86DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) | 
|---|
| 636 |  | 
|---|
| 637 | // Returns true if this masked compare can be implemented legally with this | 
|---|
| 638 | // type. | 
|---|
| 639 | static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { | 
|---|
| 640 | unsigned Opcode = N->getOpcode(); | 
|---|
| 641 | if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM || | 
|---|
| 642 | Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC || | 
|---|
| 643 | Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) { | 
|---|
| 644 | // We can get 256-bit 8 element types here without VLX being enabled. When | 
|---|
| 645 | // this happens we will use 512-bit operations and the mask will not be | 
|---|
| 646 | // zero extended. | 
|---|
| 647 | EVT OpVT = N->getOperand(Num: 0).getValueType(); | 
|---|
| 648 | // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the | 
|---|
| 649 | // second operand. | 
|---|
| 650 | if (Opcode == X86ISD::STRICT_CMPM) | 
|---|
| 651 | OpVT = N->getOperand(Num: 1).getValueType(); | 
|---|
| 652 | if (OpVT.is256BitVector() || OpVT.is128BitVector()) | 
|---|
| 653 | return Subtarget->hasVLX(); | 
|---|
| 654 |  | 
|---|
| 655 | return true; | 
|---|
| 656 | } | 
|---|
| 657 | // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check. | 
|---|
| 658 | if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM || | 
|---|
| 659 | Opcode == X86ISD::FSETCCM_SAE) | 
|---|
| 660 | return true; | 
|---|
| 661 |  | 
|---|
| 662 | return false; | 
|---|
| 663 | } | 
|---|
| 664 |  | 
|---|
| 665 | // Returns true if we can assume the writer of the mask has zero extended it | 
|---|
| 666 | // for us. | 
|---|
| 667 | bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const { | 
|---|
| 668 | // If this is an AND, check if we have a compare on either side. As long as | 
|---|
| 669 | // one side guarantees the mask is zero extended, the AND will preserve those | 
|---|
| 670 | // zeros. | 
|---|
| 671 | if (N->getOpcode() == ISD::AND) | 
|---|
| 672 | return isLegalMaskCompare(N: N->getOperand(Num: 0).getNode(), Subtarget) || | 
|---|
| 673 | isLegalMaskCompare(N: N->getOperand(Num: 1).getNode(), Subtarget); | 
|---|
| 674 |  | 
|---|
| 675 | return isLegalMaskCompare(N, Subtarget); | 
|---|
| 676 | } | 
|---|
| 677 |  | 
|---|
| 678 | bool | 
|---|
| 679 | X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { | 
|---|
| 680 | if (OptLevel == CodeGenOptLevel::None) | 
|---|
| 681 | return false; | 
|---|
| 682 |  | 
|---|
| 683 | if (!N.hasOneUse()) | 
|---|
| 684 | return false; | 
|---|
| 685 |  | 
|---|
| 686 | if (N.getOpcode() != ISD::LOAD) | 
|---|
| 687 | return true; | 
|---|
| 688 |  | 
|---|
| 689 | // Don't fold non-temporal loads if we have an instruction for them. | 
|---|
| 690 | if (useNonTemporalLoad(N: cast<LoadSDNode>(Val&: N))) | 
|---|
| 691 | return false; | 
|---|
| 692 |  | 
|---|
| 693 | // If N is a load, do additional profitability checks. | 
|---|
| 694 | if (U == Root) { | 
|---|
| 695 | switch (U->getOpcode()) { | 
|---|
| 696 | default: break; | 
|---|
| 697 | case X86ISD::ADD: | 
|---|
| 698 | case X86ISD::ADC: | 
|---|
| 699 | case X86ISD::SUB: | 
|---|
| 700 | case X86ISD::SBB: | 
|---|
| 701 | case X86ISD::AND: | 
|---|
| 702 | case X86ISD::XOR: | 
|---|
| 703 | case X86ISD::OR: | 
|---|
| 704 | case ISD::ADD: | 
|---|
| 705 | case ISD::UADDO_CARRY: | 
|---|
| 706 | case ISD::AND: | 
|---|
| 707 | case ISD::OR: | 
|---|
| 708 | case ISD::XOR: { | 
|---|
| 709 | SDValue Op1 = U->getOperand(Num: 1); | 
|---|
| 710 |  | 
|---|
| 711 | // If the other operand is a 8-bit immediate we should fold the immediate | 
|---|
| 712 | // instead. This reduces code size. | 
|---|
| 713 | // e.g. | 
|---|
| 714 | // movl 4(%esp), %eax | 
|---|
| 715 | // addl $4, %eax | 
|---|
| 716 | // vs. | 
|---|
| 717 | // movl $4, %eax | 
|---|
| 718 | // addl 4(%esp), %eax | 
|---|
| 719 | // The former is 2 bytes shorter. In case where the increment is 1, then | 
|---|
| 720 | // the saving can be 4 bytes (by using incl %eax). | 
|---|
| 721 | if (auto *Imm = dyn_cast<ConstantSDNode>(Val&: Op1)) { | 
|---|
| 722 | if (Imm->getAPIntValue().isSignedIntN(N: 8)) | 
|---|
| 723 | return false; | 
|---|
| 724 |  | 
|---|
| 725 | // If this is a 64-bit AND with an immediate that fits in 32-bits, | 
|---|
| 726 | // prefer using the smaller and over folding the load. This is needed to | 
|---|
| 727 | // make sure immediates created by shrinkAndImmediate are always folded. | 
|---|
| 728 | // Ideally we would narrow the load during DAG combine and get the | 
|---|
| 729 | // best of both worlds. | 
|---|
| 730 | if (U->getOpcode() == ISD::AND && | 
|---|
| 731 | Imm->getAPIntValue().getBitWidth() == 64 && | 
|---|
| 732 | Imm->getAPIntValue().isIntN(N: 32)) | 
|---|
| 733 | return false; | 
|---|
| 734 |  | 
|---|
| 735 | // If this really a zext_inreg that can be represented with a movzx | 
|---|
| 736 | // instruction, prefer that. | 
|---|
| 737 | // TODO: We could shrink the load and fold if it is non-volatile. | 
|---|
| 738 | if (U->getOpcode() == ISD::AND && | 
|---|
| 739 | (Imm->getAPIntValue() == UINT8_MAX || | 
|---|
| 740 | Imm->getAPIntValue() == UINT16_MAX || | 
|---|
| 741 | Imm->getAPIntValue() == UINT32_MAX)) | 
|---|
| 742 | return false; | 
|---|
| 743 |  | 
|---|
| 744 | // ADD/SUB with can negate the immediate and use the opposite operation | 
|---|
| 745 | // to fit 128 into a sign extended 8 bit immediate. | 
|---|
| 746 | if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) && | 
|---|
| 747 | (-Imm->getAPIntValue()).isSignedIntN(N: 8)) | 
|---|
| 748 | return false; | 
|---|
| 749 |  | 
|---|
| 750 | if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) && | 
|---|
| 751 | (-Imm->getAPIntValue()).isSignedIntN(N: 8) && | 
|---|
| 752 | hasNoCarryFlagUses(Flags: SDValue(U, 1))) | 
|---|
| 753 | return false; | 
|---|
| 754 | } | 
|---|
| 755 |  | 
|---|
| 756 | // If the other operand is a TLS address, we should fold it instead. | 
|---|
| 757 | // This produces | 
|---|
| 758 | // movl    %gs:0, %eax | 
|---|
| 759 | // leal    i@NTPOFF(%eax), %eax | 
|---|
| 760 | // instead of | 
|---|
| 761 | // movl    $i@NTPOFF, %eax | 
|---|
| 762 | // addl    %gs:0, %eax | 
|---|
| 763 | // if the block also has an access to a second TLS address this will save | 
|---|
| 764 | // a load. | 
|---|
| 765 | // FIXME: This is probably also true for non-TLS addresses. | 
|---|
| 766 | if (Op1.getOpcode() == X86ISD::Wrapper) { | 
|---|
| 767 | SDValue Val = Op1.getOperand(i: 0); | 
|---|
| 768 | if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) | 
|---|
| 769 | return false; | 
|---|
| 770 | } | 
|---|
| 771 |  | 
|---|
| 772 | // Don't fold load if this matches the BTS/BTR/BTC patterns. | 
|---|
| 773 | // BTS: (or X, (shl 1, n)) | 
|---|
| 774 | // BTR: (and X, (rotl -2, n)) | 
|---|
| 775 | // BTC: (xor X, (shl 1, n)) | 
|---|
| 776 | if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) { | 
|---|
| 777 | if (U->getOperand(Num: 0).getOpcode() == ISD::SHL && | 
|---|
| 778 | isOneConstant(V: U->getOperand(Num: 0).getOperand(i: 0))) | 
|---|
| 779 | return false; | 
|---|
| 780 |  | 
|---|
| 781 | if (U->getOperand(Num: 1).getOpcode() == ISD::SHL && | 
|---|
| 782 | isOneConstant(V: U->getOperand(Num: 1).getOperand(i: 0))) | 
|---|
| 783 | return false; | 
|---|
| 784 | } | 
|---|
| 785 | if (U->getOpcode() == ISD::AND) { | 
|---|
| 786 | SDValue U0 = U->getOperand(Num: 0); | 
|---|
| 787 | SDValue U1 = U->getOperand(Num: 1); | 
|---|
| 788 | if (U0.getOpcode() == ISD::ROTL) { | 
|---|
| 789 | auto *C = dyn_cast<ConstantSDNode>(Val: U0.getOperand(i: 0)); | 
|---|
| 790 | if (C && C->getSExtValue() == -2) | 
|---|
| 791 | return false; | 
|---|
| 792 | } | 
|---|
| 793 |  | 
|---|
| 794 | if (U1.getOpcode() == ISD::ROTL) { | 
|---|
| 795 | auto *C = dyn_cast<ConstantSDNode>(Val: U1.getOperand(i: 0)); | 
|---|
| 796 | if (C && C->getSExtValue() == -2) | 
|---|
| 797 | return false; | 
|---|
| 798 | } | 
|---|
| 799 | } | 
|---|
| 800 |  | 
|---|
| 801 | break; | 
|---|
| 802 | } | 
|---|
| 803 | case ISD::SHL: | 
|---|
| 804 | case ISD::SRA: | 
|---|
| 805 | case ISD::SRL: | 
|---|
| 806 | // Don't fold a load into a shift by immediate. The BMI2 instructions | 
|---|
| 807 | // support folding a load, but not an immediate. The legacy instructions | 
|---|
| 808 | // support folding an immediate, but can't fold a load. Folding an | 
|---|
| 809 | // immediate is preferable to folding a load. | 
|---|
| 810 | if (isa<ConstantSDNode>(Val: U->getOperand(Num: 1))) | 
|---|
| 811 | return false; | 
|---|
| 812 |  | 
|---|
| 813 | break; | 
|---|
| 814 | } | 
|---|
| 815 | } | 
|---|
| 816 |  | 
|---|
| 817 | // Prevent folding a load if this can implemented with an insert_subreg or | 
|---|
| 818 | // a move that implicitly zeroes. | 
|---|
| 819 | if (Root->getOpcode() == ISD::INSERT_SUBVECTOR && | 
|---|
| 820 | isNullConstant(V: Root->getOperand(Num: 2)) && | 
|---|
| 821 | (Root->getOperand(Num: 0).isUndef() || | 
|---|
| 822 | ISD::isBuildVectorAllZeros(N: Root->getOperand(Num: 0).getNode()))) | 
|---|
| 823 | return false; | 
|---|
| 824 |  | 
|---|
| 825 | return true; | 
|---|
| 826 | } | 
|---|
| 827 |  | 
|---|
| 828 | // Indicates it is profitable to form an AVX512 masked operation. Returning | 
|---|
| 829 | // false will favor a masked register-register masked move or vblendm and the | 
|---|
| 830 | // operation will be selected separately. | 
|---|
| 831 | bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const { | 
|---|
| 832 | assert( | 
|---|
| 833 | (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && | 
|---|
| 834 | "Unexpected opcode!"); | 
|---|
| 835 |  | 
|---|
| 836 | // If the operation has additional users, the operation will be duplicated. | 
|---|
| 837 | // Check the use count to prevent that. | 
|---|
| 838 | // FIXME: Are there cheap opcodes we might want to duplicate? | 
|---|
| 839 | return N->getOperand(Num: 1).hasOneUse(); | 
|---|
| 840 | } | 
|---|
| 841 |  | 
|---|
| 842 | /// Replace the original chain operand of the call with | 
|---|
| 843 | /// load's chain operand and move load below the call's chain operand. | 
|---|
| 844 | static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, | 
|---|
| 845 | SDValue Call, SDValue OrigChain) { | 
|---|
| 846 | SmallVector<SDValue, 8> Ops; | 
|---|
| 847 | SDValue Chain = OrigChain.getOperand(i: 0); | 
|---|
| 848 | if (Chain.getNode() == Load.getNode()) | 
|---|
| 849 | Ops.push_back(Elt: Load.getOperand(i: 0)); | 
|---|
| 850 | else { | 
|---|
| 851 | assert(Chain.getOpcode() == ISD::TokenFactor && | 
|---|
| 852 | "Unexpected chain operand"); | 
|---|
| 853 | for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) | 
|---|
| 854 | if (Chain.getOperand(i).getNode() == Load.getNode()) | 
|---|
| 855 | Ops.push_back(Elt: Load.getOperand(i: 0)); | 
|---|
| 856 | else | 
|---|
| 857 | Ops.push_back(Elt: Chain.getOperand(i)); | 
|---|
| 858 | SDValue NewChain = | 
|---|
| 859 | CurDAG->getNode(Opcode: ISD::TokenFactor, DL: SDLoc(Load), VT: MVT::Other, Ops); | 
|---|
| 860 | Ops.clear(); | 
|---|
| 861 | Ops.push_back(Elt: NewChain); | 
|---|
| 862 | } | 
|---|
| 863 | Ops.append(in_start: OrigChain->op_begin() + 1, in_end: OrigChain->op_end()); | 
|---|
| 864 | CurDAG->UpdateNodeOperands(N: OrigChain.getNode(), Ops); | 
|---|
| 865 | CurDAG->UpdateNodeOperands(N: Load.getNode(), Op1: Call.getOperand(i: 0), | 
|---|
| 866 | Op2: Load.getOperand(i: 1), Op3: Load.getOperand(i: 2)); | 
|---|
| 867 |  | 
|---|
| 868 | Ops.clear(); | 
|---|
| 869 | Ops.push_back(Elt: SDValue(Load.getNode(), 1)); | 
|---|
| 870 | Ops.append(in_start: Call->op_begin() + 1, in_end: Call->op_end()); | 
|---|
| 871 | CurDAG->UpdateNodeOperands(N: Call.getNode(), Ops); | 
|---|
| 872 | } | 
|---|
| 873 |  | 
|---|
| 874 | /// Return true if call address is a load and it can be | 
|---|
| 875 | /// moved below CALLSEQ_START and the chains leading up to the call. | 
|---|
| 876 | /// Return the CALLSEQ_START by reference as a second output. | 
|---|
| 877 | /// In the case of a tail call, there isn't a callseq node between the call | 
|---|
| 878 | /// chain and the load. | 
|---|
| 879 | static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { | 
|---|
| 880 | // The transformation is somewhat dangerous if the call's chain was glued to | 
|---|
| 881 | // the call. After MoveBelowOrigChain the load is moved between the call and | 
|---|
| 882 | // the chain, this can create a cycle if the load is not folded. So it is | 
|---|
| 883 | // *really* important that we are sure the load will be folded. | 
|---|
| 884 | if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) | 
|---|
| 885 | return false; | 
|---|
| 886 | auto *LD = dyn_cast<LoadSDNode>(Val: Callee.getNode()); | 
|---|
| 887 | if (!LD || | 
|---|
| 888 | !LD->isSimple() || | 
|---|
| 889 | LD->getAddressingMode() != ISD::UNINDEXED || | 
|---|
| 890 | LD->getExtensionType() != ISD::NON_EXTLOAD) | 
|---|
| 891 | return false; | 
|---|
| 892 |  | 
|---|
| 893 | // Now let's find the callseq_start. | 
|---|
| 894 | while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) { | 
|---|
| 895 | if (!Chain.hasOneUse()) | 
|---|
| 896 | return false; | 
|---|
| 897 | Chain = Chain.getOperand(i: 0); | 
|---|
| 898 | } | 
|---|
| 899 |  | 
|---|
| 900 | if (!Chain.getNumOperands()) | 
|---|
| 901 | return false; | 
|---|
| 902 | // Since we are not checking for AA here, conservatively abort if the chain | 
|---|
| 903 | // writes to memory. It's not safe to move the callee (a load) across a store. | 
|---|
| 904 | if (isa<MemSDNode>(Val: Chain.getNode()) && | 
|---|
| 905 | cast<MemSDNode>(Val: Chain.getNode())->writeMem()) | 
|---|
| 906 | return false; | 
|---|
| 907 | if (Chain.getOperand(i: 0).getNode() == Callee.getNode()) | 
|---|
| 908 | return true; | 
|---|
| 909 | if (Chain.getOperand(i: 0).getOpcode() == ISD::TokenFactor && | 
|---|
| 910 | Callee.getValue(R: 1).isOperandOf(N: Chain.getOperand(i: 0).getNode()) && | 
|---|
| 911 | Callee.getValue(R: 1).hasOneUse()) | 
|---|
| 912 | return true; | 
|---|
| 913 | return false; | 
|---|
| 914 | } | 
|---|
| 915 |  | 
|---|
/// Returns true if the 64-bit immediate \p Imm contains an ENDBR64-looking
/// byte sequence anchored at its low bytes.
///
/// The low three bytes must equal 0x0F1EFA. The bytes above them are then
/// scanned from least to most significant: a 0xF3 byte completes the pattern,
/// any of the optional prefix bytes listed below may sit in between, and any
/// other byte (or running out of bytes) means there is no match.
/// e.g. 0xF30F1EFA, 0xF3660F1EFA and 0xF3670F1EFA all match.
static bool isEndbrImm64(uint64_t Imm) {
  if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
    return false;

  // The low 24 bits (0x0F1EFA) have matched; inspect the remaining five bytes.
  for (unsigned Shift = 24; Shift < 64; Shift += 8) {
    switch (static_cast<uint8_t>(Imm >> Shift)) {
    case 0xF3:
      return true;
    // Optional prefix bytes that may appear between 0xF3 and 0x0F1EFA.
    case 0x26:
    case 0x2E:
    case 0x36:
    case 0x3E:
    case 0x64:
    case 0x65:
    case 0x66:
    case 0x67:
    case 0xF0:
    case 0xF2:
      continue; // Keep scanning the next higher byte.
    default:
      return false;
    }
  }

  return false;
}
|---|
| 936 |  | 
|---|
| 937 | static bool needBWI(MVT VT) { | 
|---|
| 938 | return (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v64i8); | 
|---|
| 939 | } | 
|---|
| 940 |  | 
|---|
| 941 | void X86DAGToDAGISel::PreprocessISelDAG() { | 
|---|
| 942 | bool MadeChange = false; | 
|---|
| 943 | for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), | 
|---|
| 944 | E = CurDAG->allnodes_end(); I != E; ) { | 
|---|
| 945 | SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. | 
|---|
| 946 |  | 
|---|
| 947 | // This is for CET enhancement. | 
|---|
| 948 | // | 
|---|
| 949 | // ENDBR32 and ENDBR64 have specific opcodes: | 
|---|
| 950 | // ENDBR32: F3 0F 1E FB | 
|---|
| 951 | // ENDBR64: F3 0F 1E FA | 
|---|
| 952 | // And we want that attackers won’t find unintended ENDBR32/64 | 
|---|
| 953 | // opcode matches in the binary | 
|---|
| 954 | // Here’s an example: | 
|---|
| 955 | // If the compiler had to generate asm for the following code: | 
|---|
| 956 | // a = 0xF30F1EFA | 
|---|
| 957 | // it could, for example, generate: | 
|---|
| 958 | // mov 0xF30F1EFA, dword ptr[a] | 
|---|
| 959 | // In such a case, the binary would include a gadget that starts | 
|---|
| 960 | // with a fake ENDBR64 opcode. Therefore, we split such generation | 
|---|
| 961 | // into multiple operations so that it does not show up in the binary. | 
|---|
| 962 | if (N->getOpcode() == ISD::Constant) { | 
|---|
| 963 | MVT VT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 964 | int64_t Imm = cast<ConstantSDNode>(Val: N)->getSExtValue(); | 
|---|
| 965 | int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB; | 
|---|
| 966 | if (Imm == EndbrImm || isEndbrImm64(Imm)) { | 
|---|
| 967 | // Check that the cf-protection-branch is enabled. | 
|---|
| 968 | Metadata *CFProtectionBranch = | 
|---|
| 969 | MF->getFunction().getParent()->getModuleFlag( | 
|---|
| 970 | Key: "cf-protection-branch"); | 
|---|
| 971 | if (CFProtectionBranch || IndirectBranchTracking) { | 
|---|
| 972 | SDLoc dl(N); | 
|---|
| 973 | SDValue Complement = CurDAG->getConstant(Val: ~Imm, DL: dl, VT, isTarget: false, isOpaque: true); | 
|---|
| 974 | Complement = CurDAG->getNOT(DL: dl, Val: Complement, VT); | 
|---|
| 975 | --I; | 
|---|
| 976 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Complement); | 
|---|
| 977 | ++I; | 
|---|
| 978 | MadeChange = true; | 
|---|
| 979 | continue; | 
|---|
| 980 | } | 
|---|
| 981 | } | 
|---|
| 982 | } | 
|---|
| 983 |  | 
|---|
| 984 | // If this is a target specific AND node with no flag usages, turn it back | 
|---|
| 985 | // into ISD::AND to enable test instruction matching. | 
|---|
| 986 | if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(Value: 1)) { | 
|---|
| 987 | SDValue Res = CurDAG->getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), | 
|---|
| 988 | N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 1)); | 
|---|
| 989 | --I; | 
|---|
| 990 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Res); | 
|---|
| 991 | ++I; | 
|---|
| 992 | MadeChange = true; | 
|---|
| 993 | continue; | 
|---|
| 994 | } | 
|---|
| 995 |  | 
|---|
| 996 | // Convert vector increment or decrement to sub/add with an all-ones | 
|---|
| 997 | // constant: | 
|---|
| 998 | // add X, <1, 1...> --> sub X, <-1, -1...> | 
|---|
| 999 | // sub X, <1, 1...> --> add X, <-1, -1...> | 
|---|
| 1000 | // The all-ones vector constant can be materialized using a pcmpeq | 
|---|
| 1001 | // instruction that is commonly recognized as an idiom (has no register | 
|---|
| 1002 | // dependency), so that's better/smaller than loading a splat 1 constant. | 
|---|
| 1003 | // | 
|---|
| 1004 | // But don't do this if it would inhibit a potentially profitable load | 
|---|
| 1005 | // folding opportunity for the other operand. That only occurs with the | 
|---|
| 1006 | // intersection of: | 
|---|
| 1007 | // (1) The other operand (op0) is load foldable. | 
|---|
| 1008 | // (2) The op is an add (otherwise, we are *creating* an add and can still | 
|---|
| 1009 | //     load fold the other op). | 
|---|
| 1010 | // (3) The target has AVX (otherwise, we have a destructive add and can't | 
|---|
| 1011 | //     load fold the other op without killing the constant op). | 
|---|
| 1012 | // (4) The constant 1 vector has multiple uses (so it is profitable to load | 
|---|
| 1013 | //     into a register anyway). | 
|---|
| 1014 | auto mayPreventLoadFold = [&]() { | 
|---|
| 1015 | return X86::mayFoldLoad(Op: N->getOperand(Num: 0), Subtarget: *Subtarget) && | 
|---|
| 1016 | N->getOpcode() == ISD::ADD && Subtarget->hasAVX() && | 
|---|
| 1017 | !N->getOperand(Num: 1).hasOneUse(); | 
|---|
| 1018 | }; | 
|---|
| 1019 | if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && | 
|---|
| 1020 | N->getSimpleValueType(ResNo: 0).isVector() && !mayPreventLoadFold()) { | 
|---|
| 1021 | APInt SplatVal; | 
|---|
| 1022 | if (X86::isConstantSplat(Op: N->getOperand(Num: 1), SplatVal) && | 
|---|
| 1023 | SplatVal.isOne()) { | 
|---|
| 1024 | SDLoc DL(N); | 
|---|
| 1025 |  | 
|---|
| 1026 | MVT VT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 1027 | unsigned NumElts = VT.getSizeInBits() / 32; | 
|---|
| 1028 | SDValue AllOnes = | 
|---|
| 1029 | CurDAG->getAllOnesConstant(DL, VT: MVT::getVectorVT(VT: MVT::i32, NumElements: NumElts)); | 
|---|
| 1030 | AllOnes = CurDAG->getBitcast(VT, V: AllOnes); | 
|---|
| 1031 |  | 
|---|
| 1032 | unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; | 
|---|
| 1033 | SDValue Res = | 
|---|
| 1034 | CurDAG->getNode(Opcode: NewOpcode, DL, VT, N1: N->getOperand(Num: 0), N2: AllOnes); | 
|---|
| 1035 | --I; | 
|---|
| 1036 | CurDAG->ReplaceAllUsesWith(From: N, To: Res.getNode()); | 
|---|
| 1037 | ++I; | 
|---|
| 1038 | MadeChange = true; | 
|---|
| 1039 | continue; | 
|---|
| 1040 | } | 
|---|
| 1041 | } | 
|---|
| 1042 |  | 
|---|
| 1043 | switch (N->getOpcode()) { | 
|---|
| 1044 | case X86ISD::VBROADCAST: { | 
|---|
| 1045 | MVT VT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 1046 | // Emulate v32i16/v64i8 broadcast without BWI. | 
|---|
| 1047 | if (!Subtarget->hasBWI() && needBWI(VT)) { | 
|---|
| 1048 | MVT NarrowVT = VT.getHalfNumVectorElementsVT(); | 
|---|
| 1049 | SDLoc dl(N); | 
|---|
| 1050 | SDValue NarrowBCast = | 
|---|
| 1051 | CurDAG->getNode(Opcode: X86ISD::VBROADCAST, DL: dl, VT: NarrowVT, Operand: N->getOperand(Num: 0)); | 
|---|
| 1052 | SDValue Res = | 
|---|
| 1053 | CurDAG->getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: CurDAG->getUNDEF(VT), | 
|---|
| 1054 | N2: NarrowBCast, N3: CurDAG->getIntPtrConstant(Val: 0, DL: dl)); | 
|---|
| 1055 | unsigned Index = NarrowVT.getVectorMinNumElements(); | 
|---|
| 1056 | Res = CurDAG->getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: Res, N2: NarrowBCast, | 
|---|
| 1057 | N3: CurDAG->getIntPtrConstant(Val: Index, DL: dl)); | 
|---|
| 1058 |  | 
|---|
| 1059 | --I; | 
|---|
| 1060 | CurDAG->ReplaceAllUsesWith(From: N, To: Res.getNode()); | 
|---|
| 1061 | ++I; | 
|---|
| 1062 | MadeChange = true; | 
|---|
| 1063 | continue; | 
|---|
| 1064 | } | 
|---|
| 1065 |  | 
|---|
| 1066 | break; | 
|---|
| 1067 | } | 
|---|
| 1068 | case X86ISD::VBROADCAST_LOAD: { | 
|---|
| 1069 | MVT VT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 1070 | // Emulate v32i16/v64i8 broadcast without BWI. | 
|---|
| 1071 | if (!Subtarget->hasBWI() && needBWI(VT)) { | 
|---|
| 1072 | MVT NarrowVT = VT.getHalfNumVectorElementsVT(); | 
|---|
| 1073 | auto *MemNode = cast<MemSDNode>(Val: N); | 
|---|
| 1074 | SDLoc dl(N); | 
|---|
| 1075 | SDVTList VTs = CurDAG->getVTList(VT1: NarrowVT, VT2: MVT::Other); | 
|---|
| 1076 | SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()}; | 
|---|
| 1077 | SDValue NarrowBCast = CurDAG->getMemIntrinsicNode( | 
|---|
| 1078 | Opcode: X86ISD::VBROADCAST_LOAD, dl, VTList: VTs, Ops, MemVT: MemNode->getMemoryVT(), | 
|---|
| 1079 | MMO: MemNode->getMemOperand()); | 
|---|
| 1080 | SDValue Res = | 
|---|
| 1081 | CurDAG->getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: CurDAG->getUNDEF(VT), | 
|---|
| 1082 | N2: NarrowBCast, N3: CurDAG->getIntPtrConstant(Val: 0, DL: dl)); | 
|---|
| 1083 | unsigned Index = NarrowVT.getVectorMinNumElements(); | 
|---|
| 1084 | Res = CurDAG->getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: Res, N2: NarrowBCast, | 
|---|
| 1085 | N3: CurDAG->getIntPtrConstant(Val: Index, DL: dl)); | 
|---|
| 1086 |  | 
|---|
| 1087 | --I; | 
|---|
| 1088 | SDValue To[] = {Res, NarrowBCast.getValue(R: 1)}; | 
|---|
| 1089 | CurDAG->ReplaceAllUsesWith(From: N, To); | 
|---|
| 1090 | ++I; | 
|---|
| 1091 | MadeChange = true; | 
|---|
| 1092 | continue; | 
|---|
| 1093 | } | 
|---|
| 1094 |  | 
|---|
| 1095 | break; | 
|---|
| 1096 | } | 
|---|
| 1097 | case ISD::LOAD: { | 
|---|
| 1098 | // If this is a XMM/YMM load of the same lower bits as another YMM/ZMM | 
|---|
| 1099 | // load, then just extract the lower subvector and avoid the second load. | 
|---|
| 1100 | auto *Ld = cast<LoadSDNode>(Val: N); | 
|---|
| 1101 | MVT VT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 1102 | if (!ISD::isNormalLoad(N: Ld) || !Ld->isSimple() || | 
|---|
| 1103 | !(VT.is128BitVector() || VT.is256BitVector())) | 
|---|
| 1104 | break; | 
|---|
| 1105 |  | 
|---|
| 1106 | MVT MaxVT = VT; | 
|---|
| 1107 | SDNode *MaxLd = nullptr; | 
|---|
| 1108 | SDValue Ptr = Ld->getBasePtr(); | 
|---|
| 1109 | SDValue Chain = Ld->getChain(); | 
|---|
| 1110 | for (SDNode *User : Ptr->users()) { | 
|---|
| 1111 | auto *UserLd = dyn_cast<LoadSDNode>(Val: User); | 
|---|
| 1112 | MVT UserVT = User->getSimpleValueType(ResNo: 0); | 
|---|
| 1113 | if (User != N && UserLd && ISD::isNormalLoad(N: User) && | 
|---|
| 1114 | UserLd->getBasePtr() == Ptr && UserLd->getChain() == Chain && | 
|---|
| 1115 | !User->hasAnyUseOfValue(Value: 1) && | 
|---|
| 1116 | (UserVT.is256BitVector() || UserVT.is512BitVector()) && | 
|---|
| 1117 | UserVT.getSizeInBits() > VT.getSizeInBits() && | 
|---|
| 1118 | (!MaxLd || UserVT.getSizeInBits() > MaxVT.getSizeInBits())) { | 
|---|
| 1119 | MaxLd = User; | 
|---|
| 1120 | MaxVT = UserVT; | 
|---|
| 1121 | } | 
|---|
| 1122 | } | 
|---|
| 1123 | if (MaxLd) { | 
|---|
| 1124 | SDLoc dl(N); | 
|---|
| 1125 | unsigned NumSubElts = VT.getSizeInBits() / MaxVT.getScalarSizeInBits(); | 
|---|
| 1126 | MVT SubVT = MVT::getVectorVT(VT: MaxVT.getScalarType(), NumElements: NumSubElts); | 
|---|
| 1127 | SDValue  = CurDAG->getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: SubVT, | 
|---|
| 1128 | N1: SDValue(MaxLd, 0), | 
|---|
| 1129 | N2: CurDAG->getIntPtrConstant(Val: 0, DL: dl)); | 
|---|
| 1130 | SDValue Res = CurDAG->getBitcast(VT, V: Extract); | 
|---|
| 1131 |  | 
|---|
| 1132 | --I; | 
|---|
| 1133 | SDValue To[] = {Res, SDValue(MaxLd, 1)}; | 
|---|
| 1134 | CurDAG->ReplaceAllUsesWith(From: N, To); | 
|---|
| 1135 | ++I; | 
|---|
| 1136 | MadeChange = true; | 
|---|
| 1137 | continue; | 
|---|
| 1138 | } | 
|---|
| 1139 | break; | 
|---|
| 1140 | } | 
|---|
| 1141 | case ISD::VSELECT: { | 
|---|
| 1142 | // Replace VSELECT with non-mask conditions with BLENDV/VPTERNLOG. | 
|---|
| 1143 | EVT EleVT = N->getOperand(Num: 0).getValueType().getVectorElementType(); | 
|---|
| 1144 | if (EleVT == MVT::i1) | 
|---|
| 1145 | break; | 
|---|
| 1146 |  | 
|---|
| 1147 | assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); | 
|---|
| 1148 | assert(N->getValueType(0).getVectorElementType() != MVT::i16 && | 
|---|
| 1149 | "We can't replace VSELECT with BLENDV in vXi16!"); | 
|---|
| 1150 | SDValue R; | 
|---|
| 1151 | if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(Op: N->getOperand(Num: 0)) == | 
|---|
| 1152 | EleVT.getSizeInBits()) { | 
|---|
| 1153 | R = CurDAG->getNode(Opcode: X86ISD::VPTERNLOG, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), | 
|---|
| 1154 | N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 1), N3: N->getOperand(Num: 2), | 
|---|
| 1155 | N4: CurDAG->getTargetConstant(Val: 0xCA, DL: SDLoc(N), VT: MVT::i8)); | 
|---|
| 1156 | } else { | 
|---|
| 1157 | R = CurDAG->getNode(Opcode: X86ISD::BLENDV, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), | 
|---|
| 1158 | N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 1), | 
|---|
| 1159 | N3: N->getOperand(Num: 2)); | 
|---|
| 1160 | } | 
|---|
| 1161 | --I; | 
|---|
| 1162 | CurDAG->ReplaceAllUsesWith(From: N, To: R.getNode()); | 
|---|
| 1163 | ++I; | 
|---|
| 1164 | MadeChange = true; | 
|---|
| 1165 | continue; | 
|---|
| 1166 | } | 
|---|
| 1167 | case ISD::FP_ROUND: | 
|---|
| 1168 | case ISD::STRICT_FP_ROUND: | 
|---|
| 1169 | case ISD::FP_TO_SINT: | 
|---|
| 1170 | case ISD::FP_TO_UINT: | 
|---|
| 1171 | case ISD::STRICT_FP_TO_SINT: | 
|---|
| 1172 | case ISD::STRICT_FP_TO_UINT: { | 
|---|
| 1173 | // Replace vector fp_to_s/uint with their X86 specific equivalent so we | 
|---|
| 1174 | // don't need 2 sets of patterns. | 
|---|
| 1175 | if (!N->getSimpleValueType(ResNo: 0).isVector()) | 
|---|
| 1176 | break; | 
|---|
| 1177 |  | 
|---|
| 1178 | unsigned NewOpc; | 
|---|
| 1179 | switch (N->getOpcode()) { | 
|---|
| 1180 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 1181 | case ISD::FP_ROUND:          NewOpc = X86ISD::VFPROUND;        break; | 
|---|
| 1182 | case ISD::STRICT_FP_ROUND:   NewOpc = X86ISD::STRICT_VFPROUND; break; | 
|---|
| 1183 | case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break; | 
|---|
| 1184 | case ISD::FP_TO_SINT:        NewOpc = X86ISD::CVTTP2SI;        break; | 
|---|
| 1185 | case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break; | 
|---|
| 1186 | case ISD::FP_TO_UINT:        NewOpc = X86ISD::CVTTP2UI;        break; | 
|---|
| 1187 | } | 
|---|
| 1188 | SDValue Res; | 
|---|
| 1189 | if (N->isStrictFPOpcode()) | 
|---|
| 1190 | Res = | 
|---|
| 1191 | CurDAG->getNode(Opcode: NewOpc, DL: SDLoc(N), ResultTys: {N->getValueType(ResNo: 0), MVT::Other}, | 
|---|
| 1192 | Ops: {N->getOperand(Num: 0), N->getOperand(Num: 1)}); | 
|---|
| 1193 | else | 
|---|
| 1194 | Res = | 
|---|
| 1195 | CurDAG->getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), | 
|---|
| 1196 | Operand: N->getOperand(Num: 0)); | 
|---|
| 1197 | --I; | 
|---|
| 1198 | CurDAG->ReplaceAllUsesWith(From: N, To: Res.getNode()); | 
|---|
| 1199 | ++I; | 
|---|
| 1200 | MadeChange = true; | 
|---|
| 1201 | continue; | 
|---|
| 1202 | } | 
|---|
| 1203 | case ISD::SHL: | 
|---|
| 1204 | case ISD::SRA: | 
|---|
| 1205 | case ISD::SRL: { | 
|---|
| 1206 | // Replace vector shifts with their X86 specific equivalent so we don't | 
|---|
| 1207 | // need 2 sets of patterns. | 
|---|
| 1208 | if (!N->getValueType(ResNo: 0).isVector()) | 
|---|
| 1209 | break; | 
|---|
| 1210 |  | 
|---|
| 1211 | unsigned NewOpc; | 
|---|
| 1212 | switch (N->getOpcode()) { | 
|---|
| 1213 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 1214 | case ISD::SHL: NewOpc = X86ISD::VSHLV; break; | 
|---|
| 1215 | case ISD::SRA: NewOpc = X86ISD::VSRAV; break; | 
|---|
| 1216 | case ISD::SRL: NewOpc = X86ISD::VSRLV; break; | 
|---|
| 1217 | } | 
|---|
| 1218 | SDValue Res = CurDAG->getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), | 
|---|
| 1219 | N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 1)); | 
|---|
| 1220 | --I; | 
|---|
| 1221 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Res); | 
|---|
| 1222 | ++I; | 
|---|
| 1223 | MadeChange = true; | 
|---|
| 1224 | continue; | 
|---|
| 1225 | } | 
|---|
| 1226 | case ISD::ANY_EXTEND: | 
|---|
| 1227 | case ISD::ANY_EXTEND_VECTOR_INREG: { | 
|---|
| 1228 | // Replace vector any extend with the zero extend equivalents so we don't | 
|---|
| 1229 | // need 2 sets of patterns. Ignore vXi1 extensions. | 
|---|
| 1230 | if (!N->getValueType(ResNo: 0).isVector()) | 
|---|
| 1231 | break; | 
|---|
| 1232 |  | 
|---|
| 1233 | unsigned NewOpc; | 
|---|
| 1234 | if (N->getOperand(Num: 0).getScalarValueSizeInBits() == 1) { | 
|---|
| 1235 | assert(N->getOpcode() == ISD::ANY_EXTEND && | 
|---|
| 1236 | "Unexpected opcode for mask vector!"); | 
|---|
| 1237 | NewOpc = ISD::SIGN_EXTEND; | 
|---|
| 1238 | } else { | 
|---|
| 1239 | NewOpc = N->getOpcode() == ISD::ANY_EXTEND | 
|---|
| 1240 | ? ISD::ZERO_EXTEND | 
|---|
| 1241 | : ISD::ZERO_EXTEND_VECTOR_INREG; | 
|---|
| 1242 | } | 
|---|
| 1243 |  | 
|---|
| 1244 | SDValue Res = CurDAG->getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), | 
|---|
| 1245 | Operand: N->getOperand(Num: 0)); | 
|---|
| 1246 | --I; | 
|---|
| 1247 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Res); | 
|---|
| 1248 | ++I; | 
|---|
| 1249 | MadeChange = true; | 
|---|
| 1250 | continue; | 
|---|
| 1251 | } | 
|---|
| 1252 | case ISD::FCEIL: | 
|---|
| 1253 | case ISD::STRICT_FCEIL: | 
|---|
| 1254 | case ISD::FFLOOR: | 
|---|
| 1255 | case ISD::STRICT_FFLOOR: | 
|---|
| 1256 | case ISD::FTRUNC: | 
|---|
| 1257 | case ISD::STRICT_FTRUNC: | 
|---|
| 1258 | case ISD::FROUNDEVEN: | 
|---|
| 1259 | case ISD::STRICT_FROUNDEVEN: | 
|---|
| 1260 | case ISD::FNEARBYINT: | 
|---|
| 1261 | case ISD::STRICT_FNEARBYINT: | 
|---|
| 1262 | case ISD::FRINT: | 
|---|
| 1263 | case ISD::STRICT_FRINT: { | 
|---|
| 1264 | // Replace fp rounding with their X86 specific equivalent so we don't | 
|---|
| 1265 | // need 2 sets of patterns. | 
|---|
| 1266 | unsigned Imm; | 
|---|
| 1267 | switch (N->getOpcode()) { | 
|---|
| 1268 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 1269 | case ISD::STRICT_FCEIL: | 
|---|
| 1270 | case ISD::FCEIL:      Imm = 0xA; break; | 
|---|
| 1271 | case ISD::STRICT_FFLOOR: | 
|---|
| 1272 | case ISD::FFLOOR:     Imm = 0x9; break; | 
|---|
| 1273 | case ISD::STRICT_FTRUNC: | 
|---|
| 1274 | case ISD::FTRUNC:     Imm = 0xB; break; | 
|---|
| 1275 | case ISD::STRICT_FROUNDEVEN: | 
|---|
| 1276 | case ISD::FROUNDEVEN: Imm = 0x8; break; | 
|---|
| 1277 | case ISD::STRICT_FNEARBYINT: | 
|---|
| 1278 | case ISD::FNEARBYINT: Imm = 0xC; break; | 
|---|
| 1279 | case ISD::STRICT_FRINT: | 
|---|
| 1280 | case ISD::FRINT:      Imm = 0x4; break; | 
|---|
| 1281 | } | 
|---|
| 1282 | SDLoc dl(N); | 
|---|
| 1283 | bool IsStrict = N->isStrictFPOpcode(); | 
|---|
| 1284 | SDValue Res; | 
|---|
| 1285 | if (IsStrict) | 
|---|
| 1286 | Res = CurDAG->getNode(Opcode: X86ISD::STRICT_VRNDSCALE, DL: dl, | 
|---|
| 1287 | ResultTys: {N->getValueType(ResNo: 0), MVT::Other}, | 
|---|
| 1288 | Ops: {N->getOperand(Num: 0), N->getOperand(Num: 1), | 
|---|
| 1289 | CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32)}); | 
|---|
| 1290 | else | 
|---|
| 1291 | Res = CurDAG->getNode(Opcode: X86ISD::VRNDSCALE, DL: dl, VT: N->getValueType(ResNo: 0), | 
|---|
| 1292 | N1: N->getOperand(Num: 0), | 
|---|
| 1293 | N2: CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32)); | 
|---|
| 1294 | --I; | 
|---|
| 1295 | CurDAG->ReplaceAllUsesWith(From: N, To: Res.getNode()); | 
|---|
| 1296 | ++I; | 
|---|
| 1297 | MadeChange = true; | 
|---|
| 1298 | continue; | 
|---|
| 1299 | } | 
|---|
| 1300 | case X86ISD::FANDN: | 
|---|
| 1301 | case X86ISD::FAND: | 
|---|
| 1302 | case X86ISD::FOR: | 
|---|
| 1303 | case X86ISD::FXOR: { | 
|---|
| 1304 | // Widen scalar fp logic ops to vector to reduce isel patterns. | 
|---|
| 1305 | // FIXME: Can we do this during lowering/combine. | 
|---|
| 1306 | MVT VT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 1307 | if (VT.isVector() || VT == MVT::f128) | 
|---|
| 1308 | break; | 
|---|
| 1309 |  | 
|---|
| 1310 | MVT VecVT = VT == MVT::f64   ? MVT::v2f64 | 
|---|
| 1311 | : VT == MVT::f32 ? MVT::v4f32 | 
|---|
| 1312 | : MVT::v8f16; | 
|---|
| 1313 |  | 
|---|
| 1314 | SDLoc dl(N); | 
|---|
| 1315 | SDValue Op0 = CurDAG->getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: VecVT, | 
|---|
| 1316 | Operand: N->getOperand(Num: 0)); | 
|---|
| 1317 | SDValue Op1 = CurDAG->getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: VecVT, | 
|---|
| 1318 | Operand: N->getOperand(Num: 1)); | 
|---|
| 1319 |  | 
|---|
| 1320 | SDValue Res; | 
|---|
| 1321 | if (Subtarget->hasSSE2()) { | 
|---|
| 1322 | EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger(); | 
|---|
| 1323 | Op0 = CurDAG->getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Op0); | 
|---|
| 1324 | Op1 = CurDAG->getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Op1); | 
|---|
| 1325 | unsigned Opc; | 
|---|
| 1326 | switch (N->getOpcode()) { | 
|---|
| 1327 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 1328 | case X86ISD::FANDN: Opc = X86ISD::ANDNP; break; | 
|---|
| 1329 | case X86ISD::FAND:  Opc = ISD::AND;      break; | 
|---|
| 1330 | case X86ISD::FOR:   Opc = ISD::OR;       break; | 
|---|
| 1331 | case X86ISD::FXOR:  Opc = ISD::XOR;      break; | 
|---|
| 1332 | } | 
|---|
| 1333 | Res = CurDAG->getNode(Opcode: Opc, DL: dl, VT: IntVT, N1: Op0, N2: Op1); | 
|---|
| 1334 | Res = CurDAG->getNode(Opcode: ISD::BITCAST, DL: dl, VT: VecVT, Operand: Res); | 
|---|
| 1335 | } else { | 
|---|
| 1336 | Res = CurDAG->getNode(Opcode: N->getOpcode(), DL: dl, VT: VecVT, N1: Op0, N2: Op1); | 
|---|
| 1337 | } | 
|---|
| 1338 | Res = CurDAG->getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT, N1: Res, | 
|---|
| 1339 | N2: CurDAG->getIntPtrConstant(Val: 0, DL: dl)); | 
|---|
| 1340 | --I; | 
|---|
| 1341 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Res); | 
|---|
| 1342 | ++I; | 
|---|
| 1343 | MadeChange = true; | 
|---|
| 1344 | continue; | 
|---|
| 1345 | } | 
|---|
| 1346 | } | 
|---|
| 1347 |  | 
|---|
| 1348 | if (OptLevel != CodeGenOptLevel::None && | 
|---|
| 1349 | // Only do this when the target can fold the load into the call or | 
|---|
| 1350 | // jmp. | 
|---|
| 1351 | !Subtarget->useIndirectThunkCalls() && | 
|---|
| 1352 | ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || | 
|---|
| 1353 | (N->getOpcode() == X86ISD::TC_RETURN && | 
|---|
| 1354 | (Subtarget->is64Bit() || | 
|---|
| 1355 | !getTargetMachine().isPositionIndependent())))) { | 
|---|
| 1356 | /// Also try moving call address load from outside callseq_start to just | 
|---|
| 1357 | /// before the call to allow it to be folded. | 
|---|
| 1358 | /// | 
|---|
| 1359 | ///     [Load chain] | 
|---|
| 1360 | ///         ^ | 
|---|
| 1361 | ///         | | 
|---|
| 1362 | ///       [Load] | 
|---|
| 1363 | ///       ^    ^ | 
|---|
| 1364 | ///       |    | | 
|---|
| 1365 | ///      /      \-- | 
|---|
| 1366 | ///     /          | | 
|---|
| 1367 | ///[CALLSEQ_START] | | 
|---|
| 1368 | ///     ^          | | 
|---|
| 1369 | ///     |          | | 
|---|
| 1370 | /// [LOAD/C2Reg]   | | 
|---|
| 1371 | ///     |          | | 
|---|
| 1372 | ///      \        / | 
|---|
| 1373 | ///       \      / | 
|---|
| 1374 | ///       [CALL] | 
|---|
| 1375 | bool HasCallSeq = N->getOpcode() == X86ISD::CALL; | 
|---|
| 1376 | SDValue Chain = N->getOperand(Num: 0); | 
|---|
| 1377 | SDValue Load  = N->getOperand(Num: 1); | 
|---|
| 1378 | if (!isCalleeLoad(Callee: Load, Chain, HasCallSeq)) | 
|---|
| 1379 | continue; | 
|---|
| 1380 | moveBelowOrigChain(CurDAG, Load, Call: SDValue(N, 0), OrigChain: Chain); | 
|---|
| 1381 | ++NumLoadMoved; | 
|---|
| 1382 | MadeChange = true; | 
|---|
| 1383 | continue; | 
|---|
| 1384 | } | 
|---|
| 1385 |  | 
|---|
| 1386 | // Lower fpround and fpextend nodes that target the FP stack to be store and | 
|---|
| 1387 | // load to the stack.  This is a gross hack.  We would like to simply mark | 
|---|
| 1388 | // these as being illegal, but when we do that, legalize produces these when | 
|---|
| 1389 | // it expands calls, then expands these in the same legalize pass.  We would | 
|---|
| 1390 | // like dag combine to be able to hack on these between the call expansion | 
|---|
| 1391 | // and the node legalization.  As such this pass basically does "really | 
|---|
| 1392 | // late" legalization of these inline with the X86 isel pass. | 
|---|
| 1393 | // FIXME: This should only happen when not compiled with -O0. | 
|---|
| 1394 | switch (N->getOpcode()) { | 
|---|
| 1395 | default: continue; | 
|---|
| 1396 | case ISD::FP_ROUND: | 
|---|
| 1397 | case ISD::FP_EXTEND: | 
|---|
| 1398 | { | 
|---|
| 1399 | MVT SrcVT = N->getOperand(Num: 0).getSimpleValueType(); | 
|---|
| 1400 | MVT DstVT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 1401 |  | 
|---|
| 1402 | // If any of the sources are vectors, no fp stack involved. | 
|---|
| 1403 | if (SrcVT.isVector() || DstVT.isVector()) | 
|---|
| 1404 | continue; | 
|---|
| 1405 |  | 
|---|
| 1406 | // If the source and destination are SSE registers, then this is a legal | 
|---|
| 1407 | // conversion that should not be lowered. | 
|---|
| 1408 | const X86TargetLowering *X86Lowering = | 
|---|
| 1409 | static_cast<const X86TargetLowering *>(TLI); | 
|---|
| 1410 | bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(VT: SrcVT); | 
|---|
| 1411 | bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(VT: DstVT); | 
|---|
| 1412 | if (SrcIsSSE && DstIsSSE) | 
|---|
| 1413 | continue; | 
|---|
| 1414 |  | 
|---|
| 1415 | if (!SrcIsSSE && !DstIsSSE) { | 
|---|
| 1416 | // If this is an FPStack extension, it is a noop. | 
|---|
| 1417 | if (N->getOpcode() == ISD::FP_EXTEND) | 
|---|
| 1418 | continue; | 
|---|
| 1419 | // If this is a value-preserving FPStack truncation, it is a noop. | 
|---|
| 1420 | if (N->getConstantOperandVal(Num: 1)) | 
|---|
| 1421 | continue; | 
|---|
| 1422 | } | 
|---|
| 1423 |  | 
|---|
| 1424 | // Here we could have an FP stack truncation or an FPStack <-> SSE convert. | 
|---|
| 1425 | // FPStack has extload and truncstore.  SSE can fold direct loads into other | 
|---|
| 1426 | // operations.  Based on this, decide what we want to do. | 
|---|
| 1427 | MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT; | 
|---|
| 1428 | SDValue MemTmp = CurDAG->CreateStackTemporary(VT: MemVT); | 
|---|
| 1429 | int SPFI = cast<FrameIndexSDNode>(Val&: MemTmp)->getIndex(); | 
|---|
| 1430 | MachinePointerInfo MPI = | 
|---|
| 1431 | MachinePointerInfo::getFixedStack(MF&: CurDAG->getMachineFunction(), FI: SPFI); | 
|---|
| 1432 | SDLoc dl(N); | 
|---|
| 1433 |  | 
|---|
| 1434 | // FIXME: optimize the case where the src/dest is a load or store? | 
|---|
| 1435 |  | 
|---|
| 1436 | SDValue Store = CurDAG->getTruncStore( | 
|---|
| 1437 | Chain: CurDAG->getEntryNode(), dl, Val: N->getOperand(Num: 0), Ptr: MemTmp, PtrInfo: MPI, SVT: MemVT); | 
|---|
| 1438 | SDValue Result = CurDAG->getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: DstVT, Chain: Store, | 
|---|
| 1439 | Ptr: MemTmp, PtrInfo: MPI, MemVT); | 
|---|
| 1440 |  | 
|---|
| 1441 | // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the | 
|---|
| 1442 | // extload we created.  This will cause general havoc on the dag because | 
|---|
| 1443 | // anything below the conversion could be folded into other existing nodes. | 
|---|
| 1444 | // To avoid invalidating 'I', back it up to the convert node. | 
|---|
| 1445 | --I; | 
|---|
| 1446 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result); | 
|---|
| 1447 | break; | 
|---|
| 1448 | } | 
|---|
| 1449 |  | 
|---|
| 1450 | //The sequence of events for lowering STRICT_FP versions of these nodes requires | 
|---|
| 1451 | //dealing with the chain differently, as there is already a preexisting chain. | 
|---|
| 1452 | case ISD::STRICT_FP_ROUND: | 
|---|
| 1453 | case ISD::STRICT_FP_EXTEND: | 
|---|
| 1454 | { | 
|---|
| 1455 | MVT SrcVT = N->getOperand(Num: 1).getSimpleValueType(); | 
|---|
| 1456 | MVT DstVT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 1457 |  | 
|---|
| 1458 | // If any of the sources are vectors, no fp stack involved. | 
|---|
| 1459 | if (SrcVT.isVector() || DstVT.isVector()) | 
|---|
| 1460 | continue; | 
|---|
| 1461 |  | 
|---|
| 1462 | // If the source and destination are SSE registers, then this is a legal | 
|---|
| 1463 | // conversion that should not be lowered. | 
|---|
| 1464 | const X86TargetLowering *X86Lowering = | 
|---|
| 1465 | static_cast<const X86TargetLowering *>(TLI); | 
|---|
| 1466 | bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(VT: SrcVT); | 
|---|
| 1467 | bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(VT: DstVT); | 
|---|
| 1468 | if (SrcIsSSE && DstIsSSE) | 
|---|
| 1469 | continue; | 
|---|
| 1470 |  | 
|---|
| 1471 | if (!SrcIsSSE && !DstIsSSE) { | 
|---|
| 1472 | // If this is an FPStack extension, it is a noop. | 
|---|
| 1473 | if (N->getOpcode() == ISD::STRICT_FP_EXTEND) | 
|---|
| 1474 | continue; | 
|---|
| 1475 | // If this is a value-preserving FPStack truncation, it is a noop. | 
|---|
| 1476 | if (N->getConstantOperandVal(Num: 2)) | 
|---|
| 1477 | continue; | 
|---|
| 1478 | } | 
|---|
| 1479 |  | 
|---|
| 1480 | // Here we could have an FP stack truncation or an FPStack <-> SSE convert. | 
|---|
| 1481 | // FPStack has extload and truncstore.  SSE can fold direct loads into other | 
|---|
| 1482 | // operations.  Based on this, decide what we want to do. | 
|---|
| 1483 | MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT; | 
|---|
| 1484 | SDValue MemTmp = CurDAG->CreateStackTemporary(VT: MemVT); | 
|---|
| 1485 | int SPFI = cast<FrameIndexSDNode>(Val&: MemTmp)->getIndex(); | 
|---|
| 1486 | MachinePointerInfo MPI = | 
|---|
| 1487 | MachinePointerInfo::getFixedStack(MF&: CurDAG->getMachineFunction(), FI: SPFI); | 
|---|
| 1488 | SDLoc dl(N); | 
|---|
| 1489 |  | 
|---|
| 1490 | // FIXME: optimize the case where the src/dest is a load or store? | 
|---|
| 1491 |  | 
|---|
| 1492 | //Since the operation is StrictFP, use the preexisting chain. | 
|---|
| 1493 | SDValue Store, Result; | 
|---|
| 1494 | if (!SrcIsSSE) { | 
|---|
| 1495 | SDVTList VTs = CurDAG->getVTList(VT: MVT::Other); | 
|---|
| 1496 | SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), MemTmp}; | 
|---|
| 1497 | Store = CurDAG->getMemIntrinsicNode(Opcode: X86ISD::FST, dl, VTList: VTs, Ops, MemVT, | 
|---|
| 1498 | PtrInfo: MPI, /*Align*/ Alignment: std::nullopt, | 
|---|
| 1499 | Flags: MachineMemOperand::MOStore); | 
|---|
| 1500 | if (N->getFlags().hasNoFPExcept()) { | 
|---|
| 1501 | SDNodeFlags Flags = Store->getFlags(); | 
|---|
| 1502 | Flags.setNoFPExcept(true); | 
|---|
| 1503 | Store->setFlags(Flags); | 
|---|
| 1504 | } | 
|---|
| 1505 | } else { | 
|---|
| 1506 | assert(SrcVT == MemVT && "Unexpected VT!"); | 
|---|
| 1507 | Store = CurDAG->getStore(Chain: N->getOperand(Num: 0), dl, Val: N->getOperand(Num: 1), Ptr: MemTmp, | 
|---|
| 1508 | PtrInfo: MPI); | 
|---|
| 1509 | } | 
|---|
| 1510 |  | 
|---|
| 1511 | if (!DstIsSSE) { | 
|---|
| 1512 | SDVTList VTs = CurDAG->getVTList(VT1: DstVT, VT2: MVT::Other); | 
|---|
| 1513 | SDValue Ops[] = {Store, MemTmp}; | 
|---|
| 1514 | Result = CurDAG->getMemIntrinsicNode( | 
|---|
| 1515 | Opcode: X86ISD::FLD, dl, VTList: VTs, Ops, MemVT, PtrInfo: MPI, | 
|---|
| 1516 | /*Align*/ Alignment: std::nullopt, Flags: MachineMemOperand::MOLoad); | 
|---|
| 1517 | if (N->getFlags().hasNoFPExcept()) { | 
|---|
| 1518 | SDNodeFlags Flags = Result->getFlags(); | 
|---|
| 1519 | Flags.setNoFPExcept(true); | 
|---|
| 1520 | Result->setFlags(Flags); | 
|---|
| 1521 | } | 
|---|
| 1522 | } else { | 
|---|
| 1523 | assert(DstVT == MemVT && "Unexpected VT!"); | 
|---|
| 1524 | Result = CurDAG->getLoad(VT: DstVT, dl, Chain: Store, Ptr: MemTmp, PtrInfo: MPI); | 
|---|
| 1525 | } | 
|---|
| 1526 |  | 
|---|
| 1527 | // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the | 
|---|
| 1528 | // extload we created.  This will cause general havoc on the dag because | 
|---|
| 1529 | // anything below the conversion could be folded into other existing nodes. | 
|---|
| 1530 | // To avoid invalidating 'I', back it up to the convert node. | 
|---|
| 1531 | --I; | 
|---|
| 1532 | CurDAG->ReplaceAllUsesWith(From: N, To: Result.getNode()); | 
|---|
| 1533 | break; | 
|---|
| 1534 | } | 
|---|
| 1535 | } | 
|---|
| 1536 |  | 
|---|
| 1537 |  | 
|---|
| 1538 | // Now that we did that, the node is dead.  Increment the iterator to the | 
|---|
| 1539 | // next node to process, then delete N. | 
|---|
| 1540 | ++I; | 
|---|
| 1541 | MadeChange = true; | 
|---|
| 1542 | } | 
|---|
| 1543 |  | 
|---|
| 1544 | // Remove any dead nodes that may have been left behind. | 
|---|
| 1545 | if (MadeChange) | 
|---|
| 1546 | CurDAG->RemoveDeadNodes(); | 
|---|
| 1547 | } | 
|---|
| 1548 |  | 
|---|
| 1549 | // Look for a redundant movzx/movsx that can occur after an 8-bit divrem. | 
|---|
| 1550 | bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) { | 
|---|
| 1551 | unsigned Opc = N->getMachineOpcode(); | 
|---|
| 1552 | if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 && | 
|---|
| 1553 | Opc != X86::MOVSX64rr8) | 
|---|
| 1554 | return false; | 
|---|
| 1555 |  | 
|---|
| 1556 | SDValue N0 = N->getOperand(Num: 0); | 
|---|
| 1557 |  | 
|---|
| 1558 | // We need to be extracting the lower bit of an extend. | 
|---|
| 1559 | if (!N0.isMachineOpcode() || | 
|---|
| 1560 | N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG || | 
|---|
| 1561 | N0.getConstantOperandVal(i: 1) != X86::sub_8bit) | 
|---|
| 1562 | return false; | 
|---|
| 1563 |  | 
|---|
| 1564 | // We're looking for either a movsx or movzx to match the original opcode. | 
|---|
| 1565 | unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX | 
|---|
| 1566 | : X86::MOVSX32rr8_NOREX; | 
|---|
| 1567 | SDValue N00 = N0.getOperand(i: 0); | 
|---|
| 1568 | if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc) | 
|---|
| 1569 | return false; | 
|---|
| 1570 |  | 
|---|
| 1571 | if (Opc == X86::MOVSX64rr8) { | 
|---|
| 1572 | // If we had a sign extend from 8 to 64 bits. We still need to go from 32 | 
|---|
| 1573 | // to 64. | 
|---|
| 1574 | MachineSDNode *Extend = CurDAG->getMachineNode(Opcode: X86::MOVSX64rr32, dl: SDLoc(N), | 
|---|
| 1575 | VT: MVT::i64, Op1: N00); | 
|---|
| 1576 | ReplaceUses(F: N, T: Extend); | 
|---|
| 1577 | } else { | 
|---|
| 1578 | // Ok we can drop this extend and just use the original extend. | 
|---|
| 1579 | ReplaceUses(F: N, T: N00.getNode()); | 
|---|
| 1580 | } | 
|---|
| 1581 |  | 
|---|
| 1582 | return true; | 
|---|
| 1583 | } | 
|---|
| 1584 |  | 
|---|
| 1585 | void X86DAGToDAGISel::PostprocessISelDAG() { | 
|---|
| 1586 | // Skip peepholes at -O0. | 
|---|
| 1587 | if (TM.getOptLevel() == CodeGenOptLevel::None) | 
|---|
| 1588 | return; | 
|---|
| 1589 |  | 
|---|
| 1590 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | 
|---|
| 1591 |  | 
|---|
| 1592 | bool MadeChange = false; | 
|---|
| 1593 | while (Position != CurDAG->allnodes_begin()) { | 
|---|
| 1594 | SDNode *N = &*--Position; | 
|---|
| 1595 | // Skip dead nodes and any non-machine opcodes. | 
|---|
| 1596 | if (N->use_empty() || !N->isMachineOpcode()) | 
|---|
| 1597 | continue; | 
|---|
| 1598 |  | 
|---|
| 1599 | if (tryOptimizeRem8Extend(N)) { | 
|---|
| 1600 | MadeChange = true; | 
|---|
| 1601 | continue; | 
|---|
| 1602 | } | 
|---|
| 1603 |  | 
|---|
| 1604 | unsigned Opc = N->getMachineOpcode(); | 
|---|
| 1605 | switch (Opc) { | 
|---|
| 1606 | default: | 
|---|
| 1607 | continue; | 
|---|
| 1608 | // ANDrr/rm + TESTrr+ -> TESTrr/TESTmr | 
|---|
| 1609 | case X86::TEST8rr: | 
|---|
| 1610 | case X86::TEST16rr: | 
|---|
| 1611 | case X86::TEST32rr: | 
|---|
| 1612 | case X86::TEST64rr: | 
|---|
| 1613 | // ANDrr/rm + CTESTrr -> CTESTrr/CTESTmr | 
|---|
| 1614 | case X86::CTEST8rr: | 
|---|
| 1615 | case X86::CTEST16rr: | 
|---|
| 1616 | case X86::CTEST32rr: | 
|---|
| 1617 | case X86::CTEST64rr: { | 
|---|
| 1618 | auto &Op0 = N->getOperand(Num: 0); | 
|---|
| 1619 | if (Op0 != N->getOperand(Num: 1) || !Op0->hasNUsesOfValue(NUses: 2, Value: Op0.getResNo()) || | 
|---|
| 1620 | !Op0.isMachineOpcode()) | 
|---|
| 1621 | continue; | 
|---|
| 1622 | SDValue And = N->getOperand(Num: 0); | 
|---|
| 1623 | #define CASE_ND(OP)                                                            \ | 
|---|
| 1624 | case X86::OP:                                                                \ | 
|---|
| 1625 | case X86::OP##_ND: | 
|---|
| 1626 | switch (And.getMachineOpcode()) { | 
|---|
| 1627 | default: | 
|---|
| 1628 | continue; | 
|---|
| 1629 | CASE_ND(AND8rr) | 
|---|
| 1630 | CASE_ND(AND16rr) | 
|---|
| 1631 | CASE_ND(AND32rr) | 
|---|
| 1632 | CASE_ND(AND64rr) { | 
|---|
| 1633 | if (And->hasAnyUseOfValue(Value: 1)) | 
|---|
| 1634 | continue; | 
|---|
| 1635 | SmallVector<SDValue> Ops(N->op_values()); | 
|---|
| 1636 | Ops[0] = And.getOperand(i: 0); | 
|---|
| 1637 | Ops[1] = And.getOperand(i: 1); | 
|---|
| 1638 | MachineSDNode *Test = | 
|---|
| 1639 | CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VT: MVT::i32, Ops); | 
|---|
| 1640 | ReplaceUses(F: N, T: Test); | 
|---|
| 1641 | MadeChange = true; | 
|---|
| 1642 | continue; | 
|---|
| 1643 | } | 
|---|
| 1644 | CASE_ND(AND8rm) | 
|---|
| 1645 | CASE_ND(AND16rm) | 
|---|
| 1646 | CASE_ND(AND32rm) | 
|---|
| 1647 | CASE_ND(AND64rm) { | 
|---|
| 1648 | if (And->hasAnyUseOfValue(Value: 1)) | 
|---|
| 1649 | continue; | 
|---|
| 1650 | unsigned NewOpc; | 
|---|
| 1651 | bool IsCTESTCC = X86::isCTESTCC(Opcode: Opc); | 
|---|
| 1652 | #define FROM_TO(A, B)                                                          \ | 
|---|
| 1653 | CASE_ND(A) NewOpc = IsCTESTCC ? X86::C##B : X86::B;                          \ | 
|---|
| 1654 | break; | 
|---|
| 1655 | switch (And.getMachineOpcode()) { | 
|---|
| 1656 | FROM_TO(AND8rm, TEST8mr); | 
|---|
| 1657 | FROM_TO(AND16rm, TEST16mr); | 
|---|
| 1658 | FROM_TO(AND32rm, TEST32mr); | 
|---|
| 1659 | FROM_TO(AND64rm, TEST64mr); | 
|---|
| 1660 | } | 
|---|
| 1661 | #undef FROM_TO | 
|---|
| 1662 | #undef CASE_ND | 
|---|
| 1663 | // Need to swap the memory and register operand. | 
|---|
| 1664 | SmallVector<SDValue> Ops = {And.getOperand(i: 1), And.getOperand(i: 2), | 
|---|
| 1665 | And.getOperand(i: 3), And.getOperand(i: 4), | 
|---|
| 1666 | And.getOperand(i: 5), And.getOperand(i: 0)}; | 
|---|
| 1667 | // CC, Cflags. | 
|---|
| 1668 | if (IsCTESTCC) { | 
|---|
| 1669 | Ops.push_back(Elt: N->getOperand(Num: 2)); | 
|---|
| 1670 | Ops.push_back(Elt: N->getOperand(Num: 3)); | 
|---|
| 1671 | } | 
|---|
| 1672 | // Chain of memory load | 
|---|
| 1673 | Ops.push_back(Elt: And.getOperand(i: 6)); | 
|---|
| 1674 | // Glue | 
|---|
| 1675 | if (IsCTESTCC) | 
|---|
| 1676 | Ops.push_back(Elt: N->getOperand(Num: 4)); | 
|---|
| 1677 |  | 
|---|
| 1678 | MachineSDNode *Test = CurDAG->getMachineNode( | 
|---|
| 1679 | Opcode: NewOpc, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::Other, Ops); | 
|---|
| 1680 | CurDAG->setNodeMemRefs( | 
|---|
| 1681 | N: Test, NewMemRefs: cast<MachineSDNode>(Val: And.getNode())->memoperands()); | 
|---|
| 1682 | ReplaceUses(F: And.getValue(R: 2), T: SDValue(Test, 1)); | 
|---|
| 1683 | ReplaceUses(F: SDValue(N, 0), T: SDValue(Test, 0)); | 
|---|
| 1684 | MadeChange = true; | 
|---|
| 1685 | continue; | 
|---|
| 1686 | } | 
|---|
| 1687 | } | 
|---|
| 1688 | } | 
|---|
| 1689 | // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is | 
|---|
| 1690 | // used. We're doing this late so we can prefer to fold the AND into masked | 
|---|
| 1691 | // comparisons. Doing that can be better for the live range of the mask | 
|---|
| 1692 | // register. | 
|---|
| 1693 | case X86::KORTESTBkk: | 
|---|
| 1694 | case X86::KORTESTWkk: | 
|---|
| 1695 | case X86::KORTESTDkk: | 
|---|
| 1696 | case X86::KORTESTQkk: { | 
|---|
| 1697 | SDValue Op0 = N->getOperand(Num: 0); | 
|---|
| 1698 | if (Op0 != N->getOperand(Num: 1) || !N->isOnlyUserOf(N: Op0.getNode()) || | 
|---|
| 1699 | !Op0.isMachineOpcode() || !onlyUsesZeroFlag(Flags: SDValue(N, 0))) | 
|---|
| 1700 | continue; | 
|---|
| 1701 | #define CASE(A)                                                                \ | 
|---|
| 1702 | case X86::A:                                                                 \ | 
|---|
| 1703 | break; | 
|---|
| 1704 | switch (Op0.getMachineOpcode()) { | 
|---|
| 1705 | default: | 
|---|
| 1706 | continue; | 
|---|
| 1707 | CASE(KANDBkk) | 
|---|
| 1708 | CASE(KANDWkk) | 
|---|
| 1709 | CASE(KANDDkk) | 
|---|
| 1710 | CASE(KANDQkk) | 
|---|
| 1711 | } | 
|---|
| 1712 | unsigned NewOpc; | 
|---|
| 1713 | #define FROM_TO(A, B)                                                          \ | 
|---|
| 1714 | case X86::A:                                                                 \ | 
|---|
| 1715 | NewOpc = X86::B;                                                           \ | 
|---|
| 1716 | break; | 
|---|
| 1717 | switch (Opc) { | 
|---|
| 1718 | FROM_TO(KORTESTBkk, KTESTBkk) | 
|---|
| 1719 | FROM_TO(KORTESTWkk, KTESTWkk) | 
|---|
| 1720 | FROM_TO(KORTESTDkk, KTESTDkk) | 
|---|
| 1721 | FROM_TO(KORTESTQkk, KTESTQkk) | 
|---|
| 1722 | } | 
|---|
| 1723 | // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other | 
|---|
| 1724 | // KAND instructions and KTEST use the same ISA feature. | 
|---|
| 1725 | if (NewOpc == X86::KTESTWkk && !Subtarget->hasDQI()) | 
|---|
| 1726 | continue; | 
|---|
| 1727 | #undef FROM_TO | 
|---|
| 1728 | MachineSDNode *KTest = CurDAG->getMachineNode( | 
|---|
| 1729 | Opcode: NewOpc, dl: SDLoc(N), VT: MVT::i32, Op1: Op0.getOperand(i: 0), Op2: Op0.getOperand(i: 1)); | 
|---|
| 1730 | ReplaceUses(F: N, T: KTest); | 
|---|
| 1731 | MadeChange = true; | 
|---|
| 1732 | continue; | 
|---|
| 1733 | } | 
|---|
| 1734 | // Attempt to remove vectors moves that were inserted to zero upper bits. | 
|---|
| 1735 | case TargetOpcode::SUBREG_TO_REG: { | 
|---|
| 1736 | unsigned SubRegIdx = N->getConstantOperandVal(Num: 2); | 
|---|
| 1737 | if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm) | 
|---|
| 1738 | continue; | 
|---|
| 1739 |  | 
|---|
| 1740 | SDValue Move = N->getOperand(Num: 1); | 
|---|
| 1741 | if (!Move.isMachineOpcode()) | 
|---|
| 1742 | continue; | 
|---|
| 1743 |  | 
|---|
| 1744 | // Make sure its one of the move opcodes we recognize. | 
|---|
| 1745 | switch (Move.getMachineOpcode()) { | 
|---|
| 1746 | default: | 
|---|
| 1747 | continue; | 
|---|
| 1748 | CASE(VMOVAPDrr)       CASE(VMOVUPDrr) | 
|---|
| 1749 | CASE(VMOVAPSrr)       CASE(VMOVUPSrr) | 
|---|
| 1750 | CASE(VMOVDQArr)       CASE(VMOVDQUrr) | 
|---|
| 1751 | CASE(VMOVAPDYrr)      CASE(VMOVUPDYrr) | 
|---|
| 1752 | CASE(VMOVAPSYrr)      CASE(VMOVUPSYrr) | 
|---|
| 1753 | CASE(VMOVDQAYrr)      CASE(VMOVDQUYrr) | 
|---|
| 1754 | CASE(VMOVAPDZ128rr)   CASE(VMOVUPDZ128rr) | 
|---|
| 1755 | CASE(VMOVAPSZ128rr)   CASE(VMOVUPSZ128rr) | 
|---|
| 1756 | CASE(VMOVDQA32Z128rr) CASE(VMOVDQU32Z128rr) | 
|---|
| 1757 | CASE(VMOVDQA64Z128rr) CASE(VMOVDQU64Z128rr) | 
|---|
| 1758 | CASE(VMOVAPDZ256rr)   CASE(VMOVUPDZ256rr) | 
|---|
| 1759 | CASE(VMOVAPSZ256rr)   CASE(VMOVUPSZ256rr) | 
|---|
| 1760 | CASE(VMOVDQA32Z256rr) CASE(VMOVDQU32Z256rr) | 
|---|
| 1761 | CASE(VMOVDQA64Z256rr) CASE(VMOVDQU64Z256rr) | 
|---|
| 1762 | } | 
|---|
| 1763 | #undef CASE | 
|---|
| 1764 |  | 
|---|
| 1765 | SDValue In = Move.getOperand(i: 0); | 
|---|
| 1766 | if (!In.isMachineOpcode() || | 
|---|
| 1767 | In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END) | 
|---|
| 1768 | continue; | 
|---|
| 1769 |  | 
|---|
| 1770 | // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers | 
|---|
| 1771 | // the SHA instructions which use a legacy encoding. | 
|---|
| 1772 | uint64_t TSFlags = getInstrInfo()->get(Opcode: In.getMachineOpcode()).TSFlags; | 
|---|
| 1773 | if ((TSFlags & X86II::EncodingMask) != X86II::VEX && | 
|---|
| 1774 | (TSFlags & X86II::EncodingMask) != X86II::EVEX && | 
|---|
| 1775 | (TSFlags & X86II::EncodingMask) != X86II::XOP) | 
|---|
| 1776 | continue; | 
|---|
| 1777 |  | 
|---|
| 1778 | // Producing instruction is another vector instruction. We can drop the | 
|---|
| 1779 | // move. | 
|---|
| 1780 | CurDAG->UpdateNodeOperands(N, Op1: N->getOperand(Num: 0), Op2: In, Op3: N->getOperand(Num: 2)); | 
|---|
| 1781 | MadeChange = true; | 
|---|
| 1782 | } | 
|---|
| 1783 | } | 
|---|
| 1784 | } | 
|---|
| 1785 |  | 
|---|
| 1786 | if (MadeChange) | 
|---|
| 1787 | CurDAG->RemoveDeadNodes(); | 
|---|
| 1788 | } | 
|---|
| 1789 |  | 
|---|
| 1790 |  | 
|---|
| 1791 | /// Emit any code that needs to be executed only in the main function. | 
|---|
| 1792 | void X86DAGToDAGISel::emitSpecialCodeForMain() { | 
|---|
| 1793 | if (Subtarget->isTargetCygMing()) { | 
|---|
| 1794 | TargetLowering::ArgListTy Args; | 
|---|
| 1795 | auto &DL = CurDAG->getDataLayout(); | 
|---|
| 1796 |  | 
|---|
| 1797 | TargetLowering::CallLoweringInfo CLI(*CurDAG); | 
|---|
| 1798 | CLI.setChain(CurDAG->getRoot()) | 
|---|
| 1799 | .setCallee(CC: CallingConv::C, ResultType: Type::getVoidTy(C&: *CurDAG->getContext()), | 
|---|
| 1800 | Target: CurDAG->getExternalSymbol(Sym: "__main", VT: TLI->getPointerTy(DL)), | 
|---|
| 1801 | ArgsList: std::move(Args)); | 
|---|
| 1802 | const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); | 
|---|
| 1803 | std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); | 
|---|
| 1804 | CurDAG->setRoot(Result.second); | 
|---|
| 1805 | } | 
|---|
| 1806 | } | 
|---|
| 1807 |  | 
|---|
| 1808 | void X86DAGToDAGISel::emitFunctionEntryCode() { | 
|---|
| 1809 | // If this is main, emit special code for main. | 
|---|
| 1810 | const Function &F = MF->getFunction(); | 
|---|
| 1811 | if (F.hasExternalLinkage() && F.getName() == "main") | 
|---|
| 1812 | emitSpecialCodeForMain(); | 
|---|
| 1813 | } | 
|---|
| 1814 |  | 
|---|
| 1815 | static bool isDispSafeForFrameIndexOrRegBase(int64_t Val) { | 
|---|
| 1816 | // We can run into an issue where a frame index or a register base | 
|---|
| 1817 | // includes a displacement that, when added to the explicit displacement, | 
|---|
| 1818 | // will overflow the displacement field. Assuming that the | 
|---|
| 1819 | // displacement fits into a 31-bit integer  (which is only slightly more | 
|---|
| 1820 | // aggressive than the current fundamental assumption that it fits into | 
|---|
| 1821 | // a 32-bit integer), a 31-bit disp should always be safe. | 
|---|
| 1822 | return isInt<31>(x: Val); | 
|---|
| 1823 | } | 
|---|
| 1824 |  | 
|---|
| 1825 | bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset, | 
|---|
| 1826 | X86ISelAddressMode &AM) { | 
|---|
| 1827 | // We may have already matched a displacement and the caller just added the | 
|---|
| 1828 | // symbolic displacement. So we still need to do the checks even if Offset | 
|---|
| 1829 | // is zero. | 
|---|
| 1830 |  | 
|---|
| 1831 | int64_t Val = AM.Disp + Offset; | 
|---|
| 1832 |  | 
|---|
| 1833 | // Cannot combine ExternalSymbol displacements with integer offsets. | 
|---|
| 1834 | if (Val != 0 && (AM.ES || AM.MCSym)) | 
|---|
| 1835 | return true; | 
|---|
| 1836 |  | 
|---|
| 1837 | CodeModel::Model M = TM.getCodeModel(); | 
|---|
| 1838 | if (Subtarget->is64Bit()) { | 
|---|
| 1839 | if (Val != 0 && | 
|---|
| 1840 | !X86::isOffsetSuitableForCodeModel(Offset: Val, M, | 
|---|
| 1841 | hasSymbolicDisplacement: AM.hasSymbolicDisplacement())) | 
|---|
| 1842 | return true; | 
|---|
| 1843 | // In addition to the checks required for a register base, check that | 
|---|
| 1844 | // we do not try to use an unsafe Disp with a frame index. | 
|---|
| 1845 | if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && | 
|---|
| 1846 | !isDispSafeForFrameIndexOrRegBase(Val)) | 
|---|
| 1847 | return true; | 
|---|
| 1848 | // In ILP32 (x32) mode, pointers are 32 bits and need to be zero-extended to | 
|---|
| 1849 | // 64 bits. Instructions with 32-bit register addresses perform this zero | 
|---|
| 1850 | // extension for us and we can safely ignore the high bits of Offset. | 
|---|
| 1851 | // Instructions with only a 32-bit immediate address do not, though: they | 
|---|
| 1852 | // sign extend instead. This means only address the low 2GB of address space | 
|---|
| 1853 | // is directly addressable, we need indirect addressing for the high 2GB of | 
|---|
| 1854 | // address space. | 
|---|
| 1855 | // TODO: Some of the earlier checks may be relaxed for ILP32 mode as the | 
|---|
| 1856 | // implicit zero extension of instructions would cover up any problem. | 
|---|
| 1857 | // However, we have asserts elsewhere that get triggered if we do, so keep | 
|---|
| 1858 | // the checks for now. | 
|---|
| 1859 | // TODO: We would actually be able to accept these, as well as the same | 
|---|
| 1860 | // addresses in LP64 mode, by adding the EIZ pseudo-register as an operand | 
|---|
| 1861 | // to get an address size override to be emitted. However, this | 
|---|
| 1862 | // pseudo-register is not part of any register class and therefore causes | 
|---|
| 1863 | // MIR verification to fail. | 
|---|
| 1864 | if (Subtarget->isTarget64BitILP32() && | 
|---|
| 1865 | !isDispSafeForFrameIndexOrRegBase(Val: (uint32_t)Val) && | 
|---|
| 1866 | !AM.hasBaseOrIndexReg()) | 
|---|
| 1867 | return true; | 
|---|
| 1868 | } else if (AM.hasBaseOrIndexReg() && !isDispSafeForFrameIndexOrRegBase(Val)) | 
|---|
| 1869 | // For 32-bit X86, make sure the displacement still isn't close to the | 
|---|
| 1870 | // expressible limit. | 
|---|
| 1871 | return true; | 
|---|
| 1872 | AM.Disp = Val; | 
|---|
| 1873 | return false; | 
|---|
| 1874 | } | 
|---|
| 1875 |  | 
|---|
| 1876 | bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, | 
|---|
| 1877 | bool AllowSegmentRegForX32) { | 
|---|
| 1878 | SDValue Address = N->getOperand(Num: 1); | 
|---|
| 1879 |  | 
|---|
| 1880 | // load gs:0 -> GS segment register. | 
|---|
| 1881 | // load fs:0 -> FS segment register. | 
|---|
| 1882 | // | 
|---|
| 1883 | // This optimization is generally valid because the GNU TLS model defines that | 
|---|
| 1884 | // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode | 
|---|
| 1885 | // with 32-bit registers, as we get in ILP32 mode, those registers are first | 
|---|
| 1886 | // zero-extended to 64 bits and then added it to the base address, which gives | 
|---|
| 1887 | // unwanted results when the register holds a negative value. | 
|---|
| 1888 | // For more information see http://people.redhat.com/drepper/tls.pdf | 
|---|
| 1889 | if (isNullConstant(V: Address) && AM.Segment.getNode() == nullptr && | 
|---|
| 1890 | !IndirectTlsSegRefs && | 
|---|
| 1891 | (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() || | 
|---|
| 1892 | Subtarget->isTargetFuchsia())) { | 
|---|
| 1893 | if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32) | 
|---|
| 1894 | return true; | 
|---|
| 1895 | switch (N->getPointerInfo().getAddrSpace()) { | 
|---|
| 1896 | case X86AS::GS: | 
|---|
| 1897 | AM.Segment = CurDAG->getRegister(Reg: X86::GS, VT: MVT::i16); | 
|---|
| 1898 | return false; | 
|---|
| 1899 | case X86AS::FS: | 
|---|
| 1900 | AM.Segment = CurDAG->getRegister(Reg: X86::FS, VT: MVT::i16); | 
|---|
| 1901 | return false; | 
|---|
| 1902 | // Address space X86AS::SS is not handled here, because it is not used to | 
|---|
| 1903 | // address TLS areas. | 
|---|
| 1904 | } | 
|---|
| 1905 | } | 
|---|
| 1906 |  | 
|---|
| 1907 | return true; | 
|---|
| 1908 | } | 
|---|
| 1909 |  | 
|---|
| 1910 | /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing | 
|---|
| 1911 | /// mode. These wrap things that will resolve down into a symbol reference. | 
|---|
| 1912 | /// If no match is possible, this returns true, otherwise it returns false. | 
|---|
| 1913 | bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { | 
|---|
| 1914 | // If the addressing mode already has a symbol as the displacement, we can | 
|---|
| 1915 | // never match another symbol. | 
|---|
| 1916 | if (AM.hasSymbolicDisplacement()) | 
|---|
| 1917 | return true; | 
|---|
| 1918 |  | 
|---|
| 1919 | bool IsRIPRelTLS = false; | 
|---|
| 1920 | bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP; | 
|---|
| 1921 | if (IsRIPRel) { | 
|---|
| 1922 | SDValue Val = N.getOperand(i: 0); | 
|---|
| 1923 | if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) | 
|---|
| 1924 | IsRIPRelTLS = true; | 
|---|
| 1925 | } | 
|---|
| 1926 |  | 
|---|
| 1927 | // We can't use an addressing mode in the 64-bit large code model. | 
|---|
| 1928 | // Global TLS addressing is an exception. In the medium code model, | 
|---|
| 1929 | // we use can use a mode when RIP wrappers are present. | 
|---|
| 1930 | // That signifies access to globals that are known to be "near", | 
|---|
| 1931 | // such as the GOT itself. | 
|---|
| 1932 | CodeModel::Model M = TM.getCodeModel(); | 
|---|
| 1933 | if (Subtarget->is64Bit() && M == CodeModel::Large && !IsRIPRelTLS) | 
|---|
| 1934 | return true; | 
|---|
| 1935 |  | 
|---|
| 1936 | // Base and index reg must be 0 in order to use %rip as base. | 
|---|
| 1937 | if (IsRIPRel && AM.hasBaseOrIndexReg()) | 
|---|
| 1938 | return true; | 
|---|
| 1939 |  | 
|---|
| 1940 | // Make a local copy in case we can't do this fold. | 
|---|
| 1941 | X86ISelAddressMode Backup = AM; | 
|---|
| 1942 |  | 
|---|
| 1943 | int64_t Offset = 0; | 
|---|
| 1944 | SDValue N0 = N.getOperand(i: 0); | 
|---|
| 1945 | if (auto *G = dyn_cast<GlobalAddressSDNode>(Val&: N0)) { | 
|---|
| 1946 | AM.GV = G->getGlobal(); | 
|---|
| 1947 | AM.SymbolFlags = G->getTargetFlags(); | 
|---|
| 1948 | Offset = G->getOffset(); | 
|---|
| 1949 | } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(Val&: N0)) { | 
|---|
| 1950 | AM.CP = CP->getConstVal(); | 
|---|
| 1951 | AM.Alignment = CP->getAlign(); | 
|---|
| 1952 | AM.SymbolFlags = CP->getTargetFlags(); | 
|---|
| 1953 | Offset = CP->getOffset(); | 
|---|
| 1954 | } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Val&: N0)) { | 
|---|
| 1955 | AM.ES = S->getSymbol(); | 
|---|
| 1956 | AM.SymbolFlags = S->getTargetFlags(); | 
|---|
| 1957 | } else if (auto *S = dyn_cast<MCSymbolSDNode>(Val&: N0)) { | 
|---|
| 1958 | AM.MCSym = S->getMCSymbol(); | 
|---|
| 1959 | } else if (auto *J = dyn_cast<JumpTableSDNode>(Val&: N0)) { | 
|---|
| 1960 | AM.JT = J->getIndex(); | 
|---|
| 1961 | AM.SymbolFlags = J->getTargetFlags(); | 
|---|
| 1962 | } else if (auto *BA = dyn_cast<BlockAddressSDNode>(Val&: N0)) { | 
|---|
| 1963 | AM.BlockAddr = BA->getBlockAddress(); | 
|---|
| 1964 | AM.SymbolFlags = BA->getTargetFlags(); | 
|---|
| 1965 | Offset = BA->getOffset(); | 
|---|
| 1966 | } else | 
|---|
| 1967 | llvm_unreachable( "Unhandled symbol reference node."); | 
|---|
| 1968 |  | 
|---|
| 1969 | // Can't use an addressing mode with large globals. | 
|---|
| 1970 | if (Subtarget->is64Bit() && !IsRIPRel && AM.GV && | 
|---|
| 1971 | TM.isLargeGlobalValue(GV: AM.GV)) { | 
|---|
| 1972 | AM = Backup; | 
|---|
| 1973 | return true; | 
|---|
| 1974 | } | 
|---|
| 1975 |  | 
|---|
| 1976 | if (foldOffsetIntoAddress(Offset, AM)) { | 
|---|
| 1977 | AM = Backup; | 
|---|
| 1978 | return true; | 
|---|
| 1979 | } | 
|---|
| 1980 |  | 
|---|
| 1981 | if (IsRIPRel) | 
|---|
| 1982 | AM.setBaseReg(CurDAG->getRegister(Reg: X86::RIP, VT: MVT::i64)); | 
|---|
| 1983 |  | 
|---|
| 1984 | // Commit the changes now that we know this fold is safe. | 
|---|
| 1985 | return false; | 
|---|
| 1986 | } | 
|---|
| 1987 |  | 
|---|
| 1988 | /// Add the specified node to the specified addressing mode, returning true if | 
|---|
| 1989 | /// it cannot be done. This just pattern matches for the addressing mode. | 
|---|
| 1990 | bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { | 
|---|
| 1991 | if (matchAddressRecursively(N, AM, Depth: 0)) | 
|---|
| 1992 | return true; | 
|---|
| 1993 |  | 
|---|
| 1994 | // Post-processing: Make a second attempt to fold a load, if we now know | 
|---|
| 1995 | // that there will not be any other register. This is only performed for | 
|---|
| 1996 | // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded | 
|---|
| 1997 | // any foldable load the first time. | 
|---|
| 1998 | if (Subtarget->isTarget64BitILP32() && | 
|---|
| 1999 | AM.BaseType == X86ISelAddressMode::RegBase && | 
|---|
| 2000 | AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) { | 
|---|
| 2001 | SDValue Save_Base_Reg = AM.Base_Reg; | 
|---|
| 2002 | if (auto *LoadN = dyn_cast<LoadSDNode>(Val&: Save_Base_Reg)) { | 
|---|
| 2003 | AM.Base_Reg = SDValue(); | 
|---|
| 2004 | if (matchLoadInAddress(N: LoadN, AM, /*AllowSegmentRegForX32=*/true)) | 
|---|
| 2005 | AM.Base_Reg = Save_Base_Reg; | 
|---|
| 2006 | } | 
|---|
| 2007 | } | 
|---|
| 2008 |  | 
|---|
| 2009 | // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has | 
|---|
| 2010 | // a smaller encoding and avoids a scaled-index. | 
|---|
| 2011 | if (AM.Scale == 2 && | 
|---|
| 2012 | AM.BaseType == X86ISelAddressMode::RegBase && | 
|---|
| 2013 | AM.Base_Reg.getNode() == nullptr) { | 
|---|
| 2014 | AM.Base_Reg = AM.IndexReg; | 
|---|
| 2015 | AM.Scale = 1; | 
|---|
| 2016 | } | 
|---|
| 2017 |  | 
|---|
| 2018 | // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, | 
|---|
| 2019 | // because it has a smaller encoding. | 
|---|
| 2020 | if (TM.getCodeModel() != CodeModel::Large && | 
|---|
| 2021 | (!AM.GV || !TM.isLargeGlobalValue(GV: AM.GV)) && Subtarget->is64Bit() && | 
|---|
| 2022 | AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase && | 
|---|
| 2023 | AM.Base_Reg.getNode() == nullptr && AM.IndexReg.getNode() == nullptr && | 
|---|
| 2024 | AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) { | 
|---|
| 2025 | // However, when GV is a local function symbol and in the same section as | 
|---|
| 2026 | // the current instruction, and AM.Disp is negative and near INT32_MIN, | 
|---|
| 2027 | // referencing GV+Disp generates a relocation referencing the section symbol | 
|---|
| 2028 | // with an even smaller offset, which might underflow. We should bail out if | 
|---|
| 2029 | // the negative offset is too close to INT32_MIN. Actually, we are more | 
|---|
| 2030 | // conservative here, using a smaller magic number also used by | 
|---|
| 2031 | // isOffsetSuitableForCodeModel. | 
|---|
| 2032 | if (isa_and_nonnull<Function>(Val: AM.GV) && AM.Disp < -16 * 1024 * 1024) | 
|---|
| 2033 | return true; | 
|---|
| 2034 |  | 
|---|
| 2035 | AM.Base_Reg = CurDAG->getRegister(Reg: X86::RIP, VT: MVT::i64); | 
|---|
| 2036 | } | 
|---|
| 2037 |  | 
|---|
| 2038 | return false; | 
|---|
| 2039 | } | 
|---|
| 2040 |  | 
|---|
| 2041 | bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM, | 
|---|
| 2042 | unsigned Depth) { | 
|---|
| 2043 | // Add an artificial use to this node so that we can keep track of | 
|---|
| 2044 | // it if it gets CSE'd with a different node. | 
|---|
| 2045 | HandleSDNode Handle(N); | 
|---|
| 2046 |  | 
|---|
| 2047 | X86ISelAddressMode Backup = AM; | 
|---|
| 2048 | if (!matchAddressRecursively(N: N.getOperand(i: 0), AM, Depth: Depth+1) && | 
|---|
| 2049 | !matchAddressRecursively(N: Handle.getValue().getOperand(i: 1), AM, Depth: Depth+1)) | 
|---|
| 2050 | return false; | 
|---|
| 2051 | AM = Backup; | 
|---|
| 2052 |  | 
|---|
| 2053 | // Try again after commutating the operands. | 
|---|
| 2054 | if (!matchAddressRecursively(N: Handle.getValue().getOperand(i: 1), AM, | 
|---|
| 2055 | Depth: Depth + 1) && | 
|---|
| 2056 | !matchAddressRecursively(N: Handle.getValue().getOperand(i: 0), AM, Depth: Depth + 1)) | 
|---|
| 2057 | return false; | 
|---|
| 2058 | AM = Backup; | 
|---|
| 2059 |  | 
|---|
| 2060 | // If we couldn't fold both operands into the address at the same time, | 
|---|
| 2061 | // see if we can just put each operand into a register and fold at least | 
|---|
| 2062 | // the add. | 
|---|
| 2063 | if (AM.BaseType == X86ISelAddressMode::RegBase && | 
|---|
| 2064 | !AM.Base_Reg.getNode() && | 
|---|
| 2065 | !AM.IndexReg.getNode()) { | 
|---|
| 2066 | N = Handle.getValue(); | 
|---|
| 2067 | AM.Base_Reg = N.getOperand(i: 0); | 
|---|
| 2068 | AM.IndexReg = N.getOperand(i: 1); | 
|---|
| 2069 | AM.Scale = 1; | 
|---|
| 2070 | return false; | 
|---|
| 2071 | } | 
|---|
| 2072 | N = Handle.getValue(); | 
|---|
| 2073 | return true; | 
|---|
| 2074 | } | 
|---|
| 2075 |  | 
|---|
| 2076 | // Insert a node into the DAG at least before the Pos node's position. This | 
|---|
| 2077 | // will reposition the node as needed, and will assign it a node ID that is <= | 
|---|
| 2078 | // the Pos node's ID. Note that this does *not* preserve the uniqueness of node | 
|---|
| 2079 | // IDs! The selection DAG must no longer depend on their uniqueness when this | 
|---|
| 2080 | // is used. | 
|---|
| 2081 | static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { | 
|---|
| 2082 | if (N->getNodeId() == -1 || | 
|---|
| 2083 | (SelectionDAGISel::getUninvalidatedNodeId(N: N.getNode()) > | 
|---|
| 2084 | SelectionDAGISel::getUninvalidatedNodeId(N: Pos.getNode()))) { | 
|---|
| 2085 | DAG.RepositionNode(Position: Pos->getIterator(), N: N.getNode()); | 
|---|
| 2086 | // Mark Node as invalid for pruning as after this it may be a successor to a | 
|---|
| 2087 | // selected node but otherwise be in the same position of Pos. | 
|---|
| 2088 | // Conservatively mark it with the same -abs(Id) to assure node id | 
|---|
| 2089 | // invariant is preserved. | 
|---|
| 2090 | N->setNodeId(Pos->getNodeId()); | 
|---|
| 2091 | SelectionDAGISel::InvalidateNodeId(N: N.getNode()); | 
|---|
| 2092 | } | 
|---|
| 2093 | } | 
|---|
| 2094 |  | 
|---|
| 2095 | // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if | 
|---|
| 2096 | // safe. This allows us to convert the shift and and into an h-register | 
|---|
| 2097 | // extract and a scaled index. Returns false if the simplification is | 
|---|
| 2098 | // performed. | 
|---|
| 2099 | static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, | 
|---|
| 2100 | uint64_t Mask, | 
|---|
| 2101 | SDValue Shift, SDValue X, | 
|---|
| 2102 | X86ISelAddressMode &AM) { | 
|---|
| 2103 | if (Shift.getOpcode() != ISD::SRL || | 
|---|
| 2104 | !isa<ConstantSDNode>(Val: Shift.getOperand(i: 1)) || | 
|---|
| 2105 | !Shift.hasOneUse()) | 
|---|
| 2106 | return true; | 
|---|
| 2107 |  | 
|---|
| 2108 | int ScaleLog = 8 - Shift.getConstantOperandVal(i: 1); | 
|---|
| 2109 | if (ScaleLog <= 0 || ScaleLog >= 4 || | 
|---|
| 2110 | Mask != (0xffu << ScaleLog)) | 
|---|
| 2111 | return true; | 
|---|
| 2112 |  | 
|---|
| 2113 | MVT XVT = X.getSimpleValueType(); | 
|---|
| 2114 | MVT VT = N.getSimpleValueType(); | 
|---|
| 2115 | SDLoc DL(N); | 
|---|
| 2116 | SDValue Eight = DAG.getConstant(Val: 8, DL, VT: MVT::i8); | 
|---|
| 2117 | SDValue NewMask = DAG.getConstant(Val: 0xff, DL, VT: XVT); | 
|---|
| 2118 | SDValue Srl = DAG.getNode(Opcode: ISD::SRL, DL, VT: XVT, N1: X, N2: Eight); | 
|---|
| 2119 | SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: XVT, N1: Srl, N2: NewMask); | 
|---|
| 2120 | SDValue Ext = DAG.getZExtOrTrunc(Op: And, DL, VT); | 
|---|
| 2121 | SDValue ShlCount = DAG.getConstant(Val: ScaleLog, DL, VT: MVT::i8); | 
|---|
| 2122 | SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Ext, N2: ShlCount); | 
|---|
| 2123 |  | 
|---|
| 2124 | // Insert the new nodes into the topological ordering. We must do this in | 
|---|
| 2125 | // a valid topological ordering as nothing is going to go back and re-sort | 
|---|
| 2126 | // these nodes. We continually insert before 'N' in sequence as this is | 
|---|
| 2127 | // essentially a pre-flattened and pre-sorted sequence of nodes. There is no | 
|---|
| 2128 | // hierarchy left to express. | 
|---|
| 2129 | insertDAGNode(DAG, Pos: N, N: Eight); | 
|---|
| 2130 | insertDAGNode(DAG, Pos: N, N: NewMask); | 
|---|
| 2131 | insertDAGNode(DAG, Pos: N, N: Srl); | 
|---|
| 2132 | insertDAGNode(DAG, Pos: N, N: And); | 
|---|
| 2133 | insertDAGNode(DAG, Pos: N, N: Ext); | 
|---|
| 2134 | insertDAGNode(DAG, Pos: N, N: ShlCount); | 
|---|
| 2135 | insertDAGNode(DAG, Pos: N, N: Shl); | 
|---|
| 2136 | DAG.ReplaceAllUsesWith(From: N, To: Shl); | 
|---|
| 2137 | DAG.RemoveDeadNode(N: N.getNode()); | 
|---|
| 2138 | AM.IndexReg = Ext; | 
|---|
| 2139 | AM.Scale = (1 << ScaleLog); | 
|---|
| 2140 | return false; | 
|---|
| 2141 | } | 
|---|
| 2142 |  | 
|---|
| 2143 | // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this | 
|---|
| 2144 | // allows us to fold the shift into this addressing mode. Returns false if the | 
|---|
| 2145 | // transform succeeded. | 
|---|
| 2146 | static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, | 
|---|
| 2147 | X86ISelAddressMode &AM) { | 
|---|
| 2148 | SDValue Shift = N.getOperand(i: 0); | 
|---|
| 2149 |  | 
|---|
| 2150 | // Use a signed mask so that shifting right will insert sign bits. These | 
|---|
| 2151 | // bits will be removed when we shift the result left so it doesn't matter | 
|---|
| 2152 | // what we use. This might allow a smaller immediate encoding. | 
|---|
| 2153 | int64_t Mask = cast<ConstantSDNode>(Val: N->getOperand(Num: 1))->getSExtValue(); | 
|---|
| 2154 |  | 
|---|
| 2155 | // If we have an any_extend feeding the AND, look through it to see if there | 
|---|
| 2156 | // is a shift behind it. But only if the AND doesn't use the extended bits. | 
|---|
| 2157 | // FIXME: Generalize this to other ANY_EXTEND than i32 to i64? | 
|---|
| 2158 | bool FoundAnyExtend = false; | 
|---|
| 2159 | if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && | 
|---|
| 2160 | Shift.getOperand(i: 0).getSimpleValueType() == MVT::i32 && | 
|---|
| 2161 | isUInt<32>(x: Mask)) { | 
|---|
| 2162 | FoundAnyExtend = true; | 
|---|
| 2163 | Shift = Shift.getOperand(i: 0); | 
|---|
| 2164 | } | 
|---|
| 2165 |  | 
|---|
| 2166 | if (Shift.getOpcode() != ISD::SHL || | 
|---|
| 2167 | !isa<ConstantSDNode>(Val: Shift.getOperand(i: 1))) | 
|---|
| 2168 | return true; | 
|---|
| 2169 |  | 
|---|
| 2170 | SDValue X = Shift.getOperand(i: 0); | 
|---|
| 2171 |  | 
|---|
| 2172 | // Not likely to be profitable if either the AND or SHIFT node has more | 
|---|
| 2173 | // than one use (unless all uses are for address computation). Besides, | 
|---|
| 2174 | // isel mechanism requires their node ids to be reused. | 
|---|
| 2175 | if (!N.hasOneUse() || !Shift.hasOneUse()) | 
|---|
| 2176 | return true; | 
|---|
| 2177 |  | 
|---|
| 2178 | // Verify that the shift amount is something we can fold. | 
|---|
| 2179 | unsigned ShiftAmt = Shift.getConstantOperandVal(i: 1); | 
|---|
| 2180 | if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3) | 
|---|
| 2181 | return true; | 
|---|
| 2182 |  | 
|---|
| 2183 | MVT VT = N.getSimpleValueType(); | 
|---|
| 2184 | SDLoc DL(N); | 
|---|
| 2185 | if (FoundAnyExtend) { | 
|---|
| 2186 | SDValue NewX = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: X); | 
|---|
| 2187 | insertDAGNode(DAG, Pos: N, N: NewX); | 
|---|
| 2188 | X = NewX; | 
|---|
| 2189 | } | 
|---|
| 2190 |  | 
|---|
| 2191 | SDValue NewMask = DAG.getSignedConstant(Val: Mask >> ShiftAmt, DL, VT); | 
|---|
| 2192 | SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: X, N2: NewMask); | 
|---|
| 2193 | SDValue NewShift = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: NewAnd, N2: Shift.getOperand(i: 1)); | 
|---|
| 2194 |  | 
|---|
| 2195 | // Insert the new nodes into the topological ordering. We must do this in | 
|---|
| 2196 | // a valid topological ordering as nothing is going to go back and re-sort | 
|---|
| 2197 | // these nodes. We continually insert before 'N' in sequence as this is | 
|---|
| 2198 | // essentially a pre-flattened and pre-sorted sequence of nodes. There is no | 
|---|
| 2199 | // hierarchy left to express. | 
|---|
| 2200 | insertDAGNode(DAG, Pos: N, N: NewMask); | 
|---|
| 2201 | insertDAGNode(DAG, Pos: N, N: NewAnd); | 
|---|
| 2202 | insertDAGNode(DAG, Pos: N, N: NewShift); | 
|---|
| 2203 | DAG.ReplaceAllUsesWith(From: N, To: NewShift); | 
|---|
| 2204 | DAG.RemoveDeadNode(N: N.getNode()); | 
|---|
| 2205 |  | 
|---|
| 2206 | AM.Scale = 1 << ShiftAmt; | 
|---|
| 2207 | AM.IndexReg = NewAnd; | 
|---|
| 2208 | return false; | 
|---|
| 2209 | } | 
|---|
| 2210 |  | 
|---|
| 2211 | // Implement some heroics to detect shifts of masked values where the mask can | 
|---|
| 2212 | // be replaced by extending the shift and undoing that in the addressing mode | 
|---|
| 2213 | // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and | 
|---|
| 2214 | // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in | 
|---|
| 2215 | // the addressing mode. This results in code such as: | 
|---|
| 2216 | // | 
|---|
| 2217 | //   int f(short *y, int *lookup_table) { | 
|---|
| 2218 | //     ... | 
|---|
| 2219 | //     return *y + lookup_table[*y >> 11]; | 
|---|
| 2220 | //   } | 
|---|
| 2221 | // | 
|---|
| 2222 | // Turning into: | 
|---|
| 2223 | //   movzwl (%rdi), %eax | 
|---|
| 2224 | //   movl %eax, %ecx | 
|---|
| 2225 | //   shrl $11, %ecx | 
|---|
| 2226 | //   addl (%rsi,%rcx,4), %eax | 
|---|
| 2227 | // | 
|---|
| 2228 | // Instead of: | 
|---|
| 2229 | //   movzwl (%rdi), %eax | 
|---|
| 2230 | //   movl %eax, %ecx | 
|---|
| 2231 | //   shrl $9, %ecx | 
|---|
| 2232 | //   andl $124, %rcx | 
|---|
| 2233 | //   addl (%rsi,%rcx), %eax | 
|---|
| 2234 | // | 
|---|
| 2235 | // Note that this function assumes the mask is provided as a mask *after* the | 
|---|
| 2236 | // value is shifted. The input chain may or may not match that, but computing | 
|---|
| 2237 | // such a mask is trivial. | 
|---|
| 2238 | static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, | 
|---|
| 2239 | uint64_t Mask, | 
|---|
| 2240 | SDValue Shift, SDValue X, | 
|---|
| 2241 | X86ISelAddressMode &AM) { | 
|---|
| 2242 | if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() || | 
|---|
| 2243 | !isa<ConstantSDNode>(Val: Shift.getOperand(i: 1))) | 
|---|
| 2244 | return true; | 
|---|
| 2245 |  | 
|---|
| 2246 | // We need to ensure that mask is a continuous run of bits. | 
|---|
| 2247 | unsigned MaskIdx, MaskLen; | 
|---|
| 2248 | if (!isShiftedMask_64(Value: Mask, MaskIdx, MaskLen)) | 
|---|
| 2249 | return true; | 
|---|
| 2250 | unsigned MaskLZ = 64 - (MaskIdx + MaskLen); | 
|---|
| 2251 |  | 
|---|
| 2252 | unsigned ShiftAmt = Shift.getConstantOperandVal(i: 1); | 
|---|
| 2253 |  | 
|---|
| 2254 | // The amount of shift we're trying to fit into the addressing mode is taken | 
|---|
| 2255 | // from the shifted mask index (number of trailing zeros of the mask). | 
|---|
| 2256 | unsigned AMShiftAmt = MaskIdx; | 
|---|
| 2257 |  | 
|---|
| 2258 | // There is nothing we can do here unless the mask is removing some bits. | 
|---|
| 2259 | // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. | 
|---|
| 2260 | if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; | 
|---|
| 2261 |  | 
|---|
| 2262 | // Scale the leading zero count down based on the actual size of the value. | 
|---|
| 2263 | // Also scale it down based on the size of the shift. | 
|---|
| 2264 | unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; | 
|---|
| 2265 | if (MaskLZ < ScaleDown) | 
|---|
| 2266 | return true; | 
|---|
| 2267 | MaskLZ -= ScaleDown; | 
|---|
| 2268 |  | 
|---|
| 2269 | // The final check is to ensure that any masked out high bits of X are | 
|---|
| 2270 | // already known to be zero. Otherwise, the mask has a semantic impact | 
|---|
| 2271 | // other than masking out a couple of low bits. Unfortunately, because of | 
|---|
| 2272 | // the mask, zero extensions will be removed from operands in some cases. | 
|---|
| 2273 | // This code works extra hard to look through extensions because we can | 
|---|
| 2274 | // replace them with zero extensions cheaply if necessary. | 
|---|
| 2275 | bool ReplacingAnyExtend = false; | 
|---|
| 2276 | if (X.getOpcode() == ISD::ANY_EXTEND) { | 
|---|
| 2277 | unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() - | 
|---|
| 2278 | X.getOperand(i: 0).getSimpleValueType().getSizeInBits(); | 
|---|
| 2279 | // Assume that we'll replace the any-extend with a zero-extend, and | 
|---|
| 2280 | // narrow the search to the extended value. | 
|---|
| 2281 | X = X.getOperand(i: 0); | 
|---|
| 2282 | MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits; | 
|---|
| 2283 | ReplacingAnyExtend = true; | 
|---|
| 2284 | } | 
|---|
| 2285 | APInt MaskedHighBits = | 
|---|
| 2286 | APInt::getHighBitsSet(numBits: X.getSimpleValueType().getSizeInBits(), hiBitsSet: MaskLZ); | 
|---|
| 2287 | if (!DAG.MaskedValueIsZero(Op: X, Mask: MaskedHighBits)) | 
|---|
| 2288 | return true; | 
|---|
| 2289 |  | 
|---|
| 2290 | // We've identified a pattern that can be transformed into a single shift | 
|---|
| 2291 | // and an addressing mode. Make it so. | 
|---|
| 2292 | MVT VT = N.getSimpleValueType(); | 
|---|
| 2293 | if (ReplacingAnyExtend) { | 
|---|
| 2294 | assert(X.getValueType() != VT); | 
|---|
| 2295 | // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. | 
|---|
| 2296 | SDValue NewX = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(X), VT, Operand: X); | 
|---|
| 2297 | insertDAGNode(DAG, Pos: N, N: NewX); | 
|---|
| 2298 | X = NewX; | 
|---|
| 2299 | } | 
|---|
| 2300 |  | 
|---|
| 2301 | MVT XVT = X.getSimpleValueType(); | 
|---|
| 2302 | SDLoc DL(N); | 
|---|
| 2303 | SDValue NewSRLAmt = DAG.getConstant(Val: ShiftAmt + AMShiftAmt, DL, VT: MVT::i8); | 
|---|
| 2304 | SDValue NewSRL = DAG.getNode(Opcode: ISD::SRL, DL, VT: XVT, N1: X, N2: NewSRLAmt); | 
|---|
| 2305 | SDValue NewExt = DAG.getZExtOrTrunc(Op: NewSRL, DL, VT); | 
|---|
| 2306 | SDValue NewSHLAmt = DAG.getConstant(Val: AMShiftAmt, DL, VT: MVT::i8); | 
|---|
| 2307 | SDValue NewSHL = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: NewExt, N2: NewSHLAmt); | 
|---|
| 2308 |  | 
|---|
| 2309 | // Insert the new nodes into the topological ordering. We must do this in | 
|---|
| 2310 | // a valid topological ordering as nothing is going to go back and re-sort | 
|---|
| 2311 | // these nodes. We continually insert before 'N' in sequence as this is | 
|---|
| 2312 | // essentially a pre-flattened and pre-sorted sequence of nodes. There is no | 
|---|
| 2313 | // hierarchy left to express. | 
|---|
| 2314 | insertDAGNode(DAG, Pos: N, N: NewSRLAmt); | 
|---|
| 2315 | insertDAGNode(DAG, Pos: N, N: NewSRL); | 
|---|
| 2316 | insertDAGNode(DAG, Pos: N, N: NewExt); | 
|---|
| 2317 | insertDAGNode(DAG, Pos: N, N: NewSHLAmt); | 
|---|
| 2318 | insertDAGNode(DAG, Pos: N, N: NewSHL); | 
|---|
| 2319 | DAG.ReplaceAllUsesWith(From: N, To: NewSHL); | 
|---|
| 2320 | DAG.RemoveDeadNode(N: N.getNode()); | 
|---|
| 2321 |  | 
|---|
| 2322 | AM.Scale = 1 << AMShiftAmt; | 
|---|
| 2323 | AM.IndexReg = NewExt; | 
|---|
| 2324 | return false; | 
|---|
| 2325 | } | 
|---|
| 2326 |  | 
|---|
| 2327 | // Transform "(X >> SHIFT) & (MASK << C1)" to | 
|---|
| 2328 | // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be | 
|---|
| 2329 | // matched to a BEXTR later. Returns false if the simplification is performed. | 
|---|
| 2330 | static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N, | 
|---|
| 2331 | uint64_t Mask, | 
|---|
| 2332 | SDValue Shift, SDValue X, | 
|---|
| 2333 | X86ISelAddressMode &AM, | 
|---|
| 2334 | const X86Subtarget &Subtarget) { | 
|---|
| 2335 | if (Shift.getOpcode() != ISD::SRL || | 
|---|
| 2336 | !isa<ConstantSDNode>(Val: Shift.getOperand(i: 1)) || | 
|---|
| 2337 | !Shift.hasOneUse() || !N.hasOneUse()) | 
|---|
| 2338 | return true; | 
|---|
| 2339 |  | 
|---|
| 2340 | // Only do this if BEXTR will be matched by matchBEXTRFromAndImm. | 
|---|
| 2341 | if (!Subtarget.hasTBM() && | 
|---|
| 2342 | !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR())) | 
|---|
| 2343 | return true; | 
|---|
| 2344 |  | 
|---|
| 2345 | // We need to ensure that mask is a continuous run of bits. | 
|---|
| 2346 | unsigned MaskIdx, MaskLen; | 
|---|
| 2347 | if (!isShiftedMask_64(Value: Mask, MaskIdx, MaskLen)) | 
|---|
| 2348 | return true; | 
|---|
| 2349 |  | 
|---|
| 2350 | unsigned ShiftAmt = Shift.getConstantOperandVal(i: 1); | 
|---|
| 2351 |  | 
|---|
| 2352 | // The amount of shift we're trying to fit into the addressing mode is taken | 
|---|
| 2353 | // from the shifted mask index (number of trailing zeros of the mask). | 
|---|
| 2354 | unsigned AMShiftAmt = MaskIdx; | 
|---|
| 2355 |  | 
|---|
| 2356 | // There is nothing we can do here unless the mask is removing some bits. | 
|---|
| 2357 | // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. | 
|---|
| 2358 | if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; | 
|---|
| 2359 |  | 
|---|
| 2360 | MVT XVT = X.getSimpleValueType(); | 
|---|
| 2361 | MVT VT = N.getSimpleValueType(); | 
|---|
| 2362 | SDLoc DL(N); | 
|---|
| 2363 | SDValue NewSRLAmt = DAG.getConstant(Val: ShiftAmt + AMShiftAmt, DL, VT: MVT::i8); | 
|---|
| 2364 | SDValue NewSRL = DAG.getNode(Opcode: ISD::SRL, DL, VT: XVT, N1: X, N2: NewSRLAmt); | 
|---|
| 2365 | SDValue NewMask = DAG.getConstant(Val: Mask >> AMShiftAmt, DL, VT: XVT); | 
|---|
| 2366 | SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL, VT: XVT, N1: NewSRL, N2: NewMask); | 
|---|
| 2367 | SDValue NewExt = DAG.getZExtOrTrunc(Op: NewAnd, DL, VT); | 
|---|
| 2368 | SDValue NewSHLAmt = DAG.getConstant(Val: AMShiftAmt, DL, VT: MVT::i8); | 
|---|
| 2369 | SDValue NewSHL = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: NewExt, N2: NewSHLAmt); | 
|---|
| 2370 |  | 
|---|
| 2371 | // Insert the new nodes into the topological ordering. We must do this in | 
|---|
| 2372 | // a valid topological ordering as nothing is going to go back and re-sort | 
|---|
| 2373 | // these nodes. We continually insert before 'N' in sequence as this is | 
|---|
| 2374 | // essentially a pre-flattened and pre-sorted sequence of nodes. There is no | 
|---|
| 2375 | // hierarchy left to express. | 
|---|
| 2376 | insertDAGNode(DAG, Pos: N, N: NewSRLAmt); | 
|---|
| 2377 | insertDAGNode(DAG, Pos: N, N: NewSRL); | 
|---|
| 2378 | insertDAGNode(DAG, Pos: N, N: NewMask); | 
|---|
| 2379 | insertDAGNode(DAG, Pos: N, N: NewAnd); | 
|---|
| 2380 | insertDAGNode(DAG, Pos: N, N: NewExt); | 
|---|
| 2381 | insertDAGNode(DAG, Pos: N, N: NewSHLAmt); | 
|---|
| 2382 | insertDAGNode(DAG, Pos: N, N: NewSHL); | 
|---|
| 2383 | DAG.ReplaceAllUsesWith(From: N, To: NewSHL); | 
|---|
| 2384 | DAG.RemoveDeadNode(N: N.getNode()); | 
|---|
| 2385 |  | 
|---|
| 2386 | AM.Scale = 1 << AMShiftAmt; | 
|---|
| 2387 | AM.IndexReg = NewExt; | 
|---|
| 2388 | return false; | 
|---|
| 2389 | } | 
|---|
| 2390 |  | 
|---|
| 2391 | // Attempt to peek further into a scaled index register, collecting additional | 
|---|
| 2392 | // extensions / offsets / etc. Returns \p N if we can't peek any further. | 
|---|
| 2393 | SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N, | 
|---|
| 2394 | X86ISelAddressMode &AM, | 
|---|
| 2395 | unsigned Depth) { | 
|---|
| 2396 | assert(AM.IndexReg.getNode() == nullptr && "IndexReg already matched"); | 
|---|
| 2397 | assert((AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8) && | 
|---|
| 2398 | "Illegal index scale"); | 
|---|
| 2399 |  | 
|---|
| 2400 | // Limit recursion. | 
|---|
| 2401 | if (Depth >= SelectionDAG::MaxRecursionDepth) | 
|---|
| 2402 | return N; | 
|---|
| 2403 |  | 
|---|
| 2404 | EVT VT = N.getValueType(); | 
|---|
| 2405 | unsigned Opc = N.getOpcode(); | 
|---|
| 2406 |  | 
|---|
| 2407 | // index: add(x,c) -> index: x, disp + c | 
|---|
| 2408 | if (CurDAG->isBaseWithConstantOffset(Op: N)) { | 
|---|
| 2409 | auto *AddVal = cast<ConstantSDNode>(Val: N.getOperand(i: 1)); | 
|---|
| 2410 | uint64_t Offset = (uint64_t)AddVal->getSExtValue() * AM.Scale; | 
|---|
| 2411 | if (!foldOffsetIntoAddress(Offset, AM)) | 
|---|
| 2412 | return matchIndexRecursively(N: N.getOperand(i: 0), AM, Depth: Depth + 1); | 
|---|
| 2413 | } | 
|---|
| 2414 |  | 
|---|
| 2415 | // index: add(x,x) -> index: x, scale * 2 | 
|---|
| 2416 | if (Opc == ISD::ADD && N.getOperand(i: 0) == N.getOperand(i: 1)) { | 
|---|
| 2417 | if (AM.Scale <= 4) { | 
|---|
| 2418 | AM.Scale *= 2; | 
|---|
| 2419 | return matchIndexRecursively(N: N.getOperand(i: 0), AM, Depth: Depth + 1); | 
|---|
| 2420 | } | 
|---|
| 2421 | } | 
|---|
| 2422 |  | 
|---|
| 2423 | // index: shl(x,i) -> index: x, scale * (1 << i) | 
|---|
| 2424 | if (Opc == X86ISD::VSHLI) { | 
|---|
| 2425 | uint64_t ShiftAmt = N.getConstantOperandVal(i: 1); | 
|---|
| 2426 | uint64_t ScaleAmt = 1ULL << ShiftAmt; | 
|---|
| 2427 | if ((AM.Scale * ScaleAmt) <= 8) { | 
|---|
| 2428 | AM.Scale *= ScaleAmt; | 
|---|
| 2429 | return matchIndexRecursively(N: N.getOperand(i: 0), AM, Depth: Depth + 1); | 
|---|
| 2430 | } | 
|---|
| 2431 | } | 
|---|
| 2432 |  | 
|---|
| 2433 | // index: sext(add_nsw(x,c)) -> index: sext(x), disp + sext(c) | 
|---|
| 2434 | // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt sext? | 
|---|
| 2435 | if (Opc == ISD::SIGN_EXTEND && !VT.isVector() && N.hasOneUse()) { | 
|---|
| 2436 | SDValue Src = N.getOperand(i: 0); | 
|---|
| 2437 | if (Src.getOpcode() == ISD::ADD && Src->getFlags().hasNoSignedWrap() && | 
|---|
| 2438 | Src.hasOneUse()) { | 
|---|
| 2439 | if (CurDAG->isBaseWithConstantOffset(Op: Src)) { | 
|---|
| 2440 | SDValue AddSrc = Src.getOperand(i: 0); | 
|---|
| 2441 | auto *AddVal = cast<ConstantSDNode>(Val: Src.getOperand(i: 1)); | 
|---|
| 2442 | int64_t Offset = AddVal->getSExtValue(); | 
|---|
| 2443 | if (!foldOffsetIntoAddress(Offset: (uint64_t)Offset * AM.Scale, AM)) { | 
|---|
| 2444 | SDLoc DL(N); | 
|---|
| 2445 | SDValue ExtSrc = CurDAG->getNode(Opcode: Opc, DL, VT, Operand: AddSrc); | 
|---|
| 2446 | SDValue ExtVal = CurDAG->getSignedConstant(Val: Offset, DL, VT); | 
|---|
| 2447 | SDValue ExtAdd = CurDAG->getNode(Opcode: ISD::ADD, DL, VT, N1: ExtSrc, N2: ExtVal); | 
|---|
| 2448 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: ExtSrc); | 
|---|
| 2449 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: ExtVal); | 
|---|
| 2450 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: ExtAdd); | 
|---|
| 2451 | CurDAG->ReplaceAllUsesWith(From: N, To: ExtAdd); | 
|---|
| 2452 | CurDAG->RemoveDeadNode(N: N.getNode()); | 
|---|
| 2453 | return ExtSrc; | 
|---|
| 2454 | } | 
|---|
| 2455 | } | 
|---|
| 2456 | } | 
|---|
| 2457 | } | 
|---|
| 2458 |  | 
|---|
| 2459 | // index: zext(add_nuw(x,c)) -> index: zext(x), disp + zext(c) | 
|---|
| 2460 | // index: zext(addlike(x,c)) -> index: zext(x), disp + zext(c) | 
|---|
| 2461 | // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt zext? | 
|---|
| 2462 | if (Opc == ISD::ZERO_EXTEND && !VT.isVector() && N.hasOneUse()) { | 
|---|
| 2463 | SDValue Src = N.getOperand(i: 0); | 
|---|
| 2464 | unsigned SrcOpc = Src.getOpcode(); | 
|---|
| 2465 | if (((SrcOpc == ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) || | 
|---|
| 2466 | CurDAG->isADDLike(Op: Src, /*NoWrap=*/true)) && | 
|---|
| 2467 | Src.hasOneUse()) { | 
|---|
| 2468 | if (CurDAG->isBaseWithConstantOffset(Op: Src)) { | 
|---|
| 2469 | SDValue AddSrc = Src.getOperand(i: 0); | 
|---|
| 2470 | uint64_t Offset = Src.getConstantOperandVal(i: 1); | 
|---|
| 2471 | if (!foldOffsetIntoAddress(Offset: Offset * AM.Scale, AM)) { | 
|---|
| 2472 | SDLoc DL(N); | 
|---|
| 2473 | SDValue Res; | 
|---|
| 2474 | // If we're also scaling, see if we can use that as well. | 
|---|
| 2475 | if (AddSrc.getOpcode() == ISD::SHL && | 
|---|
| 2476 | isa<ConstantSDNode>(Val: AddSrc.getOperand(i: 1))) { | 
|---|
| 2477 | SDValue ShVal = AddSrc.getOperand(i: 0); | 
|---|
| 2478 | uint64_t ShAmt = AddSrc.getConstantOperandVal(i: 1); | 
|---|
| 2479 | APInt HiBits = | 
|---|
| 2480 | APInt::getHighBitsSet(numBits: AddSrc.getScalarValueSizeInBits(), hiBitsSet: ShAmt); | 
|---|
| 2481 | uint64_t ScaleAmt = 1ULL << ShAmt; | 
|---|
| 2482 | if ((AM.Scale * ScaleAmt) <= 8 && | 
|---|
| 2483 | (AddSrc->getFlags().hasNoUnsignedWrap() || | 
|---|
| 2484 | CurDAG->MaskedValueIsZero(Op: ShVal, Mask: HiBits))) { | 
|---|
| 2485 | AM.Scale *= ScaleAmt; | 
|---|
| 2486 | SDValue ExtShVal = CurDAG->getNode(Opcode: Opc, DL, VT, Operand: ShVal); | 
|---|
| 2487 | SDValue ExtShift = CurDAG->getNode(Opcode: ISD::SHL, DL, VT, N1: ExtShVal, | 
|---|
| 2488 | N2: AddSrc.getOperand(i: 1)); | 
|---|
| 2489 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: ExtShVal); | 
|---|
| 2490 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: ExtShift); | 
|---|
| 2491 | AddSrc = ExtShift; | 
|---|
| 2492 | Res = ExtShVal; | 
|---|
| 2493 | } | 
|---|
| 2494 | } | 
|---|
| 2495 | SDValue ExtSrc = CurDAG->getNode(Opcode: Opc, DL, VT, Operand: AddSrc); | 
|---|
| 2496 | SDValue ExtVal = CurDAG->getConstant(Val: Offset, DL, VT); | 
|---|
| 2497 | SDValue ExtAdd = CurDAG->getNode(Opcode: SrcOpc, DL, VT, N1: ExtSrc, N2: ExtVal); | 
|---|
| 2498 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: ExtSrc); | 
|---|
| 2499 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: ExtVal); | 
|---|
| 2500 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: ExtAdd); | 
|---|
| 2501 | CurDAG->ReplaceAllUsesWith(From: N, To: ExtAdd); | 
|---|
| 2502 | CurDAG->RemoveDeadNode(N: N.getNode()); | 
|---|
| 2503 | return Res ? Res : ExtSrc; | 
|---|
| 2504 | } | 
|---|
| 2505 | } | 
|---|
| 2506 | } | 
|---|
| 2507 | } | 
|---|
| 2508 |  | 
|---|
| 2509 | // TODO: Handle extensions, shifted masks etc. | 
|---|
| 2510 | return N; | 
|---|
| 2511 | } | 
|---|
| 2512 |  | 
|---|
| 2513 | bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, | 
|---|
| 2514 | unsigned Depth) { | 
|---|
| 2515 | LLVM_DEBUG({ | 
|---|
| 2516 | dbgs() << "MatchAddress: "; | 
|---|
| 2517 | AM.dump(CurDAG); | 
|---|
| 2518 | }); | 
|---|
| 2519 | // Limit recursion. | 
|---|
| 2520 | if (Depth >= SelectionDAG::MaxRecursionDepth) | 
|---|
| 2521 | return matchAddressBase(N, AM); | 
|---|
| 2522 |  | 
|---|
| 2523 | // If this is already a %rip relative address, we can only merge immediates | 
|---|
| 2524 | // into it.  Instead of handling this in every case, we handle it here. | 
|---|
| 2525 | // RIP relative addressing: %rip + 32-bit displacement! | 
|---|
| 2526 | if (AM.isRIPRelative()) { | 
|---|
| 2527 | // FIXME: JumpTable and ExternalSymbol address currently don't like | 
|---|
| 2528 | // displacements.  It isn't very important, but this should be fixed for | 
|---|
| 2529 | // consistency. | 
|---|
| 2530 | if (!(AM.ES || AM.MCSym) && AM.JT != -1) | 
|---|
| 2531 | return true; | 
|---|
| 2532 |  | 
|---|
| 2533 | if (auto *Cst = dyn_cast<ConstantSDNode>(Val&: N)) | 
|---|
| 2534 | if (!foldOffsetIntoAddress(Offset: Cst->getSExtValue(), AM)) | 
|---|
| 2535 | return false; | 
|---|
| 2536 | return true; | 
|---|
| 2537 | } | 
|---|
| 2538 |  | 
|---|
| 2539 | switch (N.getOpcode()) { | 
|---|
| 2540 | default: break; | 
|---|
| 2541 | case ISD::LOCAL_RECOVER: { | 
|---|
| 2542 | if (!AM.hasSymbolicDisplacement() && AM.Disp == 0) | 
|---|
| 2543 | if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(Val: N.getOperand(i: 0))) { | 
|---|
| 2544 | // Use the symbol and don't prefix it. | 
|---|
| 2545 | AM.MCSym = ESNode->getMCSymbol(); | 
|---|
| 2546 | return false; | 
|---|
| 2547 | } | 
|---|
| 2548 | break; | 
|---|
| 2549 | } | 
|---|
| 2550 | case ISD::Constant: { | 
|---|
| 2551 | uint64_t Val = cast<ConstantSDNode>(Val&: N)->getSExtValue(); | 
|---|
| 2552 | if (!foldOffsetIntoAddress(Offset: Val, AM)) | 
|---|
| 2553 | return false; | 
|---|
| 2554 | break; | 
|---|
| 2555 | } | 
|---|
| 2556 |  | 
|---|
| 2557 | case X86ISD::Wrapper: | 
|---|
| 2558 | case X86ISD::WrapperRIP: | 
|---|
| 2559 | if (!matchWrapper(N, AM)) | 
|---|
| 2560 | return false; | 
|---|
| 2561 | break; | 
|---|
| 2562 |  | 
|---|
| 2563 | case ISD::LOAD: | 
|---|
| 2564 | if (!matchLoadInAddress(N: cast<LoadSDNode>(Val&: N), AM)) | 
|---|
| 2565 | return false; | 
|---|
| 2566 | break; | 
|---|
| 2567 |  | 
|---|
| 2568 | case ISD::FrameIndex: | 
|---|
| 2569 | if (AM.BaseType == X86ISelAddressMode::RegBase && | 
|---|
| 2570 | AM.Base_Reg.getNode() == nullptr && | 
|---|
| 2571 | (!Subtarget->is64Bit() || isDispSafeForFrameIndexOrRegBase(Val: AM.Disp))) { | 
|---|
| 2572 | AM.BaseType = X86ISelAddressMode::FrameIndexBase; | 
|---|
| 2573 | AM.Base_FrameIndex = cast<FrameIndexSDNode>(Val&: N)->getIndex(); | 
|---|
| 2574 | return false; | 
|---|
| 2575 | } | 
|---|
| 2576 | break; | 
|---|
| 2577 |  | 
|---|
| 2578 | case ISD::SHL: | 
|---|
| 2579 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) | 
|---|
| 2580 | break; | 
|---|
| 2581 |  | 
|---|
| 2582 | if (auto *CN = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) { | 
|---|
| 2583 | unsigned Val = CN->getZExtValue(); | 
|---|
| 2584 | // Note that we handle x<<1 as (,x,2) rather than (x,x) here so | 
|---|
| 2585 | // that the base operand remains free for further matching. If | 
|---|
| 2586 | // the base doesn't end up getting used, a post-processing step | 
|---|
| 2587 | // in MatchAddress turns (,x,2) into (x,x), which is cheaper. | 
|---|
| 2588 | if (Val == 1 || Val == 2 || Val == 3) { | 
|---|
| 2589 | SDValue ShVal = N.getOperand(i: 0); | 
|---|
| 2590 | AM.Scale = 1 << Val; | 
|---|
| 2591 | AM.IndexReg = matchIndexRecursively(N: ShVal, AM, Depth: Depth + 1); | 
|---|
| 2592 | return false; | 
|---|
| 2593 | } | 
|---|
| 2594 | } | 
|---|
| 2595 | break; | 
|---|
| 2596 |  | 
|---|
| 2597 | case ISD::SRL: { | 
|---|
| 2598 | // Scale must not be used already. | 
|---|
| 2599 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; | 
|---|
| 2600 |  | 
|---|
| 2601 | // We only handle up to 64-bit values here as those are what matter for | 
|---|
| 2602 | // addressing mode optimizations. | 
|---|
| 2603 | assert(N.getSimpleValueType().getSizeInBits() <= 64 && | 
|---|
| 2604 | "Unexpected value size!"); | 
|---|
| 2605 |  | 
|---|
| 2606 | SDValue And = N.getOperand(i: 0); | 
|---|
| 2607 | if (And.getOpcode() != ISD::AND) break; | 
|---|
| 2608 | SDValue X = And.getOperand(i: 0); | 
|---|
| 2609 |  | 
|---|
| 2610 | // The mask used for the transform is expected to be post-shift, but we | 
|---|
| 2611 | // found the shift first so just apply the shift to the mask before passing | 
|---|
| 2612 | // it down. | 
|---|
| 2613 | if (!isa<ConstantSDNode>(Val: N.getOperand(i: 1)) || | 
|---|
| 2614 | !isa<ConstantSDNode>(Val: And.getOperand(i: 1))) | 
|---|
| 2615 | break; | 
|---|
| 2616 | uint64_t Mask = And.getConstantOperandVal(i: 1) >> N.getConstantOperandVal(i: 1); | 
|---|
| 2617 |  | 
|---|
| 2618 | // Try to fold the mask and shift into the scale, and return false if we | 
|---|
| 2619 | // succeed. | 
|---|
| 2620 | if (!foldMaskAndShiftToScale(DAG&: *CurDAG, N, Mask, Shift: N, X, AM)) | 
|---|
| 2621 | return false; | 
|---|
| 2622 | break; | 
|---|
| 2623 | } | 
|---|
| 2624 |  | 
|---|
| 2625 | case ISD::SMUL_LOHI: | 
|---|
| 2626 | case ISD::UMUL_LOHI: | 
|---|
| 2627 | // A mul_lohi where we need the low part can be folded as a plain multiply. | 
|---|
| 2628 | if (N.getResNo() != 0) break; | 
|---|
| 2629 | [[fallthrough]]; | 
|---|
| 2630 | case ISD::MUL: | 
|---|
| 2631 | case X86ISD::MUL_IMM: | 
|---|
| 2632 | // X*[3,5,9] -> X+X*[2,4,8] | 
|---|
| 2633 | if (AM.BaseType == X86ISelAddressMode::RegBase && | 
|---|
| 2634 | AM.Base_Reg.getNode() == nullptr && | 
|---|
| 2635 | AM.IndexReg.getNode() == nullptr) { | 
|---|
| 2636 | if (auto *CN = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) | 
|---|
| 2637 | if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || | 
|---|
| 2638 | CN->getZExtValue() == 9) { | 
|---|
| 2639 | AM.Scale = unsigned(CN->getZExtValue())-1; | 
|---|
| 2640 |  | 
|---|
| 2641 | SDValue MulVal = N.getOperand(i: 0); | 
|---|
| 2642 | SDValue Reg; | 
|---|
| 2643 |  | 
|---|
| 2644 | // Okay, we know that we have a scale by now.  However, if the scaled | 
|---|
| 2645 | // value is an add of something and a constant, we can fold the | 
|---|
| 2646 | // constant into the disp field here. | 
|---|
| 2647 | if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && | 
|---|
| 2648 | isa<ConstantSDNode>(Val: MulVal.getOperand(i: 1))) { | 
|---|
| 2649 | Reg = MulVal.getOperand(i: 0); | 
|---|
| 2650 | auto *AddVal = cast<ConstantSDNode>(Val: MulVal.getOperand(i: 1)); | 
|---|
| 2651 | uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue(); | 
|---|
| 2652 | if (foldOffsetIntoAddress(Offset: Disp, AM)) | 
|---|
| 2653 | Reg = N.getOperand(i: 0); | 
|---|
| 2654 | } else { | 
|---|
| 2655 | Reg = N.getOperand(i: 0); | 
|---|
| 2656 | } | 
|---|
| 2657 |  | 
|---|
| 2658 | AM.IndexReg = AM.Base_Reg = Reg; | 
|---|
| 2659 | return false; | 
|---|
| 2660 | } | 
|---|
| 2661 | } | 
|---|
| 2662 | break; | 
|---|
| 2663 |  | 
|---|
| 2664 | case ISD::SUB: { | 
|---|
| 2665 | // Given A-B, if A can be completely folded into the address and | 
|---|
| 2666 | // the index field with the index field unused, use -B as the index. | 
|---|
| 2667 | // This is a win if a has multiple parts that can be folded into | 
|---|
| 2668 | // the address. Also, this saves a mov if the base register has | 
|---|
| 2669 | // other uses, since it avoids a two-address sub instruction, however | 
|---|
| 2670 | // it costs an additional mov if the index register has other uses. | 
|---|
| 2671 |  | 
|---|
| 2672 | // Add an artificial use to this node so that we can keep track of | 
|---|
| 2673 | // it if it gets CSE'd with a different node. | 
|---|
| 2674 | HandleSDNode Handle(N); | 
|---|
| 2675 |  | 
|---|
| 2676 | // Test if the LHS of the sub can be folded. | 
|---|
| 2677 | X86ISelAddressMode Backup = AM; | 
|---|
| 2678 | if (matchAddressRecursively(N: N.getOperand(i: 0), AM, Depth: Depth+1)) { | 
|---|
| 2679 | N = Handle.getValue(); | 
|---|
| 2680 | AM = Backup; | 
|---|
| 2681 | break; | 
|---|
| 2682 | } | 
|---|
| 2683 | N = Handle.getValue(); | 
|---|
| 2684 | // Test if the index field is free for use. | 
|---|
| 2685 | if (AM.IndexReg.getNode() || AM.isRIPRelative()) { | 
|---|
| 2686 | AM = Backup; | 
|---|
| 2687 | break; | 
|---|
| 2688 | } | 
|---|
| 2689 |  | 
|---|
| 2690 | int Cost = 0; | 
|---|
| 2691 | SDValue RHS = N.getOperand(i: 1); | 
|---|
| 2692 | // If the RHS involves a register with multiple uses, this | 
|---|
| 2693 | // transformation incurs an extra mov, due to the neg instruction | 
|---|
| 2694 | // clobbering its operand. | 
|---|
| 2695 | if (!RHS.getNode()->hasOneUse() || | 
|---|
| 2696 | RHS.getNode()->getOpcode() == ISD::CopyFromReg || | 
|---|
| 2697 | RHS.getNode()->getOpcode() == ISD::TRUNCATE || | 
|---|
| 2698 | RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || | 
|---|
| 2699 | (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && | 
|---|
| 2700 | RHS.getOperand(i: 0).getValueType() == MVT::i32)) | 
|---|
| 2701 | ++Cost; | 
|---|
| 2702 | // If the base is a register with multiple uses, this | 
|---|
| 2703 | // transformation may save a mov. | 
|---|
| 2704 | if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() && | 
|---|
| 2705 | !AM.Base_Reg.getNode()->hasOneUse()) || | 
|---|
| 2706 | AM.BaseType == X86ISelAddressMode::FrameIndexBase) | 
|---|
| 2707 | --Cost; | 
|---|
| 2708 | // If the folded LHS was interesting, this transformation saves | 
|---|
| 2709 | // address arithmetic. | 
|---|
| 2710 | if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + | 
|---|
| 2711 | ((AM.Disp != 0) && (Backup.Disp == 0)) + | 
|---|
| 2712 | (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) | 
|---|
| 2713 | --Cost; | 
|---|
| 2714 | // If it doesn't look like it may be an overall win, don't do it. | 
|---|
| 2715 | if (Cost >= 0) { | 
|---|
| 2716 | AM = Backup; | 
|---|
| 2717 | break; | 
|---|
| 2718 | } | 
|---|
| 2719 |  | 
|---|
| 2720 | // Ok, the transformation is legal and appears profitable. Go for it. | 
|---|
| 2721 | // Negation will be emitted later to avoid creating dangling nodes if this | 
|---|
| 2722 | // was an unprofitable LEA. | 
|---|
| 2723 | AM.IndexReg = RHS; | 
|---|
| 2724 | AM.NegateIndex = true; | 
|---|
| 2725 | AM.Scale = 1; | 
|---|
| 2726 | return false; | 
|---|
| 2727 | } | 
|---|
| 2728 |  | 
|---|
| 2729 | case ISD::OR: | 
|---|
| 2730 | case ISD::XOR: | 
|---|
| 2731 | // See if we can treat the OR/XOR node as an ADD node. | 
|---|
| 2732 | if (!CurDAG->isADDLike(Op: N)) | 
|---|
| 2733 | break; | 
|---|
| 2734 | [[fallthrough]]; | 
|---|
| 2735 | case ISD::ADD: | 
|---|
| 2736 | if (!matchAdd(N, AM, Depth)) | 
|---|
| 2737 | return false; | 
|---|
| 2738 | break; | 
|---|
| 2739 |  | 
|---|
| 2740 | case ISD::AND: { | 
|---|
| 2741 | // Perform some heroic transforms on an and of a constant-count shift | 
|---|
| 2742 | // with a constant to enable use of the scaled offset field. | 
|---|
| 2743 |  | 
|---|
| 2744 | // Scale must not be used already. | 
|---|
| 2745 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; | 
|---|
| 2746 |  | 
|---|
| 2747 | // We only handle up to 64-bit values here as those are what matter for | 
|---|
| 2748 | // addressing mode optimizations. | 
|---|
| 2749 | assert(N.getSimpleValueType().getSizeInBits() <= 64 && | 
|---|
| 2750 | "Unexpected value size!"); | 
|---|
| 2751 |  | 
|---|
| 2752 | if (!isa<ConstantSDNode>(Val: N.getOperand(i: 1))) | 
|---|
| 2753 | break; | 
|---|
| 2754 |  | 
|---|
| 2755 | if (N.getOperand(i: 0).getOpcode() == ISD::SRL) { | 
|---|
| 2756 | SDValue Shift = N.getOperand(i: 0); | 
|---|
| 2757 | SDValue X = Shift.getOperand(i: 0); | 
|---|
| 2758 |  | 
|---|
| 2759 | uint64_t Mask = N.getConstantOperandVal(i: 1); | 
|---|
| 2760 |  | 
|---|
| 2761 | // Try to fold the mask and shift into an extract and scale. | 
|---|
| 2762 | if (!foldMaskAndShiftToExtract(DAG&: *CurDAG, N, Mask, Shift, X, AM)) | 
|---|
| 2763 | return false; | 
|---|
| 2764 |  | 
|---|
| 2765 | // Try to fold the mask and shift directly into the scale. | 
|---|
| 2766 | if (!foldMaskAndShiftToScale(DAG&: *CurDAG, N, Mask, Shift, X, AM)) | 
|---|
| 2767 | return false; | 
|---|
| 2768 |  | 
|---|
| 2769 | // Try to fold the mask and shift into BEXTR and scale. | 
|---|
| 2770 | if (!foldMaskedShiftToBEXTR(DAG&: *CurDAG, N, Mask, Shift, X, AM, Subtarget: *Subtarget)) | 
|---|
| 2771 | return false; | 
|---|
| 2772 | } | 
|---|
| 2773 |  | 
|---|
| 2774 | // Try to swap the mask and shift to place shifts which can be done as | 
|---|
| 2775 | // a scale on the outside of the mask. | 
|---|
| 2776 | if (!foldMaskedShiftToScaledMask(DAG&: *CurDAG, N, AM)) | 
|---|
| 2777 | return false; | 
|---|
| 2778 |  | 
|---|
| 2779 | break; | 
|---|
| 2780 | } | 
|---|
| 2781 | case ISD::ZERO_EXTEND: { | 
|---|
| 2782 | // Try to widen a zexted shift left to the same size as its use, so we can | 
|---|
| 2783 | // match the shift as a scale factor. | 
|---|
| 2784 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) | 
|---|
| 2785 | break; | 
|---|
| 2786 |  | 
|---|
| 2787 | SDValue Src = N.getOperand(i: 0); | 
|---|
| 2788 |  | 
|---|
| 2789 | // See if we can match a zext(addlike(x,c)). | 
|---|
| 2790 | // TODO: Move more ZERO_EXTEND patterns into matchIndexRecursively. | 
|---|
| 2791 | if (Src.getOpcode() == ISD::ADD || Src.getOpcode() == ISD::OR) | 
|---|
| 2792 | if (SDValue Index = matchIndexRecursively(N, AM, Depth: Depth + 1)) | 
|---|
| 2793 | if (Index != N) { | 
|---|
| 2794 | AM.IndexReg = Index; | 
|---|
| 2795 | return false; | 
|---|
| 2796 | } | 
|---|
| 2797 |  | 
|---|
| 2798 | // Peek through mask: zext(and(shl(x,c1),c2)) | 
|---|
| 2799 | APInt Mask = APInt::getAllOnes(numBits: Src.getScalarValueSizeInBits()); | 
|---|
| 2800 | if (Src.getOpcode() == ISD::AND && Src.hasOneUse()) | 
|---|
| 2801 | if (auto *MaskC = dyn_cast<ConstantSDNode>(Val: Src.getOperand(i: 1))) { | 
|---|
| 2802 | Mask = MaskC->getAPIntValue(); | 
|---|
| 2803 | Src = Src.getOperand(i: 0); | 
|---|
| 2804 | } | 
|---|
| 2805 |  | 
|---|
| 2806 | if (Src.getOpcode() == ISD::SHL && Src.hasOneUse() && N->hasOneUse()) { | 
|---|
| 2807 | // Give up if the shift is not a valid scale factor [1,2,3]. | 
|---|
| 2808 | SDValue ShlSrc = Src.getOperand(i: 0); | 
|---|
| 2809 | SDValue ShlAmt = Src.getOperand(i: 1); | 
|---|
| 2810 | auto *ShAmtC = dyn_cast<ConstantSDNode>(Val&: ShlAmt); | 
|---|
| 2811 | if (!ShAmtC) | 
|---|
| 2812 | break; | 
|---|
| 2813 | unsigned ShAmtV = ShAmtC->getZExtValue(); | 
|---|
| 2814 | if (ShAmtV > 3) | 
|---|
| 2815 | break; | 
|---|
| 2816 |  | 
|---|
| 2817 | // The narrow shift must only shift out zero bits (it must be 'nuw'). | 
|---|
| 2818 | // That makes it safe to widen to the destination type. | 
|---|
| 2819 | APInt HighZeros = | 
|---|
| 2820 | APInt::getHighBitsSet(numBits: ShlSrc.getValueSizeInBits(), hiBitsSet: ShAmtV); | 
|---|
| 2821 | if (!Src->getFlags().hasNoUnsignedWrap() && | 
|---|
| 2822 | !CurDAG->MaskedValueIsZero(Op: ShlSrc, Mask: HighZeros & Mask)) | 
|---|
| 2823 | break; | 
|---|
| 2824 |  | 
|---|
| 2825 | // zext (shl nuw i8 %x, C1) to i32 | 
|---|
| 2826 | // --> shl (zext i8 %x to i32), (zext C1) | 
|---|
| 2827 | // zext (and (shl nuw i8 %x, C1), C2) to i32 | 
|---|
| 2828 | // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1) | 
|---|
| 2829 | MVT SrcVT = ShlSrc.getSimpleValueType(); | 
|---|
| 2830 | MVT VT = N.getSimpleValueType(); | 
|---|
| 2831 | SDLoc DL(N); | 
|---|
| 2832 |  | 
|---|
| 2833 | SDValue Res = ShlSrc; | 
|---|
| 2834 | if (!Mask.isAllOnes()) { | 
|---|
| 2835 | Res = CurDAG->getConstant(Val: Mask.lshr(shiftAmt: ShAmtV), DL, VT: SrcVT); | 
|---|
| 2836 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: Res); | 
|---|
| 2837 | Res = CurDAG->getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: ShlSrc, N2: Res); | 
|---|
| 2838 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: Res); | 
|---|
| 2839 | } | 
|---|
| 2840 | SDValue Zext = CurDAG->getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Res); | 
|---|
| 2841 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: Zext); | 
|---|
| 2842 | SDValue NewShl = CurDAG->getNode(Opcode: ISD::SHL, DL, VT, N1: Zext, N2: ShlAmt); | 
|---|
| 2843 | insertDAGNode(DAG&: *CurDAG, Pos: N, N: NewShl); | 
|---|
| 2844 | CurDAG->ReplaceAllUsesWith(From: N, To: NewShl); | 
|---|
| 2845 | CurDAG->RemoveDeadNode(N: N.getNode()); | 
|---|
| 2846 |  | 
|---|
| 2847 | // Convert the shift to scale factor. | 
|---|
| 2848 | AM.Scale = 1 << ShAmtV; | 
|---|
| 2849 | // If matchIndexRecursively is not called here, | 
|---|
| 2850 | // Zext may be replaced by other nodes but later used to call a builder | 
|---|
| 2851 | // method | 
|---|
| 2852 | AM.IndexReg = matchIndexRecursively(N: Zext, AM, Depth: Depth + 1); | 
|---|
| 2853 | return false; | 
|---|
| 2854 | } | 
|---|
| 2855 |  | 
|---|
| 2856 | if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes()) { | 
|---|
| 2857 | // Try to fold the mask and shift into an extract and scale. | 
|---|
| 2858 | if (!foldMaskAndShiftToExtract(DAG&: *CurDAG, N, Mask: Mask.getZExtValue(), Shift: Src, | 
|---|
| 2859 | X: Src.getOperand(i: 0), AM)) | 
|---|
| 2860 | return false; | 
|---|
| 2861 |  | 
|---|
| 2862 | // Try to fold the mask and shift directly into the scale. | 
|---|
| 2863 | if (!foldMaskAndShiftToScale(DAG&: *CurDAG, N, Mask: Mask.getZExtValue(), Shift: Src, | 
|---|
| 2864 | X: Src.getOperand(i: 0), AM)) | 
|---|
| 2865 | return false; | 
|---|
| 2866 |  | 
|---|
| 2867 | // Try to fold the mask and shift into BEXTR and scale. | 
|---|
| 2868 | if (!foldMaskedShiftToBEXTR(DAG&: *CurDAG, N, Mask: Mask.getZExtValue(), Shift: Src, | 
|---|
| 2869 | X: Src.getOperand(i: 0), AM, Subtarget: *Subtarget)) | 
|---|
| 2870 | return false; | 
|---|
| 2871 | } | 
|---|
| 2872 |  | 
|---|
| 2873 | break; | 
|---|
| 2874 | } | 
|---|
| 2875 | } | 
|---|
| 2876 |  | 
|---|
| 2877 | return matchAddressBase(N, AM); | 
|---|
| 2878 | } | 
|---|
| 2879 |  | 
|---|
| 2880 | /// Helper for MatchAddress. Add the specified node to the | 
|---|
| 2881 | /// specified addressing mode without any further recursion. | 
|---|
| 2882 | bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { | 
|---|
| 2883 | // Is the base register already occupied? | 
|---|
| 2884 | if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { | 
|---|
| 2885 | // If so, check to see if the scale index register is set. | 
|---|
| 2886 | if (!AM.IndexReg.getNode()) { | 
|---|
| 2887 | AM.IndexReg = N; | 
|---|
| 2888 | AM.Scale = 1; | 
|---|
| 2889 | return false; | 
|---|
| 2890 | } | 
|---|
| 2891 |  | 
|---|
| 2892 | // Otherwise, we cannot select it. | 
|---|
| 2893 | return true; | 
|---|
| 2894 | } | 
|---|
| 2895 |  | 
|---|
| 2896 | // Default, generate it as a register. | 
|---|
| 2897 | AM.BaseType = X86ISelAddressMode::RegBase; | 
|---|
| 2898 | AM.Base_Reg = N; | 
|---|
| 2899 | return false; | 
|---|
| 2900 | } | 
|---|
| 2901 |  | 
|---|
| 2902 | bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N, | 
|---|
| 2903 | X86ISelAddressMode &AM, | 
|---|
| 2904 | unsigned Depth) { | 
|---|
| 2905 | LLVM_DEBUG({ | 
|---|
| 2906 | dbgs() << "MatchVectorAddress: "; | 
|---|
| 2907 | AM.dump(CurDAG); | 
|---|
| 2908 | }); | 
|---|
| 2909 | // Limit recursion. | 
|---|
| 2910 | if (Depth >= SelectionDAG::MaxRecursionDepth) | 
|---|
| 2911 | return matchAddressBase(N, AM); | 
|---|
| 2912 |  | 
|---|
| 2913 | // TODO: Support other operations. | 
|---|
| 2914 | switch (N.getOpcode()) { | 
|---|
| 2915 | case ISD::Constant: { | 
|---|
| 2916 | uint64_t Val = cast<ConstantSDNode>(Val&: N)->getSExtValue(); | 
|---|
| 2917 | if (!foldOffsetIntoAddress(Offset: Val, AM)) | 
|---|
| 2918 | return false; | 
|---|
| 2919 | break; | 
|---|
| 2920 | } | 
|---|
| 2921 | case X86ISD::Wrapper: | 
|---|
| 2922 | if (!matchWrapper(N, AM)) | 
|---|
| 2923 | return false; | 
|---|
| 2924 | break; | 
|---|
| 2925 | case ISD::ADD: { | 
|---|
| 2926 | // Add an artificial use to this node so that we can keep track of | 
|---|
| 2927 | // it if it gets CSE'd with a different node. | 
|---|
| 2928 | HandleSDNode Handle(N); | 
|---|
| 2929 |  | 
|---|
| 2930 | X86ISelAddressMode Backup = AM; | 
|---|
| 2931 | if (!matchVectorAddressRecursively(N: N.getOperand(i: 0), AM, Depth: Depth + 1) && | 
|---|
| 2932 | !matchVectorAddressRecursively(N: Handle.getValue().getOperand(i: 1), AM, | 
|---|
| 2933 | Depth: Depth + 1)) | 
|---|
| 2934 | return false; | 
|---|
| 2935 | AM = Backup; | 
|---|
| 2936 |  | 
|---|
| 2937 | // Try again after commuting the operands. | 
|---|
| 2938 | if (!matchVectorAddressRecursively(N: Handle.getValue().getOperand(i: 1), AM, | 
|---|
| 2939 | Depth: Depth + 1) && | 
|---|
| 2940 | !matchVectorAddressRecursively(N: Handle.getValue().getOperand(i: 0), AM, | 
|---|
| 2941 | Depth: Depth + 1)) | 
|---|
| 2942 | return false; | 
|---|
| 2943 | AM = Backup; | 
|---|
| 2944 |  | 
|---|
| 2945 | N = Handle.getValue(); | 
|---|
| 2946 | break; | 
|---|
| 2947 | } | 
|---|
| 2948 | } | 
|---|
| 2949 |  | 
|---|
| 2950 | return matchAddressBase(N, AM); | 
|---|
| 2951 | } | 
|---|
| 2952 |  | 
|---|
| 2953 | /// Helper for selectVectorAddr. Handles things that can be folded into a | 
|---|
| 2954 | /// gather/scatter address. The index register and scale should have already | 
|---|
| 2955 | /// been handled. | 
|---|
| 2956 | bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) { | 
|---|
| 2957 | return matchVectorAddressRecursively(N, AM, Depth: 0); | 
|---|
| 2958 | } | 
|---|
| 2959 |  | 
|---|
| 2960 | bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, | 
|---|
| 2961 | SDValue IndexOp, SDValue ScaleOp, | 
|---|
| 2962 | SDValue &Base, SDValue &Scale, | 
|---|
| 2963 | SDValue &Index, SDValue &Disp, | 
|---|
| 2964 | SDValue &Segment) { | 
|---|
| 2965 | X86ISelAddressMode AM; | 
|---|
| 2966 | AM.Scale = ScaleOp->getAsZExtVal(); | 
|---|
| 2967 |  | 
|---|
| 2968 | // Attempt to match index patterns, as long as we're not relying on implicit | 
|---|
| 2969 | // sign-extension, which is performed BEFORE scale. | 
|---|
| 2970 | if (IndexOp.getScalarValueSizeInBits() == BasePtr.getScalarValueSizeInBits()) | 
|---|
| 2971 | AM.IndexReg = matchIndexRecursively(N: IndexOp, AM, Depth: 0); | 
|---|
| 2972 | else | 
|---|
| 2973 | AM.IndexReg = IndexOp; | 
|---|
| 2974 |  | 
|---|
| 2975 | unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace(); | 
|---|
| 2976 | if (AddrSpace == X86AS::GS) | 
|---|
| 2977 | AM.Segment = CurDAG->getRegister(Reg: X86::GS, VT: MVT::i16); | 
|---|
| 2978 | if (AddrSpace == X86AS::FS) | 
|---|
| 2979 | AM.Segment = CurDAG->getRegister(Reg: X86::FS, VT: MVT::i16); | 
|---|
| 2980 | if (AddrSpace == X86AS::SS) | 
|---|
| 2981 | AM.Segment = CurDAG->getRegister(Reg: X86::SS, VT: MVT::i16); | 
|---|
| 2982 |  | 
|---|
| 2983 | SDLoc DL(BasePtr); | 
|---|
| 2984 | MVT VT = BasePtr.getSimpleValueType(); | 
|---|
| 2985 |  | 
|---|
| 2986 | // Try to match into the base and displacement fields. | 
|---|
| 2987 | if (matchVectorAddress(N: BasePtr, AM)) | 
|---|
| 2988 | return false; | 
|---|
| 2989 |  | 
|---|
| 2990 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); | 
|---|
| 2991 | return true; | 
|---|
| 2992 | } | 
|---|
| 2993 |  | 
|---|
| 2994 | /// Returns true if it is able to pattern match an addressing mode. | 
|---|
| 2995 | /// It returns the operands which make up the maximal addressing mode it can | 
|---|
| 2996 | /// match by reference. | 
|---|
| 2997 | /// | 
|---|
| 2998 | /// Parent is the parent node of the addr operand that is being matched.  It | 
|---|
| 2999 | /// is always a load, store, atomic node, or null.  It is only null when | 
|---|
| 3000 | /// checking memory operands for inline asm nodes. | 
|---|
| 3001 | bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, | 
|---|
| 3002 | SDValue &Scale, SDValue &Index, | 
|---|
| 3003 | SDValue &Disp, SDValue &Segment) { | 
|---|
| 3004 | X86ISelAddressMode AM; | 
|---|
| 3005 |  | 
|---|
| 3006 | if (Parent && | 
|---|
| 3007 | // This list of opcodes are all the nodes that have an "addr:$ptr" operand | 
|---|
| 3008 | // that are not a MemSDNode, and thus don't have proper addrspace info. | 
|---|
| 3009 | Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme | 
|---|
| 3010 | Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores | 
|---|
| 3011 | Parent->getOpcode() != X86ISD::TLSCALL && // Fixme | 
|---|
| 3012 | Parent->getOpcode() != X86ISD::ENQCMD && // Fixme | 
|---|
| 3013 | Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme | 
|---|
| 3014 | Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp | 
|---|
| 3015 | Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp | 
|---|
| 3016 | unsigned AddrSpace = | 
|---|
| 3017 | cast<MemSDNode>(Val: Parent)->getPointerInfo().getAddrSpace(); | 
|---|
| 3018 | if (AddrSpace == X86AS::GS) | 
|---|
| 3019 | AM.Segment = CurDAG->getRegister(Reg: X86::GS, VT: MVT::i16); | 
|---|
| 3020 | if (AddrSpace == X86AS::FS) | 
|---|
| 3021 | AM.Segment = CurDAG->getRegister(Reg: X86::FS, VT: MVT::i16); | 
|---|
| 3022 | if (AddrSpace == X86AS::SS) | 
|---|
| 3023 | AM.Segment = CurDAG->getRegister(Reg: X86::SS, VT: MVT::i16); | 
|---|
| 3024 | } | 
|---|
| 3025 |  | 
|---|
| 3026 | // Save the DL and VT before calling matchAddress, it can invalidate N. | 
|---|
| 3027 | SDLoc DL(N); | 
|---|
| 3028 | MVT VT = N.getSimpleValueType(); | 
|---|
| 3029 |  | 
|---|
| 3030 | if (matchAddress(N, AM)) | 
|---|
| 3031 | return false; | 
|---|
| 3032 |  | 
|---|
| 3033 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); | 
|---|
| 3034 | return true; | 
|---|
| 3035 | } | 
|---|
| 3036 |  | 
|---|
| 3037 | bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { | 
|---|
| 3038 | // Cannot use 32 bit constants to reference objects in kernel/large code | 
|---|
| 3039 | // model. | 
|---|
| 3040 | if (TM.getCodeModel() == CodeModel::Kernel || | 
|---|
| 3041 | TM.getCodeModel() == CodeModel::Large) | 
|---|
| 3042 | return false; | 
|---|
| 3043 |  | 
|---|
| 3044 | // In static codegen with small code model, we can get the address of a label | 
|---|
| 3045 | // into a register with 'movl' | 
|---|
| 3046 | if (N->getOpcode() != X86ISD::Wrapper) | 
|---|
| 3047 | return false; | 
|---|
| 3048 |  | 
|---|
| 3049 | N = N.getOperand(i: 0); | 
|---|
| 3050 |  | 
|---|
| 3051 | // At least GNU as does not accept 'movl' for TPOFF relocations. | 
|---|
| 3052 | // FIXME: We could use 'movl' when we know we are targeting MC. | 
|---|
| 3053 | if (N->getOpcode() == ISD::TargetGlobalTLSAddress) | 
|---|
| 3054 | return false; | 
|---|
| 3055 |  | 
|---|
| 3056 | Imm = N; | 
|---|
| 3057 | // Small/medium code model can reference non-TargetGlobalAddress objects with | 
|---|
| 3058 | // 32 bit constants. | 
|---|
| 3059 | if (N->getOpcode() != ISD::TargetGlobalAddress) { | 
|---|
| 3060 | return TM.getCodeModel() == CodeModel::Small || | 
|---|
| 3061 | TM.getCodeModel() == CodeModel::Medium; | 
|---|
| 3062 | } | 
|---|
| 3063 |  | 
|---|
| 3064 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Val&: N)->getGlobal(); | 
|---|
| 3065 | if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) | 
|---|
| 3066 | return CR->getUnsignedMax().ult(RHS: 1ull << 32); | 
|---|
| 3067 |  | 
|---|
| 3068 | return !TM.isLargeGlobalValue(GV); | 
|---|
| 3069 | } | 
|---|
| 3070 |  | 
|---|
| 3071 | bool X86DAGToDAGISel::selectLEA64_Addr(SDValue N, SDValue &Base, SDValue &Scale, | 
|---|
| 3072 | SDValue &Index, SDValue &Disp, | 
|---|
| 3073 | SDValue &Segment) { | 
|---|
| 3074 | // Save the debug loc before calling selectLEAAddr, in case it invalidates N. | 
|---|
| 3075 | SDLoc DL(N); | 
|---|
| 3076 |  | 
|---|
| 3077 | if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) | 
|---|
| 3078 | return false; | 
|---|
| 3079 |  | 
|---|
| 3080 | EVT BaseType = Base.getValueType(); | 
|---|
| 3081 | unsigned SubReg; | 
|---|
| 3082 | if (BaseType == MVT::i8) | 
|---|
| 3083 | SubReg = X86::sub_8bit; | 
|---|
| 3084 | else if (BaseType == MVT::i16) | 
|---|
| 3085 | SubReg = X86::sub_16bit; | 
|---|
| 3086 | else | 
|---|
| 3087 | SubReg = X86::sub_32bit; | 
|---|
| 3088 |  | 
|---|
| 3089 | auto *RN = dyn_cast<RegisterSDNode>(Val&: Base); | 
|---|
| 3090 | if (RN && RN->getReg() == 0) | 
|---|
| 3091 | Base = CurDAG->getRegister(Reg: 0, VT: MVT::i64); | 
|---|
| 3092 | else if ((BaseType == MVT::i8 || BaseType == MVT::i16 || | 
|---|
| 3093 | BaseType == MVT::i32) && | 
|---|
| 3094 | !isa<FrameIndexSDNode>(Val: Base)) { | 
|---|
| 3095 | // Base could already be %rip, particularly in the x32 ABI. | 
|---|
| 3096 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(Opcode: X86::IMPLICIT_DEF, dl: DL, | 
|---|
| 3097 | VT: MVT::i64), 0); | 
|---|
| 3098 | Base = CurDAG->getTargetInsertSubreg(SRIdx: SubReg, DL, VT: MVT::i64, Operand: ImplDef, Subreg: Base); | 
|---|
| 3099 | } | 
|---|
| 3100 |  | 
|---|
| 3101 | [[maybe_unused]] EVT IndexType = Index.getValueType(); | 
|---|
| 3102 | RN = dyn_cast<RegisterSDNode>(Val&: Index); | 
|---|
| 3103 | if (RN && RN->getReg() == 0) | 
|---|
| 3104 | Index = CurDAG->getRegister(Reg: 0, VT: MVT::i64); | 
|---|
| 3105 | else { | 
|---|
| 3106 | assert((IndexType == BaseType) && | 
|---|
| 3107 | "Expect to be extending 8/16/32-bit registers for use in LEA"); | 
|---|
| 3108 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(Opcode: X86::IMPLICIT_DEF, dl: DL, | 
|---|
| 3109 | VT: MVT::i64), 0); | 
|---|
| 3110 | Index = CurDAG->getTargetInsertSubreg(SRIdx: SubReg, DL, VT: MVT::i64, Operand: ImplDef, Subreg: Index); | 
|---|
| 3111 | } | 
|---|
| 3112 |  | 
|---|
| 3113 | return true; | 
|---|
| 3114 | } | 
|---|
| 3115 |  | 
|---|
| 3116 | /// Calls SelectAddr and determines if the maximal addressing | 
|---|
| 3117 | /// mode it matches can be cost effectively emitted as an LEA instruction. | 
|---|
| 3118 | bool X86DAGToDAGISel::selectLEAAddr(SDValue N, | 
|---|
| 3119 | SDValue &Base, SDValue &Scale, | 
|---|
| 3120 | SDValue &Index, SDValue &Disp, | 
|---|
| 3121 | SDValue &Segment) { | 
|---|
| 3122 | X86ISelAddressMode AM; | 
|---|
| 3123 |  | 
|---|
| 3124 | // Save the DL and VT before calling matchAddress, it can invalidate N. | 
|---|
| 3125 | SDLoc DL(N); | 
|---|
| 3126 | MVT VT = N.getSimpleValueType(); | 
|---|
| 3127 |  | 
|---|
| 3128 | // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support | 
|---|
| 3129 | // segments. | 
|---|
| 3130 | SDValue Copy = AM.Segment; | 
|---|
| 3131 | SDValue T = CurDAG->getRegister(Reg: 0, VT: MVT::i32); | 
|---|
| 3132 | AM.Segment = T; | 
|---|
| 3133 | if (matchAddress(N, AM)) | 
|---|
| 3134 | return false; | 
|---|
| 3135 | assert (T == AM.Segment); | 
|---|
| 3136 | AM.Segment = Copy; | 
|---|
| 3137 |  | 
|---|
| 3138 | unsigned Complexity = 0; | 
|---|
| 3139 | if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode()) | 
|---|
| 3140 | Complexity = 1; | 
|---|
| 3141 | else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) | 
|---|
| 3142 | Complexity = 4; | 
|---|
| 3143 |  | 
|---|
| 3144 | if (AM.IndexReg.getNode()) | 
|---|
| 3145 | Complexity++; | 
|---|
| 3146 |  | 
|---|
| 3147 | // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with | 
|---|
| 3148 | // a simple shift. | 
|---|
| 3149 | if (AM.Scale > 1) | 
|---|
| 3150 | Complexity++; | 
|---|
| 3151 |  | 
|---|
| 3152 | // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA | 
|---|
| 3153 | // to a LEA. This is determined with some experimentation but is by no means | 
|---|
| 3154 | // optimal (especially for code size consideration). LEA is nice because of | 
|---|
| 3155 | // its three-address nature. Tweak the cost function again when we can run | 
|---|
| 3156 | // convertToThreeAddress() at register allocation time. | 
|---|
| 3157 | if (AM.hasSymbolicDisplacement()) { | 
|---|
| 3158 | // For X86-64, always use LEA to materialize RIP-relative addresses. | 
|---|
| 3159 | if (Subtarget->is64Bit()) | 
|---|
| 3160 | Complexity = 4; | 
|---|
| 3161 | else | 
|---|
| 3162 | Complexity += 2; | 
|---|
| 3163 | } | 
|---|
| 3164 |  | 
|---|
| 3165 | // Heuristic: try harder to form an LEA from ADD if the operands set flags. | 
|---|
| 3166 | // Unlike ADD, LEA does not affect flags, so we will be less likely to require | 
|---|
| 3167 | // duplicating flag-producing instructions later in the pipeline. | 
|---|
| 3168 | if (N.getOpcode() == ISD::ADD) { | 
|---|
| 3169 | auto isMathWithFlags = [](SDValue V) { | 
|---|
| 3170 | switch (V.getOpcode()) { | 
|---|
| 3171 | case X86ISD::ADD: | 
|---|
| 3172 | case X86ISD::SUB: | 
|---|
| 3173 | case X86ISD::ADC: | 
|---|
| 3174 | case X86ISD::SBB: | 
|---|
| 3175 | case X86ISD::SMUL: | 
|---|
| 3176 | case X86ISD::UMUL: | 
|---|
| 3177 | /* TODO: These opcodes can be added safely, but we may want to justify | 
|---|
| 3178 | their inclusion for different reasons (better for reg-alloc). | 
|---|
| 3179 | case X86ISD::OR: | 
|---|
| 3180 | case X86ISD::XOR: | 
|---|
| 3181 | case X86ISD::AND: | 
|---|
| 3182 | */ | 
|---|
| 3183 | // Value 1 is the flag output of the node - verify it's not dead. | 
|---|
| 3184 | return !SDValue(V.getNode(), 1).use_empty(); | 
|---|
| 3185 | default: | 
|---|
| 3186 | return false; | 
|---|
| 3187 | } | 
|---|
| 3188 | }; | 
|---|
| 3189 | // TODO: We might want to factor in whether there's a load folding | 
|---|
| 3190 | // opportunity for the math op that disappears with LEA. | 
|---|
| 3191 | if (isMathWithFlags(N.getOperand(i: 0)) || isMathWithFlags(N.getOperand(i: 1))) | 
|---|
| 3192 | Complexity++; | 
|---|
| 3193 | } | 
|---|
| 3194 |  | 
|---|
| 3195 | if (AM.Disp) | 
|---|
| 3196 | Complexity++; | 
|---|
| 3197 |  | 
|---|
| 3198 | // If it isn't worth using an LEA, reject it. | 
|---|
| 3199 | if (Complexity <= 2) | 
|---|
| 3200 | return false; | 
|---|
| 3201 |  | 
|---|
| 3202 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); | 
|---|
| 3203 | return true; | 
|---|
| 3204 | } | 
|---|
| 3205 |  | 
|---|
| 3206 | /// This is only run on TargetGlobalTLSAddress nodes. | 
|---|
| 3207 | bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, | 
|---|
| 3208 | SDValue &Scale, SDValue &Index, | 
|---|
| 3209 | SDValue &Disp, SDValue &Segment) { | 
|---|
| 3210 | assert(N.getOpcode() == ISD::TargetGlobalTLSAddress || | 
|---|
| 3211 | N.getOpcode() == ISD::TargetExternalSymbol); | 
|---|
| 3212 |  | 
|---|
| 3213 | X86ISelAddressMode AM; | 
|---|
| 3214 | if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: N)) { | 
|---|
| 3215 | AM.GV = GA->getGlobal(); | 
|---|
| 3216 | AM.Disp += GA->getOffset(); | 
|---|
| 3217 | AM.SymbolFlags = GA->getTargetFlags(); | 
|---|
| 3218 | } else { | 
|---|
| 3219 | auto *SA = cast<ExternalSymbolSDNode>(Val&: N); | 
|---|
| 3220 | AM.ES = SA->getSymbol(); | 
|---|
| 3221 | AM.SymbolFlags = SA->getTargetFlags(); | 
|---|
| 3222 | } | 
|---|
| 3223 |  | 
|---|
| 3224 | if (Subtarget->is32Bit()) { | 
|---|
| 3225 | AM.Scale = 1; | 
|---|
| 3226 | AM.IndexReg = CurDAG->getRegister(Reg: X86::EBX, VT: MVT::i32); | 
|---|
| 3227 | } | 
|---|
| 3228 |  | 
|---|
| 3229 | MVT VT = N.getSimpleValueType(); | 
|---|
| 3230 | getAddressOperands(AM, DL: SDLoc(N), VT, Base, Scale, Index, Disp, Segment); | 
|---|
| 3231 | return true; | 
|---|
| 3232 | } | 
|---|
| 3233 |  | 
|---|
| 3234 | bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) { | 
|---|
| 3235 | // Keep track of the original value type and whether this value was | 
|---|
| 3236 | // truncated. If we see a truncation from pointer type to VT that truncates | 
|---|
| 3237 | // bits that are known to be zero, we can use a narrow reference. | 
|---|
| 3238 | EVT VT = N.getValueType(); | 
|---|
| 3239 | bool WasTruncated = false; | 
|---|
| 3240 | if (N.getOpcode() == ISD::TRUNCATE) { | 
|---|
| 3241 | WasTruncated = true; | 
|---|
| 3242 | N = N.getOperand(i: 0); | 
|---|
| 3243 | } | 
|---|
| 3244 |  | 
|---|
| 3245 | if (N.getOpcode() != X86ISD::Wrapper) | 
|---|
| 3246 | return false; | 
|---|
| 3247 |  | 
|---|
| 3248 | // We can only use non-GlobalValues as immediates if they were not truncated, | 
|---|
| 3249 | // as we do not have any range information. If we have a GlobalValue and the | 
|---|
| 3250 | // address was not truncated, we can select it as an operand directly. | 
|---|
| 3251 | unsigned Opc = N.getOperand(i: 0)->getOpcode(); | 
|---|
| 3252 | if (Opc != ISD::TargetGlobalAddress || !WasTruncated) { | 
|---|
| 3253 | Op = N.getOperand(i: 0); | 
|---|
| 3254 | // We can only select the operand directly if we didn't have to look past a | 
|---|
| 3255 | // truncate. | 
|---|
| 3256 | return !WasTruncated; | 
|---|
| 3257 | } | 
|---|
| 3258 |  | 
|---|
| 3259 | // Check that the global's range fits into VT. | 
|---|
| 3260 | auto *GA = cast<GlobalAddressSDNode>(Val: N.getOperand(i: 0)); | 
|---|
| 3261 | std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange(); | 
|---|
| 3262 | if (!CR || CR->getUnsignedMax().uge(RHS: 1ull << VT.getSizeInBits())) | 
|---|
| 3263 | return false; | 
|---|
| 3264 |  | 
|---|
| 3265 | // Okay, we can use a narrow reference. | 
|---|
| 3266 | Op = CurDAG->getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(N), VT, | 
|---|
| 3267 | offset: GA->getOffset(), TargetFlags: GA->getTargetFlags()); | 
|---|
| 3268 | return true; | 
|---|
| 3269 | } | 
|---|
| 3270 |  | 
|---|
| 3271 | bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, | 
|---|
| 3272 | SDValue &Base, SDValue &Scale, | 
|---|
| 3273 | SDValue &Index, SDValue &Disp, | 
|---|
| 3274 | SDValue &Segment) { | 
|---|
| 3275 | assert(Root && P && "Unknown root/parent nodes"); | 
|---|
| 3276 | if (!ISD::isNON_EXTLoad(N: N.getNode()) || | 
|---|
| 3277 | !IsProfitableToFold(N, U: P, Root) || | 
|---|
| 3278 | !IsLegalToFold(N, U: P, Root, OptLevel)) | 
|---|
| 3279 | return false; | 
|---|
| 3280 |  | 
|---|
| 3281 | return selectAddr(Parent: N.getNode(), | 
|---|
| 3282 | N: N.getOperand(i: 1), Base, Scale, Index, Disp, Segment); | 
|---|
| 3283 | } | 
|---|
| 3284 |  | 
|---|
| 3285 | bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, | 
|---|
| 3286 | SDValue &Base, SDValue &Scale, | 
|---|
| 3287 | SDValue &Index, SDValue &Disp, | 
|---|
| 3288 | SDValue &Segment) { | 
|---|
| 3289 | assert(Root && P && "Unknown root/parent nodes"); | 
|---|
| 3290 | if (N->getOpcode() != X86ISD::VBROADCAST_LOAD || | 
|---|
| 3291 | !IsProfitableToFold(N, U: P, Root) || | 
|---|
| 3292 | !IsLegalToFold(N, U: P, Root, OptLevel)) | 
|---|
| 3293 | return false; | 
|---|
| 3294 |  | 
|---|
| 3295 | return selectAddr(Parent: N.getNode(), | 
|---|
| 3296 | N: N.getOperand(i: 1), Base, Scale, Index, Disp, Segment); | 
|---|
| 3297 | } | 
|---|
| 3298 |  | 
|---|
| 3299 | /// Return an SDNode that returns the value of the global base register. | 
|---|
| 3300 | /// Output instructions required to initialize the global base register, | 
|---|
| 3301 | /// if necessary. | 
|---|
| 3302 | SDNode *X86DAGToDAGISel::getGlobalBaseReg() { | 
|---|
| 3303 | Register GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); | 
|---|
| 3304 | auto &DL = MF->getDataLayout(); | 
|---|
| 3305 | return CurDAG->getRegister(Reg: GlobalBaseReg, VT: TLI->getPointerTy(DL)).getNode(); | 
|---|
| 3306 | } | 
|---|
| 3307 |  | 
|---|
| 3308 | bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const { | 
|---|
| 3309 | if (N->getOpcode() == ISD::TRUNCATE) | 
|---|
| 3310 | N = N->getOperand(Num: 0).getNode(); | 
|---|
| 3311 | if (N->getOpcode() != X86ISD::Wrapper) | 
|---|
| 3312 | return false; | 
|---|
| 3313 |  | 
|---|
| 3314 | auto *GA = dyn_cast<GlobalAddressSDNode>(Val: N->getOperand(Num: 0)); | 
|---|
| 3315 | if (!GA) | 
|---|
| 3316 | return false; | 
|---|
| 3317 |  | 
|---|
| 3318 | auto *GV = GA->getGlobal(); | 
|---|
| 3319 | std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange(); | 
|---|
| 3320 | if (CR) | 
|---|
| 3321 | return CR->getSignedMin().sge(RHS: -1ull << Width) && | 
|---|
| 3322 | CR->getSignedMax().slt(RHS: 1ull << Width); | 
|---|
| 3323 | // In the kernel code model, globals are in the negative 2GB of the address | 
|---|
| 3324 | // space, so globals can be a sign extended 32-bit immediate. | 
|---|
| 3325 | // In other code models, small globals are in the low 2GB of the address | 
|---|
| 3326 | // space, so sign extending them is equivalent to zero extending them. | 
|---|
| 3327 | return Width == 32 && !TM.isLargeGlobalValue(GV); | 
|---|
| 3328 | } | 
|---|
| 3329 |  | 
|---|
| 3330 | X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const { | 
|---|
| 3331 | assert(N->isMachineOpcode() && "Unexpected node"); | 
|---|
| 3332 | unsigned Opc = N->getMachineOpcode(); | 
|---|
| 3333 | const MCInstrDesc &MCID = getInstrInfo()->get(Opcode: Opc); | 
|---|
| 3334 | int CondNo = X86::getCondSrcNoFromDesc(MCID); | 
|---|
| 3335 | if (CondNo < 0) | 
|---|
| 3336 | return X86::COND_INVALID; | 
|---|
| 3337 |  | 
|---|
| 3338 | return static_cast<X86::CondCode>(N->getConstantOperandVal(Num: CondNo)); | 
|---|
| 3339 | } | 
|---|
| 3340 |  | 
|---|
| 3341 | /// Test whether the given X86ISD::CMP node has any users that use a flag | 
|---|
| 3342 | /// other than ZF. | 
|---|
| 3343 | bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const { | 
|---|
| 3344 | // Examine each user of the node. | 
|---|
| 3345 | for (SDUse &Use : Flags->uses()) { | 
|---|
| 3346 | // Only check things that use the flags. | 
|---|
| 3347 | if (Use.getResNo() != Flags.getResNo()) | 
|---|
| 3348 | continue; | 
|---|
| 3349 | SDNode *User = Use.getUser(); | 
|---|
| 3350 | // Only examine CopyToReg uses that copy to EFLAGS. | 
|---|
| 3351 | if (User->getOpcode() != ISD::CopyToReg || | 
|---|
| 3352 | cast<RegisterSDNode>(Val: User->getOperand(Num: 1))->getReg() != X86::EFLAGS) | 
|---|
| 3353 | return false; | 
|---|
| 3354 | // Examine each user of the CopyToReg use. | 
|---|
| 3355 | for (SDUse &FlagUse : User->uses()) { | 
|---|
| 3356 | // Only examine the Flag result. | 
|---|
| 3357 | if (FlagUse.getResNo() != 1) | 
|---|
| 3358 | continue; | 
|---|
| 3359 | // Anything unusual: assume conservatively. | 
|---|
| 3360 | if (!FlagUse.getUser()->isMachineOpcode()) | 
|---|
| 3361 | return false; | 
|---|
| 3362 | // Examine the condition code of the user. | 
|---|
| 3363 | X86::CondCode CC = getCondFromNode(N: FlagUse.getUser()); | 
|---|
| 3364 |  | 
|---|
| 3365 | switch (CC) { | 
|---|
| 3366 | // Comparisons which only use the zero flag. | 
|---|
| 3367 | case X86::COND_E: case X86::COND_NE: | 
|---|
| 3368 | continue; | 
|---|
| 3369 | // Anything else: assume conservatively. | 
|---|
| 3370 | default: | 
|---|
| 3371 | return false; | 
|---|
| 3372 | } | 
|---|
| 3373 | } | 
|---|
| 3374 | } | 
|---|
| 3375 | return true; | 
|---|
| 3376 | } | 
|---|
| 3377 |  | 
|---|
| 3378 | /// Test whether the given X86ISD::CMP node has any uses which require the SF | 
|---|
| 3379 | /// flag to be accurate. | 
|---|
| 3380 | bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const { | 
|---|
| 3381 | // Examine each user of the node. | 
|---|
| 3382 | for (SDUse &Use : Flags->uses()) { | 
|---|
| 3383 | // Only check things that use the flags. | 
|---|
| 3384 | if (Use.getResNo() != Flags.getResNo()) | 
|---|
| 3385 | continue; | 
|---|
| 3386 | SDNode *User = Use.getUser(); | 
|---|
| 3387 | // Only examine CopyToReg uses that copy to EFLAGS. | 
|---|
| 3388 | if (User->getOpcode() != ISD::CopyToReg || | 
|---|
| 3389 | cast<RegisterSDNode>(Val: User->getOperand(Num: 1))->getReg() != X86::EFLAGS) | 
|---|
| 3390 | return false; | 
|---|
| 3391 | // Examine each user of the CopyToReg use. | 
|---|
| 3392 | for (SDUse &FlagUse : User->uses()) { | 
|---|
| 3393 | // Only examine the Flag result. | 
|---|
| 3394 | if (FlagUse.getResNo() != 1) | 
|---|
| 3395 | continue; | 
|---|
| 3396 | // Anything unusual: assume conservatively. | 
|---|
| 3397 | if (!FlagUse.getUser()->isMachineOpcode()) | 
|---|
| 3398 | return false; | 
|---|
| 3399 | // Examine the condition code of the user. | 
|---|
| 3400 | X86::CondCode CC = getCondFromNode(N: FlagUse.getUser()); | 
|---|
| 3401 |  | 
|---|
| 3402 | switch (CC) { | 
|---|
| 3403 | // Comparisons which don't examine the SF flag. | 
|---|
| 3404 | case X86::COND_A: case X86::COND_AE: | 
|---|
| 3405 | case X86::COND_B: case X86::COND_BE: | 
|---|
| 3406 | case X86::COND_E: case X86::COND_NE: | 
|---|
| 3407 | case X86::COND_O: case X86::COND_NO: | 
|---|
| 3408 | case X86::COND_P: case X86::COND_NP: | 
|---|
| 3409 | continue; | 
|---|
| 3410 | // Anything else: assume conservatively. | 
|---|
| 3411 | default: | 
|---|
| 3412 | return false; | 
|---|
| 3413 | } | 
|---|
| 3414 | } | 
|---|
| 3415 | } | 
|---|
| 3416 | return true; | 
|---|
| 3417 | } | 
|---|
| 3418 |  | 
|---|
| 3419 | static bool mayUseCarryFlag(X86::CondCode CC) { | 
|---|
| 3420 | switch (CC) { | 
|---|
| 3421 | // Comparisons which don't examine the CF flag. | 
|---|
| 3422 | case X86::COND_O: case X86::COND_NO: | 
|---|
| 3423 | case X86::COND_E: case X86::COND_NE: | 
|---|
| 3424 | case X86::COND_S: case X86::COND_NS: | 
|---|
| 3425 | case X86::COND_P: case X86::COND_NP: | 
|---|
| 3426 | case X86::COND_L: case X86::COND_GE: | 
|---|
| 3427 | case X86::COND_G: case X86::COND_LE: | 
|---|
| 3428 | return false; | 
|---|
| 3429 | // Anything else: assume conservatively. | 
|---|
| 3430 | default: | 
|---|
| 3431 | return true; | 
|---|
| 3432 | } | 
|---|
| 3433 | } | 
|---|
| 3434 |  | 
|---|
| 3435 | /// Test whether the given node which sets flags has any uses which require the | 
|---|
| 3436 | /// CF flag to be accurate. | 
|---|
| 3437 | bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const { | 
|---|
| 3438 | // Examine each user of the node. | 
|---|
| 3439 | for (SDUse &Use : Flags->uses()) { | 
|---|
| 3440 | // Only check things that use the flags. | 
|---|
| 3441 | if (Use.getResNo() != Flags.getResNo()) | 
|---|
| 3442 | continue; | 
|---|
| 3443 |  | 
|---|
| 3444 | SDNode *User = Use.getUser(); | 
|---|
| 3445 | unsigned UserOpc = User->getOpcode(); | 
|---|
| 3446 |  | 
|---|
| 3447 | if (UserOpc == ISD::CopyToReg) { | 
|---|
| 3448 | // Only examine CopyToReg uses that copy to EFLAGS. | 
|---|
| 3449 | if (cast<RegisterSDNode>(Val: User->getOperand(Num: 1))->getReg() != X86::EFLAGS) | 
|---|
| 3450 | return false; | 
|---|
| 3451 | // Examine each user of the CopyToReg use. | 
|---|
| 3452 | for (SDUse &FlagUse : User->uses()) { | 
|---|
| 3453 | // Only examine the Flag result. | 
|---|
| 3454 | if (FlagUse.getResNo() != 1) | 
|---|
| 3455 | continue; | 
|---|
| 3456 | // Anything unusual: assume conservatively. | 
|---|
| 3457 | if (!FlagUse.getUser()->isMachineOpcode()) | 
|---|
| 3458 | return false; | 
|---|
| 3459 | // Examine the condition code of the user. | 
|---|
| 3460 | X86::CondCode CC = getCondFromNode(N: FlagUse.getUser()); | 
|---|
| 3461 |  | 
|---|
| 3462 | if (mayUseCarryFlag(CC)) | 
|---|
| 3463 | return false; | 
|---|
| 3464 | } | 
|---|
| 3465 |  | 
|---|
| 3466 | // This CopyToReg is ok. Move on to the next user. | 
|---|
| 3467 | continue; | 
|---|
| 3468 | } | 
|---|
| 3469 |  | 
|---|
| 3470 | // This might be an unselected node. So look for the pre-isel opcodes that | 
|---|
| 3471 | // use flags. | 
|---|
| 3472 | unsigned CCOpNo; | 
|---|
| 3473 | switch (UserOpc) { | 
|---|
| 3474 | default: | 
|---|
| 3475 | // Something unusual. Be conservative. | 
|---|
| 3476 | return false; | 
|---|
| 3477 | case X86ISD::SETCC:       CCOpNo = 0; break; | 
|---|
| 3478 | case X86ISD::SETCC_CARRY: CCOpNo = 0; break; | 
|---|
| 3479 | case X86ISD::CMOV:        CCOpNo = 2; break; | 
|---|
| 3480 | case X86ISD::BRCOND:      CCOpNo = 2; break; | 
|---|
| 3481 | } | 
|---|
| 3482 |  | 
|---|
| 3483 | X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(Num: CCOpNo); | 
|---|
| 3484 | if (mayUseCarryFlag(CC)) | 
|---|
| 3485 | return false; | 
|---|
| 3486 | } | 
|---|
| 3487 | return true; | 
|---|
| 3488 | } | 
|---|
| 3489 |  | 
|---|
| 3490 | /// Check whether or not the chain ending in StoreNode is suitable for doing | 
|---|
| 3491 | /// the {load; op; store} to modify transformation. | 
|---|
| 3492 | static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, | 
|---|
| 3493 | SDValue StoredVal, SelectionDAG *CurDAG, | 
|---|
| 3494 | unsigned LoadOpNo, | 
|---|
| 3495 | LoadSDNode *&LoadNode, | 
|---|
| 3496 | SDValue &InputChain) { | 
|---|
| 3497 | // Is the stored value result 0 of the operation? | 
|---|
| 3498 | if (StoredVal.getResNo() != 0) return false; | 
|---|
| 3499 |  | 
|---|
| 3500 | // Are there other uses of the operation other than the store? | 
|---|
| 3501 | if (!StoredVal.getNode()->hasNUsesOfValue(NUses: 1, Value: 0)) return false; | 
|---|
| 3502 |  | 
|---|
| 3503 | // Is the store non-extending and non-indexed? | 
|---|
| 3504 | if (!ISD::isNormalStore(N: StoreNode) || StoreNode->isNonTemporal()) | 
|---|
| 3505 | return false; | 
|---|
| 3506 |  | 
|---|
| 3507 | SDValue Load = StoredVal->getOperand(Num: LoadOpNo); | 
|---|
| 3508 | // Is the stored value a non-extending and non-indexed load? | 
|---|
| 3509 | if (!ISD::isNormalLoad(N: Load.getNode())) return false; | 
|---|
| 3510 |  | 
|---|
| 3511 | // Return LoadNode by reference. | 
|---|
| 3512 | LoadNode = cast<LoadSDNode>(Val&: Load); | 
|---|
| 3513 |  | 
|---|
| 3514 | // Is store the only read of the loaded value? | 
|---|
| 3515 | if (!Load.hasOneUse()) | 
|---|
| 3516 | return false; | 
|---|
| 3517 |  | 
|---|
| 3518 | // Is the address of the store the same as the load? | 
|---|
| 3519 | if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || | 
|---|
| 3520 | LoadNode->getOffset() != StoreNode->getOffset()) | 
|---|
| 3521 | return false; | 
|---|
| 3522 |  | 
|---|
| 3523 | bool FoundLoad = false; | 
|---|
| 3524 | SmallVector<SDValue, 4> ChainOps; | 
|---|
| 3525 | SmallVector<const SDNode *, 4> LoopWorklist; | 
|---|
| 3526 | SmallPtrSet<const SDNode *, 16> Visited; | 
|---|
| 3527 | const unsigned int Max = 1024; | 
|---|
| 3528 |  | 
|---|
| 3529 | //  Visualization of Load-Op-Store fusion: | 
|---|
| 3530 | // ------------------------- | 
|---|
| 3531 | // Legend: | 
|---|
| 3532 | //    *-lines = Chain operand dependencies. | 
|---|
| 3533 | //    |-lines = Normal operand dependencies. | 
|---|
| 3534 | //    Dependencies flow down and right. n-suffix references multiple nodes. | 
|---|
| 3535 | // | 
|---|
| 3536 | //        C                        Xn  C | 
|---|
| 3537 | //        *                         *  * | 
|---|
| 3538 | //        *                          * * | 
|---|
| 3539 | //  Xn  A-LD    Yn                    TF         Yn | 
|---|
| 3540 | //   *    * \   |                       *        | | 
|---|
| 3541 | //    *   *  \  |                        *       | | 
|---|
| 3542 | //     *  *   \ |             =>       A--LD_OP_ST | 
|---|
| 3543 | //      * *    \|                                 \ | 
|---|
| 3544 | //       TF    OP                                  \ | 
|---|
| 3545 | //         *   | \                                  Zn | 
|---|
| 3546 | //          *  |  \ | 
|---|
| 3547 | //         A-ST    Zn | 
|---|
| 3548 | // | 
|---|
| 3549 |  | 
|---|
| 3550 | // This merge induced dependences from: #1: Xn -> LD, OP, Zn | 
|---|
| 3551 | //                                      #2: Yn -> LD | 
|---|
| 3552 | //                                      #3: ST -> Zn | 
|---|
| 3553 |  | 
|---|
| 3554 | // Ensure the transform is safe by checking for the dual | 
|---|
| 3555 | // dependencies to make sure we do not induce a loop. | 
|---|
| 3556 |  | 
|---|
| 3557 | // As LD is a predecessor to both OP and ST we can do this by checking: | 
|---|
| 3558 | //  a). if LD is a predecessor to a member of Xn or Yn. | 
|---|
| 3559 | //  b). if a Zn is a predecessor to ST. | 
|---|
| 3560 |  | 
|---|
| 3561 | // However, (b) can only occur through being a chain predecessor to | 
|---|
| 3562 | // ST, which is the same as Zn being a member or predecessor of Xn, | 
|---|
| 3563 | // which is a subset of LD being a predecessor of Xn. So it's | 
|---|
| 3564 | // subsumed by check (a). | 
|---|
| 3565 |  | 
|---|
| 3566 | SDValue Chain = StoreNode->getChain(); | 
|---|
| 3567 |  | 
|---|
| 3568 | // Gather X elements in ChainOps. | 
|---|
| 3569 | if (Chain == Load.getValue(R: 1)) { | 
|---|
| 3570 | FoundLoad = true; | 
|---|
| 3571 | ChainOps.push_back(Elt: Load.getOperand(i: 0)); | 
|---|
| 3572 | } else if (Chain.getOpcode() == ISD::TokenFactor) { | 
|---|
| 3573 | for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { | 
|---|
| 3574 | SDValue Op = Chain.getOperand(i); | 
|---|
| 3575 | if (Op == Load.getValue(R: 1)) { | 
|---|
| 3576 | FoundLoad = true; | 
|---|
| 3577 | // Drop Load, but keep its chain. No cycle check necessary. | 
|---|
| 3578 | ChainOps.push_back(Elt: Load.getOperand(i: 0)); | 
|---|
| 3579 | continue; | 
|---|
| 3580 | } | 
|---|
| 3581 | LoopWorklist.push_back(Elt: Op.getNode()); | 
|---|
| 3582 | ChainOps.push_back(Elt: Op); | 
|---|
| 3583 | } | 
|---|
| 3584 | } | 
|---|
| 3585 |  | 
|---|
| 3586 | if (!FoundLoad) | 
|---|
| 3587 | return false; | 
|---|
| 3588 |  | 
|---|
| 3589 | // Worklist is currently Xn. Add Yn to worklist. | 
|---|
| 3590 | for (SDValue Op : StoredVal->ops()) | 
|---|
| 3591 | if (Op.getNode() != LoadNode) | 
|---|
| 3592 | LoopWorklist.push_back(Elt: Op.getNode()); | 
|---|
| 3593 |  | 
|---|
| 3594 | // Check (a) if Load is a predecessor to Xn + Yn | 
|---|
| 3595 | if (SDNode::hasPredecessorHelper(N: Load.getNode(), Visited, Worklist&: LoopWorklist, MaxSteps: Max, | 
|---|
| 3596 | TopologicalPrune: true)) | 
|---|
| 3597 | return false; | 
|---|
| 3598 |  | 
|---|
| 3599 | InputChain = | 
|---|
| 3600 | CurDAG->getNode(Opcode: ISD::TokenFactor, DL: SDLoc(Chain), VT: MVT::Other, Ops: ChainOps); | 
|---|
| 3601 | return true; | 
|---|
| 3602 | } | 
|---|
| 3603 |  | 
|---|
| 3604 | // Change a chain of {load; op; store} of the same value into a simple op | 
|---|
| 3605 | // through memory of that value, if the uses of the modified value and its | 
|---|
| 3606 | // address are suitable. | 
|---|
| 3607 | // | 
|---|
| 3608 | // The tablegen memory operand pattern is currently not able to match | 
|---|
| 3609 | // the case where the EFLAGS on the original operation are used. | 
|---|
| 3610 | // | 
|---|
| 3611 | // To move this to tablegen, we'll need to improve tablegen to allow flags to | 
|---|
| 3612 | // be transferred from a node in the pattern to the result node, probably with | 
|---|
| 3613 | // a new keyword. For example, we have this | 
|---|
| 3614 | // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", | 
|---|
| 3615 | //  [(store (add (loadi64 addr:$dst), -1), addr:$dst)]>; | 
|---|
| 3616 | // but maybe need something like this | 
|---|
| 3617 | // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", | 
|---|
| 3618 | //  [(store (X86add_flag (loadi64 addr:$dst), -1), addr:$dst), | 
|---|
| 3619 | //   (transferrable EFLAGS)]>; | 
|---|
| 3620 | // | 
|---|
| 3621 | // Until then, we manually fold these and instruction select the operation | 
|---|
| 3622 | // here. | 
|---|
| 3623 | bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { | 
|---|
| 3624 | auto *StoreNode = cast<StoreSDNode>(Val: Node); | 
|---|
| 3625 | SDValue StoredVal = StoreNode->getOperand(Num: 1); | 
|---|
| 3626 | unsigned Opc = StoredVal->getOpcode(); | 
|---|
| 3627 |  | 
|---|
| 3628 | // Before we try to select anything, make sure this is memory operand size | 
|---|
| 3629 | // and opcode we can handle. Note that this must match the code below that | 
|---|
| 3630 | // actually lowers the opcodes. | 
|---|
| 3631 | EVT MemVT = StoreNode->getMemoryVT(); | 
|---|
| 3632 | if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 && | 
|---|
| 3633 | MemVT != MVT::i8) | 
|---|
| 3634 | return false; | 
|---|
| 3635 |  | 
|---|
| 3636 | bool IsCommutable = false; | 
|---|
| 3637 | bool IsNegate = false; | 
|---|
| 3638 | switch (Opc) { | 
|---|
| 3639 | default: | 
|---|
| 3640 | return false; | 
|---|
| 3641 | case X86ISD::SUB: | 
|---|
| 3642 | IsNegate = isNullConstant(V: StoredVal.getOperand(i: 0)); | 
|---|
| 3643 | break; | 
|---|
| 3644 | case X86ISD::SBB: | 
|---|
| 3645 | break; | 
|---|
| 3646 | case X86ISD::ADD: | 
|---|
| 3647 | case X86ISD::ADC: | 
|---|
| 3648 | case X86ISD::AND: | 
|---|
| 3649 | case X86ISD::OR: | 
|---|
| 3650 | case X86ISD::XOR: | 
|---|
| 3651 | IsCommutable = true; | 
|---|
| 3652 | break; | 
|---|
| 3653 | } | 
|---|
| 3654 |  | 
|---|
| 3655 | unsigned LoadOpNo = IsNegate ? 1 : 0; | 
|---|
| 3656 | LoadSDNode *LoadNode = nullptr; | 
|---|
| 3657 | SDValue InputChain; | 
|---|
| 3658 | if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, | 
|---|
| 3659 | LoadNode, InputChain)) { | 
|---|
| 3660 | if (!IsCommutable) | 
|---|
| 3661 | return false; | 
|---|
| 3662 |  | 
|---|
| 3663 | // This operation is commutable, try the other operand. | 
|---|
| 3664 | LoadOpNo = 1; | 
|---|
| 3665 | if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, | 
|---|
| 3666 | LoadNode, InputChain)) | 
|---|
| 3667 | return false; | 
|---|
| 3668 | } | 
|---|
| 3669 |  | 
|---|
| 3670 | SDValue Base, Scale, Index, Disp, Segment; | 
|---|
| 3671 | if (!selectAddr(Parent: LoadNode, N: LoadNode->getBasePtr(), Base, Scale, Index, Disp, | 
|---|
| 3672 | Segment)) | 
|---|
| 3673 | return false; | 
|---|
| 3674 |  | 
|---|
| 3675 | auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16, | 
|---|
| 3676 | unsigned Opc8) { | 
|---|
| 3677 | switch (MemVT.getSimpleVT().SimpleTy) { | 
|---|
| 3678 | case MVT::i64: | 
|---|
| 3679 | return Opc64; | 
|---|
| 3680 | case MVT::i32: | 
|---|
| 3681 | return Opc32; | 
|---|
| 3682 | case MVT::i16: | 
|---|
| 3683 | return Opc16; | 
|---|
| 3684 | case MVT::i8: | 
|---|
| 3685 | return Opc8; | 
|---|
| 3686 | default: | 
|---|
| 3687 | llvm_unreachable( "Invalid size!"); | 
|---|
| 3688 | } | 
|---|
| 3689 | }; | 
|---|
| 3690 |  | 
|---|
| 3691 | MachineSDNode *Result; | 
|---|
| 3692 | switch (Opc) { | 
|---|
| 3693 | case X86ISD::SUB: | 
|---|
| 3694 | // Handle negate. | 
|---|
| 3695 | if (IsNegate) { | 
|---|
| 3696 | unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m, | 
|---|
| 3697 | X86::NEG8m); | 
|---|
| 3698 | const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; | 
|---|
| 3699 | Result = CurDAG->getMachineNode(Opcode: NewOpc, dl: SDLoc(Node), VT1: MVT::i32, | 
|---|
| 3700 | VT2: MVT::Other, Ops); | 
|---|
| 3701 | break; | 
|---|
| 3702 | } | 
|---|
| 3703 | [[fallthrough]]; | 
|---|
| 3704 | case X86ISD::ADD: | 
|---|
| 3705 | // Try to match inc/dec. | 
|---|
| 3706 | if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) { | 
|---|
| 3707 | bool IsOne = isOneConstant(V: StoredVal.getOperand(i: 1)); | 
|---|
| 3708 | bool IsNegOne = isAllOnesConstant(V: StoredVal.getOperand(i: 1)); | 
|---|
| 3709 | // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec. | 
|---|
| 3710 | if ((IsOne || IsNegOne) && hasNoCarryFlagUses(Flags: StoredVal.getValue(R: 1))) { | 
|---|
| 3711 | unsigned NewOpc = | 
|---|
| 3712 | ((Opc == X86ISD::ADD) == IsOne) | 
|---|
| 3713 | ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) | 
|---|
| 3714 | : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); | 
|---|
| 3715 | const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; | 
|---|
| 3716 | Result = CurDAG->getMachineNode(Opcode: NewOpc, dl: SDLoc(Node), VT1: MVT::i32, | 
|---|
| 3717 | VT2: MVT::Other, Ops); | 
|---|
| 3718 | break; | 
|---|
| 3719 | } | 
|---|
| 3720 | } | 
|---|
| 3721 | [[fallthrough]]; | 
|---|
| 3722 | case X86ISD::ADC: | 
|---|
| 3723 | case X86ISD::SBB: | 
|---|
| 3724 | case X86ISD::AND: | 
|---|
| 3725 | case X86ISD::OR: | 
|---|
| 3726 | case X86ISD::XOR: { | 
|---|
| 3727 | auto SelectRegOpcode = [SelectOpcode](unsigned Opc) { | 
|---|
| 3728 | switch (Opc) { | 
|---|
| 3729 | case X86ISD::ADD: | 
|---|
| 3730 | return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr, | 
|---|
| 3731 | X86::ADD8mr); | 
|---|
| 3732 | case X86ISD::ADC: | 
|---|
| 3733 | return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr, | 
|---|
| 3734 | X86::ADC8mr); | 
|---|
| 3735 | case X86ISD::SUB: | 
|---|
| 3736 | return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr, | 
|---|
| 3737 | X86::SUB8mr); | 
|---|
| 3738 | case X86ISD::SBB: | 
|---|
| 3739 | return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr, | 
|---|
| 3740 | X86::SBB8mr); | 
|---|
| 3741 | case X86ISD::AND: | 
|---|
| 3742 | return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr, | 
|---|
| 3743 | X86::AND8mr); | 
|---|
| 3744 | case X86ISD::OR: | 
|---|
| 3745 | return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr); | 
|---|
| 3746 | case X86ISD::XOR: | 
|---|
| 3747 | return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr, | 
|---|
| 3748 | X86::XOR8mr); | 
|---|
| 3749 | default: | 
|---|
| 3750 | llvm_unreachable( "Invalid opcode!"); | 
|---|
| 3751 | } | 
|---|
| 3752 | }; | 
|---|
| 3753 | auto SelectImmOpcode = [SelectOpcode](unsigned Opc) { | 
|---|
| 3754 | switch (Opc) { | 
|---|
| 3755 | case X86ISD::ADD: | 
|---|
| 3756 | return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi, | 
|---|
| 3757 | X86::ADD8mi); | 
|---|
| 3758 | case X86ISD::ADC: | 
|---|
| 3759 | return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi, | 
|---|
| 3760 | X86::ADC8mi); | 
|---|
| 3761 | case X86ISD::SUB: | 
|---|
| 3762 | return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi, | 
|---|
| 3763 | X86::SUB8mi); | 
|---|
| 3764 | case X86ISD::SBB: | 
|---|
| 3765 | return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi, | 
|---|
| 3766 | X86::SBB8mi); | 
|---|
| 3767 | case X86ISD::AND: | 
|---|
| 3768 | return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi, | 
|---|
| 3769 | X86::AND8mi); | 
|---|
| 3770 | case X86ISD::OR: | 
|---|
| 3771 | return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi, | 
|---|
| 3772 | X86::OR8mi); | 
|---|
| 3773 | case X86ISD::XOR: | 
|---|
| 3774 | return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi, | 
|---|
| 3775 | X86::XOR8mi); | 
|---|
| 3776 | default: | 
|---|
| 3777 | llvm_unreachable( "Invalid opcode!"); | 
|---|
| 3778 | } | 
|---|
| 3779 | }; | 
|---|
| 3780 |  | 
|---|
| 3781 | unsigned NewOpc = SelectRegOpcode(Opc); | 
|---|
| 3782 | SDValue Operand = StoredVal->getOperand(Num: 1-LoadOpNo); | 
|---|
| 3783 |  | 
|---|
| 3784 | // See if the operand is a constant that we can fold into an immediate | 
|---|
| 3785 | // operand. | 
|---|
| 3786 | if (auto *OperandC = dyn_cast<ConstantSDNode>(Val&: Operand)) { | 
|---|
| 3787 | int64_t OperandV = OperandC->getSExtValue(); | 
|---|
| 3788 |  | 
|---|
| 3789 | // Check if we can shrink the operand enough to fit in an immediate (or | 
|---|
| 3790 | // fit into a smaller immediate) by negating it and switching the | 
|---|
| 3791 | // operation. | 
|---|
| 3792 | if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) && | 
|---|
| 3793 | ((MemVT != MVT::i8 && !isInt<8>(x: OperandV) && isInt<8>(x: -OperandV)) || | 
|---|
| 3794 | (MemVT == MVT::i64 && !isInt<32>(x: OperandV) && | 
|---|
| 3795 | isInt<32>(x: -OperandV))) && | 
|---|
| 3796 | hasNoCarryFlagUses(Flags: StoredVal.getValue(R: 1))) { | 
|---|
| 3797 | OperandV = -OperandV; | 
|---|
| 3798 | Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD; | 
|---|
| 3799 | } | 
|---|
| 3800 |  | 
|---|
| 3801 | if (MemVT != MVT::i64 || isInt<32>(x: OperandV)) { | 
|---|
| 3802 | Operand = CurDAG->getSignedTargetConstant(Val: OperandV, DL: SDLoc(Node), VT: MemVT); | 
|---|
| 3803 | NewOpc = SelectImmOpcode(Opc); | 
|---|
| 3804 | } | 
|---|
| 3805 | } | 
|---|
| 3806 |  | 
|---|
| 3807 | if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) { | 
|---|
| 3808 | SDValue CopyTo = | 
|---|
| 3809 | CurDAG->getCopyToReg(Chain: InputChain, dl: SDLoc(Node), Reg: X86::EFLAGS, | 
|---|
| 3810 | N: StoredVal.getOperand(i: 2), Glue: SDValue()); | 
|---|
| 3811 |  | 
|---|
| 3812 | const SDValue Ops[] = {Base,    Scale,   Index,  Disp, | 
|---|
| 3813 | Segment, Operand, CopyTo, CopyTo.getValue(R: 1)}; | 
|---|
| 3814 | Result = CurDAG->getMachineNode(Opcode: NewOpc, dl: SDLoc(Node), VT1: MVT::i32, VT2: MVT::Other, | 
|---|
| 3815 | Ops); | 
|---|
| 3816 | } else { | 
|---|
| 3817 | const SDValue Ops[] = {Base,    Scale,   Index,     Disp, | 
|---|
| 3818 | Segment, Operand, InputChain}; | 
|---|
| 3819 | Result = CurDAG->getMachineNode(Opcode: NewOpc, dl: SDLoc(Node), VT1: MVT::i32, VT2: MVT::Other, | 
|---|
| 3820 | Ops); | 
|---|
| 3821 | } | 
|---|
| 3822 | break; | 
|---|
| 3823 | } | 
|---|
| 3824 | default: | 
|---|
| 3825 | llvm_unreachable( "Invalid opcode!"); | 
|---|
| 3826 | } | 
|---|
| 3827 |  | 
|---|
| 3828 | MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(), | 
|---|
| 3829 | LoadNode->getMemOperand()}; | 
|---|
| 3830 | CurDAG->setNodeMemRefs(N: Result, NewMemRefs: MemOps); | 
|---|
| 3831 |  | 
|---|
| 3832 | // Update Load Chain uses as well. | 
|---|
| 3833 | ReplaceUses(F: SDValue(LoadNode, 1), T: SDValue(Result, 1)); | 
|---|
| 3834 | ReplaceUses(F: SDValue(StoreNode, 0), T: SDValue(Result, 1)); | 
|---|
| 3835 | ReplaceUses(F: SDValue(StoredVal.getNode(), 1), T: SDValue(Result, 0)); | 
|---|
| 3836 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 3837 | return true; | 
|---|
| 3838 | } | 
|---|
| 3839 |  | 
|---|
| 3840 | // See if this is an  X & Mask  that we can match to BEXTR/BZHI. | 
|---|
| 3841 | // Where Mask is one of the following patterns: | 
|---|
| 3842 | //   a) x &  (1 << nbits) - 1 | 
|---|
| 3843 | //   b) x & ~(-1 << nbits) | 
|---|
| 3844 | //   c) x &  (-1 >> (32 - y)) | 
|---|
| 3845 | //   d) x << (32 - y) >> (32 - y) | 
|---|
| 3846 | //   e) (1 << nbits) - 1 | 
|---|
| 3847 | bool X86DAGToDAGISel::(SDNode *Node) { | 
|---|
| 3848 | assert( | 
|---|
| 3849 | (Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND || | 
|---|
| 3850 | Node->getOpcode() == ISD::SRL) && | 
|---|
| 3851 | "Should be either an and-mask, or right-shift after clearing high bits."); | 
|---|
| 3852 |  | 
|---|
| 3853 | // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one. | 
|---|
| 3854 | if (!Subtarget->hasBMI() && !Subtarget->hasBMI2()) | 
|---|
| 3855 | return false; | 
|---|
| 3856 |  | 
|---|
| 3857 | MVT NVT = Node->getSimpleValueType(ResNo: 0); | 
|---|
| 3858 |  | 
|---|
| 3859 | // Only supported for 32 and 64 bits. | 
|---|
| 3860 | if (NVT != MVT::i32 && NVT != MVT::i64) | 
|---|
| 3861 | return false; | 
|---|
| 3862 |  | 
|---|
| 3863 | SDValue NBits; | 
|---|
| 3864 | bool NegateNBits; | 
|---|
| 3865 |  | 
|---|
| 3866 | // If we have BMI2's BZHI, we are ok with muti-use patterns. | 
|---|
| 3867 | // Else, if we only have BMI1's BEXTR, we require one-use. | 
|---|
| 3868 | const bool  = Subtarget->hasBMI2(); | 
|---|
| 3869 | auto checkUses = [AllowExtraUsesByDefault]( | 
|---|
| 3870 | SDValue Op, unsigned NUses, | 
|---|
| 3871 | std::optional<bool> ) { | 
|---|
| 3872 | return AllowExtraUses.value_or(u: AllowExtraUsesByDefault) || | 
|---|
| 3873 | Op.getNode()->hasNUsesOfValue(NUses, Value: Op.getResNo()); | 
|---|
| 3874 | }; | 
|---|
| 3875 | auto checkOneUse = [checkUses](SDValue Op, | 
|---|
| 3876 | std::optional<bool>  = | 
|---|
| 3877 | std::nullopt) { | 
|---|
| 3878 | return checkUses(Op, 1, AllowExtraUses); | 
|---|
| 3879 | }; | 
|---|
| 3880 | auto checkTwoUse = [checkUses](SDValue Op, | 
|---|
| 3881 | std::optional<bool>  = | 
|---|
| 3882 | std::nullopt) { | 
|---|
| 3883 | return checkUses(Op, 2, AllowExtraUses); | 
|---|
| 3884 | }; | 
|---|
| 3885 |  | 
|---|
| 3886 | auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) { | 
|---|
| 3887 | if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) { | 
|---|
| 3888 | assert(V.getSimpleValueType() == MVT::i32 && | 
|---|
| 3889 | V.getOperand(0).getSimpleValueType() == MVT::i64 && | 
|---|
| 3890 | "Expected i64 -> i32 truncation"); | 
|---|
| 3891 | V = V.getOperand(i: 0); | 
|---|
| 3892 | } | 
|---|
| 3893 | return V; | 
|---|
| 3894 | }; | 
|---|
| 3895 |  | 
|---|
| 3896 | // a) x & ((1 << nbits) + (-1)) | 
|---|
| 3897 | auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits, | 
|---|
| 3898 | &NegateNBits](SDValue Mask) -> bool { | 
|---|
| 3899 | // Match `add`. Must only have one use! | 
|---|
| 3900 | if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask)) | 
|---|
| 3901 | return false; | 
|---|
| 3902 | // We should be adding all-ones constant (i.e. subtracting one.) | 
|---|
| 3903 | if (!isAllOnesConstant(V: Mask->getOperand(Num: 1))) | 
|---|
| 3904 | return false; | 
|---|
| 3905 | // Match `1 << nbits`. Might be truncated. Must only have one use! | 
|---|
| 3906 | SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(Num: 0)); | 
|---|
| 3907 | if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) | 
|---|
| 3908 | return false; | 
|---|
| 3909 | if (!isOneConstant(V: M0->getOperand(Num: 0))) | 
|---|
| 3910 | return false; | 
|---|
| 3911 | NBits = M0->getOperand(Num: 1); | 
|---|
| 3912 | NegateNBits = false; | 
|---|
| 3913 | return true; | 
|---|
| 3914 | }; | 
|---|
| 3915 |  | 
|---|
| 3916 | auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) { | 
|---|
| 3917 | V = peekThroughOneUseTruncation(V); | 
|---|
| 3918 | return CurDAG->MaskedValueIsAllOnes( | 
|---|
| 3919 | Op: V, Mask: APInt::getLowBitsSet(numBits: V.getSimpleValueType().getSizeInBits(), | 
|---|
| 3920 | loBitsSet: NVT.getSizeInBits())); | 
|---|
| 3921 | }; | 
|---|
| 3922 |  | 
|---|
| 3923 | // b) x & ~(-1 << nbits) | 
|---|
| 3924 | auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation, | 
|---|
| 3925 | &NBits, &NegateNBits](SDValue Mask) -> bool { | 
|---|
| 3926 | // Match `~()`. Must only have one use! | 
|---|
| 3927 | if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask)) | 
|---|
| 3928 | return false; | 
|---|
| 3929 | // The -1 only has to be all-ones for the final Node's NVT. | 
|---|
| 3930 | if (!isAllOnes(Mask->getOperand(Num: 1))) | 
|---|
| 3931 | return false; | 
|---|
| 3932 | // Match `-1 << nbits`. Might be truncated. Must only have one use! | 
|---|
| 3933 | SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(Num: 0)); | 
|---|
| 3934 | if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) | 
|---|
| 3935 | return false; | 
|---|
| 3936 | // The -1 only has to be all-ones for the final Node's NVT. | 
|---|
| 3937 | if (!isAllOnes(M0->getOperand(Num: 0))) | 
|---|
| 3938 | return false; | 
|---|
| 3939 | NBits = M0->getOperand(Num: 1); | 
|---|
| 3940 | NegateNBits = false; | 
|---|
| 3941 | return true; | 
|---|
| 3942 | }; | 
|---|
| 3943 |  | 
|---|
| 3944 | // Try to match potentially-truncated shift amount as `(bitwidth - y)`, | 
|---|
| 3945 | // or leave the shift amount as-is, but then we'll have to negate it. | 
|---|
| 3946 | auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt, | 
|---|
| 3947 | unsigned Bitwidth) { | 
|---|
| 3948 | NBits = ShiftAmt; | 
|---|
| 3949 | NegateNBits = true; | 
|---|
| 3950 | // Skip over a truncate of the shift amount, if any. | 
|---|
| 3951 | if (NBits.getOpcode() == ISD::TRUNCATE) | 
|---|
| 3952 | NBits = NBits.getOperand(i: 0); | 
|---|
| 3953 | // Try to match the shift amount as (bitwidth - y). It should go away, too. | 
|---|
| 3954 | // If it doesn't match, that's fine, we'll just negate it ourselves. | 
|---|
| 3955 | if (NBits.getOpcode() != ISD::SUB) | 
|---|
| 3956 | return; | 
|---|
| 3957 | auto *V0 = dyn_cast<ConstantSDNode>(Val: NBits.getOperand(i: 0)); | 
|---|
| 3958 | if (!V0 || V0->getZExtValue() != Bitwidth) | 
|---|
| 3959 | return; | 
|---|
| 3960 | NBits = NBits.getOperand(i: 1); | 
|---|
| 3961 | NegateNBits = false; | 
|---|
| 3962 | }; | 
|---|
| 3963 |  | 
|---|
| 3964 | // c) x &  (-1 >> z)  but then we'll have to subtract z from bitwidth | 
|---|
| 3965 | //   or | 
|---|
| 3966 | // c) x &  (-1 >> (32 - y)) | 
|---|
| 3967 | auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits, | 
|---|
| 3968 | canonicalizeShiftAmt](SDValue Mask) -> bool { | 
|---|
| 3969 | // The mask itself may be truncated. | 
|---|
| 3970 | Mask = peekThroughOneUseTruncation(Mask); | 
|---|
| 3971 | unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits(); | 
|---|
| 3972 | // Match `l>>`. Must only have one use! | 
|---|
| 3973 | if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask)) | 
|---|
| 3974 | return false; | 
|---|
| 3975 | // We should be shifting truly all-ones constant. | 
|---|
| 3976 | if (!isAllOnesConstant(V: Mask.getOperand(i: 0))) | 
|---|
| 3977 | return false; | 
|---|
| 3978 | SDValue M1 = Mask.getOperand(i: 1); | 
|---|
| 3979 | // The shift amount should not be used externally. | 
|---|
| 3980 | if (!checkOneUse(M1)) | 
|---|
| 3981 | return false; | 
|---|
| 3982 | canonicalizeShiftAmt(M1, Bitwidth); | 
|---|
| 3983 | // Pattern c. is non-canonical, and is expanded into pattern d. iff there | 
|---|
| 3984 | // is no extra use of the mask. Clearly, there was one since we are here. | 
|---|
| 3985 | // But at the same time, if we need to negate the shift amount, | 
|---|
| 3986 | // then we don't want the mask to stick around, else it's unprofitable. | 
|---|
| 3987 | return !NegateNBits; | 
|---|
| 3988 | }; | 
|---|
| 3989 |  | 
|---|
| 3990 | SDValue X; | 
|---|
| 3991 |  | 
|---|
| 3992 | // d) x << z >> z  but then we'll have to subtract z from bitwidth | 
|---|
| 3993 | //   or | 
|---|
| 3994 | // d) x << (32 - y) >> (32 - y) | 
|---|
| 3995 | auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt, | 
|---|
| 3996 | AllowExtraUsesByDefault, &NegateNBits, | 
|---|
| 3997 | &X](SDNode *Node) -> bool { | 
|---|
| 3998 | if (Node->getOpcode() != ISD::SRL) | 
|---|
| 3999 | return false; | 
|---|
| 4000 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 4001 | if (N0->getOpcode() != ISD::SHL) | 
|---|
| 4002 | return false; | 
|---|
| 4003 | unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits(); | 
|---|
| 4004 | SDValue N1 = Node->getOperand(Num: 1); | 
|---|
| 4005 | SDValue N01 = N0->getOperand(Num: 1); | 
|---|
| 4006 | // Both of the shifts must be by the exact same value. | 
|---|
| 4007 | if (N1 != N01) | 
|---|
| 4008 | return false; | 
|---|
| 4009 | canonicalizeShiftAmt(N1, Bitwidth); | 
|---|
| 4010 | // There should not be any external uses of the inner shift / shift amount. | 
|---|
| 4011 | // Note that while we are generally okay with external uses given BMI2, | 
|---|
| 4012 | // iff we need to negate the shift amount, we are not okay with extra uses. | 
|---|
| 4013 | const bool  = AllowExtraUsesByDefault && !NegateNBits; | 
|---|
| 4014 | if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses)) | 
|---|
| 4015 | return false; | 
|---|
| 4016 | X = N0->getOperand(Num: 0); | 
|---|
| 4017 | return true; | 
|---|
| 4018 | }; | 
|---|
| 4019 |  | 
|---|
| 4020 | auto matchLowBitMask = [matchPatternA, matchPatternB, | 
|---|
| 4021 | matchPatternC](SDValue Mask) -> bool { | 
|---|
| 4022 | return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask); | 
|---|
| 4023 | }; | 
|---|
| 4024 |  | 
|---|
| 4025 | if (Node->getOpcode() == ISD::AND) { | 
|---|
| 4026 | X = Node->getOperand(Num: 0); | 
|---|
| 4027 | SDValue Mask = Node->getOperand(Num: 1); | 
|---|
| 4028 |  | 
|---|
| 4029 | if (matchLowBitMask(Mask)) { | 
|---|
| 4030 | // Great. | 
|---|
| 4031 | } else { | 
|---|
| 4032 | std::swap(a&: X, b&: Mask); | 
|---|
| 4033 | if (!matchLowBitMask(Mask)) | 
|---|
| 4034 | return false; | 
|---|
| 4035 | } | 
|---|
| 4036 | } else if (matchLowBitMask(SDValue(Node, 0))) { | 
|---|
| 4037 | X = CurDAG->getAllOnesConstant(DL: SDLoc(Node), VT: NVT); | 
|---|
| 4038 | } else if (!matchPatternD(Node)) | 
|---|
| 4039 | return false; | 
|---|
| 4040 |  | 
|---|
| 4041 | // If we need to negate the shift amount, require BMI2 BZHI support. | 
|---|
| 4042 | // It's just too unprofitable for BMI1 BEXTR. | 
|---|
| 4043 | if (NegateNBits && !Subtarget->hasBMI2()) | 
|---|
| 4044 | return false; | 
|---|
| 4045 |  | 
|---|
| 4046 | SDLoc DL(Node); | 
|---|
| 4047 |  | 
|---|
| 4048 | // Truncate the shift amount. | 
|---|
| 4049 | NBits = CurDAG->getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i8, Operand: NBits); | 
|---|
| 4050 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: NBits); | 
|---|
| 4051 |  | 
|---|
| 4052 | // Insert 8-bit NBits into lowest 8 bits of 32-bit register. | 
|---|
| 4053 | // All the other bits are undefined, we do not care about them. | 
|---|
| 4054 | SDValue ImplDef = SDValue( | 
|---|
| 4055 | CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::i32), 0); | 
|---|
| 4056 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: ImplDef); | 
|---|
| 4057 |  | 
|---|
| 4058 | SDValue SRIdxVal = CurDAG->getTargetConstant(Val: X86::sub_8bit, DL, VT: MVT::i32); | 
|---|
| 4059 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: SRIdxVal); | 
|---|
| 4060 | NBits = SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, | 
|---|
| 4061 | VT: MVT::i32, Op1: ImplDef, Op2: NBits, Op3: SRIdxVal), | 
|---|
| 4062 | 0); | 
|---|
| 4063 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: NBits); | 
|---|
| 4064 |  | 
|---|
| 4065 | // We might have matched the amount of high bits to be cleared, | 
|---|
| 4066 | // but we want the amount of low bits to be kept, so negate it then. | 
|---|
| 4067 | if (NegateNBits) { | 
|---|
| 4068 | SDValue BitWidthC = CurDAG->getConstant(Val: NVT.getSizeInBits(), DL, VT: MVT::i32); | 
|---|
| 4069 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: BitWidthC); | 
|---|
| 4070 |  | 
|---|
| 4071 | NBits = CurDAG->getNode(Opcode: ISD::SUB, DL, VT: MVT::i32, N1: BitWidthC, N2: NBits); | 
|---|
| 4072 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: NBits); | 
|---|
| 4073 | } | 
|---|
| 4074 |  | 
|---|
| 4075 | if (Subtarget->hasBMI2()) { | 
|---|
| 4076 | // Great, just emit the BZHI.. | 
|---|
| 4077 | if (NVT != MVT::i32) { | 
|---|
| 4078 | // But have to place the bit count into the wide-enough register first. | 
|---|
| 4079 | NBits = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT: NVT, Operand: NBits); | 
|---|
| 4080 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: NBits); | 
|---|
| 4081 | } | 
|---|
| 4082 |  | 
|---|
| 4083 | SDValue  = CurDAG->getNode(Opcode: X86ISD::BZHI, DL, VT: NVT, N1: X, N2: NBits); | 
|---|
| 4084 | ReplaceNode(F: Node, T: Extract.getNode()); | 
|---|
| 4085 | SelectCode(N: Extract.getNode()); | 
|---|
| 4086 | return true; | 
|---|
| 4087 | } | 
|---|
| 4088 |  | 
|---|
| 4089 | // Else, if we do *NOT* have BMI2, let's find out if the if the 'X' is | 
|---|
| 4090 | // *logically* shifted (potentially with one-use trunc inbetween), | 
|---|
| 4091 | // and the truncation was the only use of the shift, | 
|---|
| 4092 | // and if so look past one-use truncation. | 
|---|
| 4093 | { | 
|---|
| 4094 | SDValue RealX = peekThroughOneUseTruncation(X); | 
|---|
| 4095 | // FIXME: only if the shift is one-use? | 
|---|
| 4096 | if (RealX != X && RealX.getOpcode() == ISD::SRL) | 
|---|
| 4097 | X = RealX; | 
|---|
| 4098 | } | 
|---|
| 4099 |  | 
|---|
| 4100 | MVT XVT = X.getSimpleValueType(); | 
|---|
| 4101 |  | 
|---|
| 4102 | // Else, emitting BEXTR requires one more step. | 
|---|
| 4103 | // The 'control' of BEXTR has the pattern of: | 
|---|
| 4104 | // [15...8 bit][ 7...0 bit] location | 
|---|
| 4105 | // [ bit count][     shift] name | 
|---|
| 4106 | // I.e. 0b000000011'00000001 means  (x >> 0b1) & 0b11 | 
|---|
| 4107 |  | 
|---|
| 4108 | // Shift NBits left by 8 bits, thus producing 'control'. | 
|---|
| 4109 | // This makes the low 8 bits to be zero. | 
|---|
| 4110 | SDValue C8 = CurDAG->getConstant(Val: 8, DL, VT: MVT::i8); | 
|---|
| 4111 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: C8); | 
|---|
| 4112 | SDValue Control = CurDAG->getNode(Opcode: ISD::SHL, DL, VT: MVT::i32, N1: NBits, N2: C8); | 
|---|
| 4113 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: Control); | 
|---|
| 4114 |  | 
|---|
| 4115 | // If the 'X' is *logically* shifted, we can fold that shift into 'control'. | 
|---|
| 4116 | // FIXME: only if the shift is one-use? | 
|---|
| 4117 | if (X.getOpcode() == ISD::SRL) { | 
|---|
| 4118 | SDValue ShiftAmt = X.getOperand(i: 1); | 
|---|
| 4119 | X = X.getOperand(i: 0); | 
|---|
| 4120 |  | 
|---|
| 4121 | assert(ShiftAmt.getValueType() == MVT::i8 && | 
|---|
| 4122 | "Expected shift amount to be i8"); | 
|---|
| 4123 |  | 
|---|
| 4124 | // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero! | 
|---|
| 4125 | // We could zext to i16 in some form, but we intentionally don't do that. | 
|---|
| 4126 | SDValue OrigShiftAmt = ShiftAmt; | 
|---|
| 4127 | ShiftAmt = CurDAG->getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: ShiftAmt); | 
|---|
| 4128 | insertDAGNode(DAG&: *CurDAG, Pos: OrigShiftAmt, N: ShiftAmt); | 
|---|
| 4129 |  | 
|---|
| 4130 | // And now 'or' these low 8 bits of shift amount into the 'control'. | 
|---|
| 4131 | Control = CurDAG->getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Control, N2: ShiftAmt); | 
|---|
| 4132 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: Control); | 
|---|
| 4133 | } | 
|---|
| 4134 |  | 
|---|
| 4135 | // But have to place the 'control' into the wide-enough register first. | 
|---|
| 4136 | if (XVT != MVT::i32) { | 
|---|
| 4137 | Control = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XVT, Operand: Control); | 
|---|
| 4138 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: Control); | 
|---|
| 4139 | } | 
|---|
| 4140 |  | 
|---|
| 4141 | // And finally, form the BEXTR itself. | 
|---|
| 4142 | SDValue  = CurDAG->getNode(Opcode: X86ISD::BEXTR, DL, VT: XVT, N1: X, N2: Control); | 
|---|
| 4143 |  | 
|---|
| 4144 | // The 'X' was originally truncated. Do that now. | 
|---|
| 4145 | if (XVT != NVT) { | 
|---|
| 4146 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(Node, 0), N: Extract); | 
|---|
| 4147 | Extract = CurDAG->getNode(Opcode: ISD::TRUNCATE, DL, VT: NVT, Operand: Extract); | 
|---|
| 4148 | } | 
|---|
| 4149 |  | 
|---|
| 4150 | ReplaceNode(F: Node, T: Extract.getNode()); | 
|---|
| 4151 | SelectCode(N: Extract.getNode()); | 
|---|
| 4152 |  | 
|---|
| 4153 | return true; | 
|---|
| 4154 | } | 
|---|
| 4155 |  | 
|---|
| 4156 | // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI. | 
|---|
| 4157 | MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { | 
|---|
| 4158 | MVT NVT = Node->getSimpleValueType(ResNo: 0); | 
|---|
| 4159 | SDLoc dl(Node); | 
|---|
| 4160 |  | 
|---|
| 4161 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 4162 | SDValue N1 = Node->getOperand(Num: 1); | 
|---|
| 4163 |  | 
|---|
| 4164 | // If we have TBM we can use an immediate for the control. If we have BMI | 
|---|
| 4165 | // we should only do this if the BEXTR instruction is implemented well. | 
|---|
| 4166 | // Otherwise moving the control into a register makes this more costly. | 
|---|
| 4167 | // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM | 
|---|
| 4168 | // hoisting the move immediate would make it worthwhile with a less optimal | 
|---|
| 4169 | // BEXTR? | 
|---|
| 4170 | bool PreferBEXTR = | 
|---|
| 4171 | Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR()); | 
|---|
| 4172 | if (!PreferBEXTR && !Subtarget->hasBMI2()) | 
|---|
| 4173 | return nullptr; | 
|---|
| 4174 |  | 
|---|
| 4175 | // Must have a shift right. | 
|---|
| 4176 | if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA) | 
|---|
| 4177 | return nullptr; | 
|---|
| 4178 |  | 
|---|
| 4179 | // Shift can't have additional users. | 
|---|
| 4180 | if (!N0->hasOneUse()) | 
|---|
| 4181 | return nullptr; | 
|---|
| 4182 |  | 
|---|
| 4183 | // Only supported for 32 and 64 bits. | 
|---|
| 4184 | if (NVT != MVT::i32 && NVT != MVT::i64) | 
|---|
| 4185 | return nullptr; | 
|---|
| 4186 |  | 
|---|
| 4187 | // Shift amount and RHS of and must be constant. | 
|---|
| 4188 | auto *MaskCst = dyn_cast<ConstantSDNode>(Val&: N1); | 
|---|
| 4189 | auto *ShiftCst = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)); | 
|---|
| 4190 | if (!MaskCst || !ShiftCst) | 
|---|
| 4191 | return nullptr; | 
|---|
| 4192 |  | 
|---|
| 4193 | // And RHS must be a mask. | 
|---|
| 4194 | uint64_t Mask = MaskCst->getZExtValue(); | 
|---|
| 4195 | if (!isMask_64(Value: Mask)) | 
|---|
| 4196 | return nullptr; | 
|---|
| 4197 |  | 
|---|
| 4198 | uint64_t Shift = ShiftCst->getZExtValue(); | 
|---|
| 4199 | uint64_t MaskSize = llvm::popcount(Value: Mask); | 
|---|
| 4200 |  | 
|---|
| 4201 | // Don't interfere with something that can be handled by extracting AH. | 
|---|
| 4202 | // TODO: If we are able to fold a load, BEXTR might still be better than AH. | 
|---|
| 4203 | if (Shift == 8 && MaskSize == 8) | 
|---|
| 4204 | return nullptr; | 
|---|
| 4205 |  | 
|---|
| 4206 | // Make sure we are only using bits that were in the original value, not | 
|---|
| 4207 | // shifted in. | 
|---|
| 4208 | if (Shift + MaskSize > NVT.getSizeInBits()) | 
|---|
| 4209 | return nullptr; | 
|---|
| 4210 |  | 
|---|
| 4211 | // BZHI, if available, is always fast, unlike BEXTR. But even if we decide | 
|---|
| 4212 | // that we can't use BEXTR, it is only worthwhile using BZHI if the mask | 
|---|
| 4213 | // does not fit into 32 bits. Load folding is not a sufficient reason. | 
|---|
| 4214 | if (!PreferBEXTR && MaskSize <= 32) | 
|---|
| 4215 | return nullptr; | 
|---|
| 4216 |  | 
|---|
| 4217 | SDValue Control; | 
|---|
| 4218 | unsigned ROpc, MOpc; | 
|---|
| 4219 |  | 
|---|
| 4220 | #define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC) | 
|---|
| 4221 | if (!PreferBEXTR) { | 
|---|
| 4222 | assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then."); | 
|---|
| 4223 | // If we can't make use of BEXTR then we can't fuse shift+mask stages. | 
|---|
| 4224 | // Let's perform the mask first, and apply shift later. Note that we need to | 
|---|
| 4225 | // widen the mask to account for the fact that we'll apply shift afterwards! | 
|---|
| 4226 | Control = CurDAG->getTargetConstant(Val: Shift + MaskSize, DL: dl, VT: NVT); | 
|---|
| 4227 | ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr) | 
|---|
| 4228 | : GET_EGPR_IF_ENABLED(X86::BZHI32rr); | 
|---|
| 4229 | MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm) | 
|---|
| 4230 | : GET_EGPR_IF_ENABLED(X86::BZHI32rm); | 
|---|
| 4231 | unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; | 
|---|
| 4232 | Control = SDValue(CurDAG->getMachineNode(Opcode: NewOpc, dl, VT: NVT, Op1: Control), 0); | 
|---|
| 4233 | } else { | 
|---|
| 4234 | // The 'control' of BEXTR has the pattern of: | 
|---|
| 4235 | // [15...8 bit][ 7...0 bit] location | 
|---|
| 4236 | // [ bit count][     shift] name | 
|---|
| 4237 | // I.e. 0b000000011'00000001 means  (x >> 0b1) & 0b11 | 
|---|
| 4238 | Control = CurDAG->getTargetConstant(Val: Shift | (MaskSize << 8), DL: dl, VT: NVT); | 
|---|
| 4239 | if (Subtarget->hasTBM()) { | 
|---|
| 4240 | ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri; | 
|---|
| 4241 | MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi; | 
|---|
| 4242 | } else { | 
|---|
| 4243 | assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then."); | 
|---|
| 4244 | // BMI requires the immediate to placed in a register. | 
|---|
| 4245 | ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr) | 
|---|
| 4246 | : GET_EGPR_IF_ENABLED(X86::BEXTR32rr); | 
|---|
| 4247 | MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm) | 
|---|
| 4248 | : GET_EGPR_IF_ENABLED(X86::BEXTR32rm); | 
|---|
| 4249 | unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; | 
|---|
| 4250 | Control = SDValue(CurDAG->getMachineNode(Opcode: NewOpc, dl, VT: NVT, Op1: Control), 0); | 
|---|
| 4251 | } | 
|---|
| 4252 | } | 
|---|
| 4253 |  | 
|---|
| 4254 | MachineSDNode *NewNode; | 
|---|
| 4255 | SDValue Input = N0->getOperand(Num: 0); | 
|---|
| 4256 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 4257 | if (tryFoldLoad(Root: Node, P: N0.getNode(), N: Input, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4)) { | 
|---|
| 4258 | SDValue Ops[] = { | 
|---|
| 4259 | Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(i: 0)}; | 
|---|
| 4260 | SDVTList VTs = CurDAG->getVTList(VT1: NVT, VT2: MVT::i32, VT3: MVT::Other); | 
|---|
| 4261 | NewNode = CurDAG->getMachineNode(Opcode: MOpc, dl, VTs, Ops); | 
|---|
| 4262 | // Update the chain. | 
|---|
| 4263 | ReplaceUses(F: Input.getValue(R: 1), T: SDValue(NewNode, 2)); | 
|---|
| 4264 | // Record the mem-refs | 
|---|
| 4265 | CurDAG->setNodeMemRefs(N: NewNode, NewMemRefs: {cast<LoadSDNode>(Val&: Input)->getMemOperand()}); | 
|---|
| 4266 | } else { | 
|---|
| 4267 | NewNode = CurDAG->getMachineNode(Opcode: ROpc, dl, VT1: NVT, VT2: MVT::i32, Op1: Input, Op2: Control); | 
|---|
| 4268 | } | 
|---|
| 4269 |  | 
|---|
| 4270 | if (!PreferBEXTR) { | 
|---|
| 4271 | // We still need to apply the shift. | 
|---|
| 4272 | SDValue ShAmt = CurDAG->getTargetConstant(Val: Shift, DL: dl, VT: NVT); | 
|---|
| 4273 | unsigned NewOpc = NVT == MVT::i64 ? GET_ND_IF_ENABLED(X86::SHR64ri) | 
|---|
| 4274 | : GET_ND_IF_ENABLED(X86::SHR32ri); | 
|---|
| 4275 | NewNode = | 
|---|
| 4276 | CurDAG->getMachineNode(Opcode: NewOpc, dl, VT: NVT, Op1: SDValue(NewNode, 0), Op2: ShAmt); | 
|---|
| 4277 | } | 
|---|
| 4278 |  | 
|---|
| 4279 | return NewNode; | 
|---|
| 4280 | } | 
|---|
| 4281 |  | 
|---|
| 4282 | // Emit a PCMISTR(I/M) instruction. | 
|---|
| 4283 | MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc, | 
|---|
| 4284 | bool MayFoldLoad, const SDLoc &dl, | 
|---|
| 4285 | MVT VT, SDNode *Node) { | 
|---|
| 4286 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 4287 | SDValue N1 = Node->getOperand(Num: 1); | 
|---|
| 4288 | SDValue Imm = Node->getOperand(Num: 2); | 
|---|
| 4289 | auto *Val = cast<ConstantSDNode>(Val&: Imm)->getConstantIntValue(); | 
|---|
| 4290 | Imm = CurDAG->getTargetConstant(Val: *Val, DL: SDLoc(Node), VT: Imm.getValueType()); | 
|---|
| 4291 |  | 
|---|
| 4292 | // Try to fold a load. No need to check alignment. | 
|---|
| 4293 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 4294 | if (MayFoldLoad && tryFoldLoad(P: Node, N: N1, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4)) { | 
|---|
| 4295 | SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, | 
|---|
| 4296 | N1.getOperand(i: 0) }; | 
|---|
| 4297 | SDVTList VTs = CurDAG->getVTList(VT1: VT, VT2: MVT::i32, VT3: MVT::Other); | 
|---|
| 4298 | MachineSDNode *CNode = CurDAG->getMachineNode(Opcode: MOpc, dl, VTs, Ops); | 
|---|
| 4299 | // Update the chain. | 
|---|
| 4300 | ReplaceUses(F: N1.getValue(R: 1), T: SDValue(CNode, 2)); | 
|---|
| 4301 | // Record the mem-refs | 
|---|
| 4302 | CurDAG->setNodeMemRefs(N: CNode, NewMemRefs: {cast<LoadSDNode>(Val&: N1)->getMemOperand()}); | 
|---|
| 4303 | return CNode; | 
|---|
| 4304 | } | 
|---|
| 4305 |  | 
|---|
| 4306 | SDValue Ops[] = { N0, N1, Imm }; | 
|---|
| 4307 | SDVTList VTs = CurDAG->getVTList(VT1: VT, VT2: MVT::i32); | 
|---|
| 4308 | MachineSDNode *CNode = CurDAG->getMachineNode(Opcode: ROpc, dl, VTs, Ops); | 
|---|
| 4309 | return CNode; | 
|---|
| 4310 | } | 
|---|
| 4311 |  | 
|---|
| 4312 | // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need | 
|---|
| 4313 | // to emit a second instruction after this one. This is needed since we have two | 
|---|
| 4314 | // copyToReg nodes glued before this and we need to continue that glue through. | 
|---|
| 4315 | MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc, | 
|---|
| 4316 | bool MayFoldLoad, const SDLoc &dl, | 
|---|
| 4317 | MVT VT, SDNode *Node, | 
|---|
| 4318 | SDValue &InGlue) { | 
|---|
| 4319 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 4320 | SDValue N2 = Node->getOperand(Num: 2); | 
|---|
| 4321 | SDValue Imm = Node->getOperand(Num: 4); | 
|---|
| 4322 | auto *Val = cast<ConstantSDNode>(Val&: Imm)->getConstantIntValue(); | 
|---|
| 4323 | Imm = CurDAG->getTargetConstant(Val: *Val, DL: SDLoc(Node), VT: Imm.getValueType()); | 
|---|
| 4324 |  | 
|---|
| 4325 | // Try to fold a load. No need to check alignment. | 
|---|
| 4326 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 4327 | if (MayFoldLoad && tryFoldLoad(P: Node, N: N2, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4)) { | 
|---|
| 4328 | SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, | 
|---|
| 4329 | N2.getOperand(i: 0), InGlue }; | 
|---|
| 4330 | SDVTList VTs = CurDAG->getVTList(VT1: VT, VT2: MVT::i32, VT3: MVT::Other, VT4: MVT::Glue); | 
|---|
| 4331 | MachineSDNode *CNode = CurDAG->getMachineNode(Opcode: MOpc, dl, VTs, Ops); | 
|---|
| 4332 | InGlue = SDValue(CNode, 3); | 
|---|
| 4333 | // Update the chain. | 
|---|
| 4334 | ReplaceUses(F: N2.getValue(R: 1), T: SDValue(CNode, 2)); | 
|---|
| 4335 | // Record the mem-refs | 
|---|
| 4336 | CurDAG->setNodeMemRefs(N: CNode, NewMemRefs: {cast<LoadSDNode>(Val&: N2)->getMemOperand()}); | 
|---|
| 4337 | return CNode; | 
|---|
| 4338 | } | 
|---|
| 4339 |  | 
|---|
| 4340 | SDValue Ops[] = { N0, N2, Imm, InGlue }; | 
|---|
| 4341 | SDVTList VTs = CurDAG->getVTList(VT1: VT, VT2: MVT::i32, VT3: MVT::Glue); | 
|---|
| 4342 | MachineSDNode *CNode = CurDAG->getMachineNode(Opcode: ROpc, dl, VTs, Ops); | 
|---|
| 4343 | InGlue = SDValue(CNode, 2); | 
|---|
| 4344 | return CNode; | 
|---|
| 4345 | } | 
|---|
| 4346 | /* Try to simplify the shift amount (operand 1) of the scalar shift N when that amount is an ADD, SUB or XOR involving a suitable constant, relying on the amount being masked with Size - 1 afterwards. On success N is either replaced by an existing equivalent node or handed to SelectCode, and true is returned. Returns false when no pattern matches. */ | 
|---|
| 4347 | bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) { | 
|---|
| 4348 | EVT VT = N->getValueType(ResNo: 0); | 
|---|
| 4349 |  | 
|---|
| 4350 | // Only handle scalar shifts. | 
|---|
| 4351 | if (VT.isVector()) | 
|---|
| 4352 | return false; | 
|---|
| 4353 |  | 
|---|
| 4354 | // Narrower shifts only mask to 5 bits in hardware. | 
|---|
| 4355 | unsigned Size = VT == MVT::i64 ? 64 : 32; | 
|---|
| 4356 |  | 
|---|
| 4357 | SDValue OrigShiftAmt = N->getOperand(Num: 1); | 
|---|
| 4358 | SDValue ShiftAmt = OrigShiftAmt; | 
|---|
| 4359 | SDLoc DL(N); | 
|---|
| 4360 |  | 
|---|
| 4361 | // Skip over a truncate of the shift amount. | 
|---|
| 4362 | if (ShiftAmt->getOpcode() == ISD::TRUNCATE) | 
|---|
| 4363 | ShiftAmt = ShiftAmt->getOperand(Num: 0); | 
|---|
| 4364 |  | 
|---|
| 4365 | // This function is called after X86DAGToDAGISel::matchBitExtract(), | 
|---|
| 4366 | // so we are not afraid that we might mess up BZHI/BEXTR pattern. | 
|---|
| 4367 |  | 
|---|
| 4368 | SDValue NewShiftAmt; | 
|---|
| 4369 | if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB || | 
|---|
| 4370 | ShiftAmt->getOpcode() == ISD::XOR) { | 
|---|
| 4371 | SDValue Add0 = ShiftAmt->getOperand(Num: 0); | 
|---|
| 4372 | SDValue Add1 = ShiftAmt->getOperand(Num: 1); | 
|---|
| 4373 | auto *Add0C = dyn_cast<ConstantSDNode>(Val&: Add0); | 
|---|
| 4374 | auto *Add1C = dyn_cast<ConstantSDNode>(Val&: Add1); | 
|---|
| 4375 | // If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X | 
|---|
| 4376 | // to avoid the ADD/SUB/XOR. | 
|---|
| 4377 | if (Add1C && Add1C->getAPIntValue().urem(RHS: Size) == 0) { | 
|---|
| 4378 | NewShiftAmt = Add0; | 
|---|
| 4379 |  | 
|---|
| 4380 | } else if (ShiftAmt->getOpcode() != ISD::ADD && ShiftAmt.hasOneUse() && | 
|---|
| 4381 | ((Add0C && Add0C->getAPIntValue().urem(RHS: Size) == Size - 1) || | 
|---|
| 4382 | (Add1C && Add1C->getAPIntValue().urem(RHS: Size) == Size - 1))) { | 
|---|
| 4383 | // If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X | 
|---|
| 4384 | // we can replace it with a NOT. In the XOR case it may save some code | 
|---|
| 4385 | // size, in the SUB case it also may save a move. | 
|---|
| 4386 | assert(Add0C == nullptr || Add1C == nullptr); | 
|---|
| 4387 |  | 
|---|
| 4388 | // We can only do N-X, not X-N | 
|---|
| 4389 | if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr) | 
|---|
| 4390 | return false; | 
|---|
| 4391 |  | 
|---|
| 4392 | EVT OpVT = ShiftAmt.getValueType(); | 
|---|
| 4393 |  | 
|---|
| 4394 | SDValue AllOnes = CurDAG->getAllOnesConstant(DL, VT: OpVT); | 
|---|
| 4395 | NewShiftAmt = CurDAG->getNode(Opcode: ISD::XOR, DL, VT: OpVT, | 
|---|
| 4396 | N1: Add0C == nullptr ? Add0 : Add1, N2: AllOnes); | 
|---|
| 4397 | insertDAGNode(DAG&: *CurDAG, Pos: OrigShiftAmt, N: AllOnes); | 
|---|
| 4398 | insertDAGNode(DAG&: *CurDAG, Pos: OrigShiftAmt, N: NewShiftAmt); | 
|---|
| 4399 | // If we are shifting by N-X where N == 0 mod Size, then just shift by | 
|---|
| 4400 | // -X to generate a NEG instead of a SUB of a constant. | 
|---|
| 4401 | } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C && | 
|---|
| 4402 | Add0C->getZExtValue() != 0) { | 
|---|
| 4403 | EVT SubVT = ShiftAmt.getValueType(); | 
|---|
| 4404 | SDValue X; | 
|---|
| 4405 | if (Add0C->getZExtValue() % Size == 0) | 
|---|
| 4406 | X = Add1; | 
|---|
| 4407 | else if (ShiftAmt.hasOneUse() && Size == 64 && | 
|---|
| 4408 | Add0C->getZExtValue() % 32 == 0) { | 
|---|
| 4409 | // We have a 64-bit shift by (n*32-x), turn it into -(x+n*32). | 
|---|
| 4410 | // This is mainly beneficial if we already compute (x+n*32). | 
|---|
| 4411 | if (Add1.getOpcode() == ISD::TRUNCATE) { | 
|---|
| 4412 | Add1 = Add1.getOperand(i: 0); | 
|---|
| 4413 | SubVT = Add1.getValueType(); | 
|---|
| 4414 | } | 
|---|
| 4415 | if (Add0.getValueType() != SubVT) { | 
|---|
| 4416 | Add0 = CurDAG->getZExtOrTrunc(Op: Add0, DL, VT: SubVT); | 
|---|
| 4417 | insertDAGNode(DAG&: *CurDAG, Pos: OrigShiftAmt, N: Add0); | 
|---|
| 4418 | } | 
|---|
| 4419 |  | 
|---|
| 4420 | X = CurDAG->getNode(Opcode: ISD::ADD, DL, VT: SubVT, N1: Add1, N2: Add0); | 
|---|
| 4421 | insertDAGNode(DAG&: *CurDAG, Pos: OrigShiftAmt, N: X); | 
|---|
| 4422 | } else | 
|---|
| 4423 | return false; | 
|---|
| 4424 | // Insert a negate op. | 
|---|
| 4425 | // TODO: This isn't guaranteed to replace the sub if there is a logic cone | 
|---|
| 4426 | // that uses it that's not a shift. | 
|---|
| 4427 | SDValue Zero = CurDAG->getConstant(Val: 0, DL, VT: SubVT); | 
|---|
| 4428 | SDValue Neg = CurDAG->getNode(Opcode: ISD::SUB, DL, VT: SubVT, N1: Zero, N2: X); | 
|---|
| 4429 | NewShiftAmt = Neg; | 
|---|
| 4430 |  | 
|---|
| 4431 | // Insert these operands into a valid topological order so they can | 
|---|
| 4432 | // get selected independently. | 
|---|
| 4433 | insertDAGNode(DAG&: *CurDAG, Pos: OrigShiftAmt, N: Zero); | 
|---|
| 4434 | insertDAGNode(DAG&: *CurDAG, Pos: OrigShiftAmt, N: Neg); | 
|---|
| 4435 | } else | 
|---|
| 4436 | return false; | 
|---|
| 4437 | } else | 
|---|
| 4438 | return false; | 
|---|
| 4439 | /* Every path that reaches this point has assigned NewShiftAmt. */ | 
|---|
| 4440 | if (NewShiftAmt.getValueType() != MVT::i8) { | 
|---|
| 4441 | // Need to truncate the shift amount. | 
|---|
| 4442 | NewShiftAmt = CurDAG->getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i8, Operand: NewShiftAmt); | 
|---|
| 4443 | // Add to a correct topological ordering. | 
|---|
| 4444 | insertDAGNode(DAG&: *CurDAG, Pos: OrigShiftAmt, N: NewShiftAmt); | 
|---|
| 4445 | } | 
|---|
| 4446 |  | 
|---|
| 4447 | // Insert a new mask to keep the shift amount legal. This should be removed | 
|---|
| 4448 | // by isel patterns. | 
|---|
| 4449 | NewShiftAmt = CurDAG->getNode(Opcode: ISD::AND, DL, VT: MVT::i8, N1: NewShiftAmt, | 
|---|
| 4450 | N2: CurDAG->getConstant(Val: Size - 1, DL, VT: MVT::i8)); | 
|---|
| 4451 | // Place in a correct topological ordering. | 
|---|
| 4452 | insertDAGNode(DAG&: *CurDAG, Pos: OrigShiftAmt, N: NewShiftAmt); | 
|---|
| 4453 |  | 
|---|
| 4454 | SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, Op1: N->getOperand(Num: 0), | 
|---|
| 4455 | Op2: NewShiftAmt); | 
|---|
| 4456 | if (UpdatedNode != N) { | 
|---|
| 4457 | // If we found an existing node, we should replace ourselves with that node | 
|---|
| 4458 | // and wait for it to be selected after its other users. | 
|---|
| 4459 | ReplaceNode(F: N, T: UpdatedNode); | 
|---|
| 4460 | return true; | 
|---|
| 4461 | } | 
|---|
| 4462 | /* From here on UpdateNodeOperands returned N itself, so N now uses NewShiftAmt. */ | 
|---|
| 4463 | // If the original shift amount is now dead, delete it so that we don't run | 
|---|
| 4464 | // it through isel. | 
|---|
| 4465 | if (OrigShiftAmt.getNode()->use_empty()) | 
|---|
| 4466 | CurDAG->RemoveDeadNode(N: OrigShiftAmt.getNode()); | 
|---|
| 4467 |  | 
|---|
| 4468 | // Now that we've optimized the shift amount, defer to normal isel to get | 
|---|
| 4469 | // load folding and legacy vs BMI2 selection without repeating it here. | 
|---|
| 4470 | SelectCode(N); | 
|---|
| 4471 | return true; | 
|---|
| 4472 | } | 
|---|
| 4473 | /* For N = (x << C1) op C2, optionally with a single-use any_extend from i32 between the shift and the op, try to rewrite it as (x op (C2 >> C1)) << C1 when the shifted constant allows a smaller immediate encoding. Only i32 and i64 results are handled. Returns true if N was replaced and the new SHL was passed to SelectCode, false otherwise. */ | 
|---|
| 4474 | bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { | 
|---|
| 4475 | MVT NVT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 4476 | unsigned Opcode = N->getOpcode(); | 
|---|
| 4477 | SDLoc dl(N); | 
|---|
| 4478 |  | 
|---|
| 4479 | // For operations of the form (x << C1) op C2, check if we can use a smaller | 
|---|
| 4480 | // encoding for C2 by transforming it into (x op (C2>>C1)) << C1. | 
|---|
| 4481 | SDValue Shift = N->getOperand(Num: 0); | 
|---|
| 4482 | SDValue N1 = N->getOperand(Num: 1); | 
|---|
| 4483 |  | 
|---|
| 4484 | auto *Cst = dyn_cast<ConstantSDNode>(Val&: N1); | 
|---|
| 4485 | if (!Cst) | 
|---|
| 4486 | return false; | 
|---|
| 4487 |  | 
|---|
| 4488 | int64_t Val = Cst->getSExtValue(); | 
|---|
| 4489 |  | 
|---|
| 4490 | // If we have an any_extend feeding the AND, look through it to see if there | 
|---|
| 4491 | // is a shift behind it. But only if the AND doesn't use the extended bits. | 
|---|
| 4492 | // FIXME: Generalize this to other ANY_EXTEND than i32 to i64? | 
|---|
| 4493 | bool FoundAnyExtend = false; | 
|---|
| 4494 | if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && | 
|---|
| 4495 | Shift.getOperand(i: 0).getSimpleValueType() == MVT::i32 && | 
|---|
| 4496 | isUInt<32>(x: Val)) { | 
|---|
| 4497 | FoundAnyExtend = true; | 
|---|
| 4498 | Shift = Shift.getOperand(i: 0); | 
|---|
| 4499 | } | 
|---|
| 4500 |  | 
|---|
| 4501 | if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) | 
|---|
| 4502 | return false; | 
|---|
| 4503 |  | 
|---|
| 4504 | // i8 is unshrinkable, i16 should be promoted to i32. | 
|---|
| 4505 | if (NVT != MVT::i32 && NVT != MVT::i64) | 
|---|
| 4506 | return false; | 
|---|
| 4507 |  | 
|---|
| 4508 | auto *ShlCst = dyn_cast<ConstantSDNode>(Val: Shift.getOperand(i: 1)); | 
|---|
| 4509 | if (!ShlCst) | 
|---|
| 4510 | return false; | 
|---|
| 4511 |  | 
|---|
| 4512 | uint64_t ShAmt = ShlCst->getZExtValue(); | 
|---|
| 4513 |  | 
|---|
| 4514 | // Make sure that we don't change the operation by removing bits. | 
|---|
| 4515 | // This only matters for OR and XOR, AND is unaffected. | 
|---|
| 4516 | uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1; | 
|---|
| 4517 | if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) | 
|---|
| 4518 | return false; | 
|---|
| 4519 |  | 
|---|
| 4520 | // Check the minimum bitwidth for the new constant. | 
|---|
| 4521 | // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32. | 
|---|
| 4522 | auto CanShrinkImmediate = [&](int64_t &ShiftedVal) { | 
|---|
| 4523 | if (Opcode == ISD::AND) { | 
|---|
| 4524 | // AND32ri is the same as AND64ri32 with zext imm. | 
|---|
| 4525 | // Try this before sign extended immediates below. | 
|---|
| 4526 | ShiftedVal = (uint64_t)Val >> ShAmt; | 
|---|
| 4527 | if (NVT == MVT::i64 && !isUInt<32>(x: Val) && isUInt<32>(x: ShiftedVal)) | 
|---|
| 4528 | return true; | 
|---|
| 4529 | // Also swap order when the AND can become MOVZX. | 
|---|
| 4530 | if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX) | 
|---|
| 4531 | return true; | 
|---|
| 4532 | } | 
|---|
| 4533 | ShiftedVal = Val >> ShAmt; | 
|---|
| 4534 | if ((!isInt<8>(x: Val) && isInt<8>(x: ShiftedVal)) || | 
|---|
| 4535 | (!isInt<32>(x: Val) && isInt<32>(x: ShiftedVal))) | 
|---|
| 4536 | return true; | 
|---|
| 4537 | if (Opcode != ISD::AND) { | 
|---|
| 4538 | // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr | 
|---|
| 4539 | ShiftedVal = (uint64_t)Val >> ShAmt; | 
|---|
| 4540 | if (NVT == MVT::i64 && !isUInt<32>(x: Val) && isUInt<32>(x: ShiftedVal)) | 
|---|
| 4541 | return true; | 
|---|
| 4542 | } | 
|---|
| 4543 | return false; | 
|---|
| 4544 | }; | 
|---|
| 4545 |  | 
|---|
| 4546 | int64_t ShiftedVal; | 
|---|
| 4547 | if (!CanShrinkImmediate(ShiftedVal)) | 
|---|
| 4548 | return false; | 
|---|
| 4549 |  | 
|---|
| 4550 | // Ok, we can reorder to get a smaller immediate. | 
|---|
| 4551 |  | 
|---|
| 4552 | // But, it's possible the original immediate allowed an AND to become MOVZX. | 
|---|
| 4553 | // Check this late so that the MaskedValueIsZero call is delayed for as | 
|---|
| 4554 | // long as possible. | 
|---|
| 4555 | if (Opcode == ISD::AND) { | 
|---|
| 4556 | // Find the smallest zext this could possibly be. | 
|---|
| 4557 | unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits(); | 
|---|
| 4558 | ZExtWidth = llvm::bit_ceil(Value: std::max(a: ZExtWidth, b: 8U)); | 
|---|
| 4559 |  | 
|---|
| 4560 | // Figure out which bits need to be zero to achieve that mask. | 
|---|
| 4561 | APInt NeededMask = APInt::getLowBitsSet(numBits: NVT.getSizeInBits(), | 
|---|
| 4562 | loBitsSet: ZExtWidth); | 
|---|
| 4563 | NeededMask &= ~Cst->getAPIntValue(); | 
|---|
| 4564 |  | 
|---|
| 4565 | if (CurDAG->MaskedValueIsZero(Op: N->getOperand(Num: 0), Mask: NeededMask)) | 
|---|
| 4566 | return false; | 
|---|
| 4567 | } | 
|---|
| 4568 | /* Build (X op ShiftedVal) << C1, re-creating the any_extend on X if one was looked through above. */ | 
|---|
| 4569 | SDValue X = Shift.getOperand(i: 0); | 
|---|
| 4570 | if (FoundAnyExtend) { | 
|---|
| 4571 | SDValue NewX = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: NVT, Operand: X); | 
|---|
| 4572 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(N, 0), N: NewX); | 
|---|
| 4573 | X = NewX; | 
|---|
| 4574 | } | 
|---|
| 4575 |  | 
|---|
| 4576 | SDValue NewCst = CurDAG->getSignedConstant(Val: ShiftedVal, DL: dl, VT: NVT); | 
|---|
| 4577 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(N, 0), N: NewCst); | 
|---|
| 4578 | SDValue NewBinOp = CurDAG->getNode(Opcode, DL: dl, VT: NVT, N1: X, N2: NewCst); | 
|---|
| 4579 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(N, 0), N: NewBinOp); | 
|---|
| 4580 | SDValue NewSHL = CurDAG->getNode(Opcode: ISD::SHL, DL: dl, VT: NVT, N1: NewBinOp, | 
|---|
| 4581 | N2: Shift.getOperand(i: 1)); | 
|---|
| 4582 | ReplaceNode(F: N, T: NewSHL.getNode()); | 
|---|
| 4583 | SelectCode(N: NewSHL.getNode()); | 
|---|
| 4584 | return true; | 
|---|
| 4585 | } | 
|---|
| 4586 | /* Emit a VPTERNLOG machine node for Root with inputs A, B, C and control immediate Imm, then replace Root's value with it and remove Root. C, A and B are tried in that order for folding as a load or a 32/64-bit broadcast load; a folded A or B is swapped into the C position and Imm is permuted to match. ParentA/ParentB/ParentC are the nodes that have A/B/C as an operand. Always returns true. */ | 
|---|
| 4587 | bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA, | 
|---|
| 4588 | SDNode *ParentB, SDNode *ParentC, | 
|---|
| 4589 | SDValue A, SDValue B, SDValue C, | 
|---|
| 4590 | uint8_t Imm) { | 
|---|
| 4591 | assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) && | 
|---|
| 4592 | C.isOperandOf(ParentC) && "Incorrect parent node"); | 
|---|
| 4593 | /* Helper: try to fold L as a plain load; failing that, as a 32/64-bit VBROADCAST_LOAD, looking through a single-use bitcast. L and P are updated when the bitcast is skipped. */ | 
|---|
| 4594 | auto tryFoldLoadOrBCast = | 
|---|
| 4595 | [this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale, | 
|---|
| 4596 | SDValue &Index, SDValue &Disp, SDValue &Segment) { | 
|---|
| 4597 | if (tryFoldLoad(Root, P, N: L, Base, Scale, Index, Disp, Segment)) | 
|---|
| 4598 | return true; | 
|---|
| 4599 |  | 
|---|
| 4600 | // Not a load, check for broadcast which may be behind a bitcast. | 
|---|
| 4601 | if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) { | 
|---|
| 4602 | P = L.getNode(); | 
|---|
| 4603 | L = L.getOperand(i: 0); | 
|---|
| 4604 | } | 
|---|
| 4605 |  | 
|---|
| 4606 | if (L.getOpcode() != X86ISD::VBROADCAST_LOAD) | 
|---|
| 4607 | return false; | 
|---|
| 4608 |  | 
|---|
| 4609 | // Only 32 and 64 bit broadcasts are supported. | 
|---|
| 4610 | auto *MemIntr = cast<MemIntrinsicSDNode>(Val&: L); | 
|---|
| 4611 | unsigned Size = MemIntr->getMemoryVT().getSizeInBits(); | 
|---|
| 4612 | if (Size != 32 && Size != 64) | 
|---|
| 4613 | return false; | 
|---|
| 4614 |  | 
|---|
| 4615 | return tryFoldBroadcast(Root, P, N: L, Base, Scale, Index, Disp, Segment); | 
|---|
| 4616 | }; | 
|---|
| 4617 | /* The folded operand must end up in the C position, so a folded A or B is swapped with C and the immediate bits are exchanged accordingly. */ | 
|---|
| 4618 | bool FoldedLoad = false; | 
|---|
| 4619 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 4620 | if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { | 
|---|
| 4621 | FoldedLoad = true; | 
|---|
| 4622 | } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3, | 
|---|
| 4623 | Tmp4)) { | 
|---|
| 4624 | FoldedLoad = true; | 
|---|
| 4625 | std::swap(a&: A, b&: C); | 
|---|
| 4626 | // Swap bits 1/4 and 3/6. | 
|---|
| 4627 | uint8_t OldImm = Imm; | 
|---|
| 4628 | Imm = OldImm & 0xa5; | 
|---|
| 4629 | if (OldImm & 0x02) Imm |= 0x10; | 
|---|
| 4630 | if (OldImm & 0x10) Imm |= 0x02; | 
|---|
| 4631 | if (OldImm & 0x08) Imm |= 0x40; | 
|---|
| 4632 | if (OldImm & 0x40) Imm |= 0x08; | 
|---|
| 4633 | } else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3, | 
|---|
| 4634 | Tmp4)) { | 
|---|
| 4635 | FoldedLoad = true; | 
|---|
| 4636 | std::swap(a&: B, b&: C); | 
|---|
| 4637 | // Swap bits 1/2 and 5/6. | 
|---|
| 4638 | uint8_t OldImm = Imm; | 
|---|
| 4639 | Imm = OldImm & 0x99; | 
|---|
| 4640 | if (OldImm & 0x02) Imm |= 0x04; | 
|---|
| 4641 | if (OldImm & 0x04) Imm |= 0x02; | 
|---|
| 4642 | if (OldImm & 0x20) Imm |= 0x40; | 
|---|
| 4643 | if (OldImm & 0x40) Imm |= 0x20; | 
|---|
| 4644 | } | 
|---|
| 4645 |  | 
|---|
| 4646 | SDLoc DL(Root); | 
|---|
| 4647 |  | 
|---|
| 4648 | SDValue TImm = CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i8); | 
|---|
| 4649 |  | 
|---|
| 4650 | MVT NVT = Root->getSimpleValueType(ResNo: 0); | 
|---|
| 4651 |  | 
|---|
| 4652 | MachineSDNode *MNode; | 
|---|
| 4653 | if (FoldedLoad) { | 
|---|
| 4654 | SDVTList VTs = CurDAG->getVTList(VT1: NVT, VT2: MVT::Other); | 
|---|
| 4655 |  | 
|---|
| 4656 | unsigned Opc; | 
|---|
| 4657 | if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) { | 
|---|
| 4658 | auto *MemIntr = cast<MemIntrinsicSDNode>(Val&: C); | 
|---|
| 4659 | unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits(); | 
|---|
| 4660 | assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!"); | 
|---|
| 4661 |  | 
|---|
| 4662 | bool UseD = EltSize == 32; | 
|---|
| 4663 | if (NVT.is128BitVector()) | 
|---|
| 4664 | Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi; | 
|---|
| 4665 | else if (NVT.is256BitVector()) | 
|---|
| 4666 | Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi; | 
|---|
| 4667 | else if (NVT.is512BitVector()) | 
|---|
| 4668 | Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi; | 
|---|
| 4669 | else | 
|---|
| 4670 | llvm_unreachable( "Unexpected vector size!"); | 
|---|
| 4671 | } else { | 
|---|
| 4672 | bool UseD = NVT.getVectorElementType() == MVT::i32; | 
|---|
| 4673 | if (NVT.is128BitVector()) | 
|---|
| 4674 | Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi; | 
|---|
| 4675 | else if (NVT.is256BitVector()) | 
|---|
| 4676 | Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi; | 
|---|
| 4677 | else if (NVT.is512BitVector()) | 
|---|
| 4678 | Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi; | 
|---|
| 4679 | else | 
|---|
| 4680 | llvm_unreachable( "Unexpected vector size!"); | 
|---|
| 4681 | } | 
|---|
| 4682 |  | 
|---|
| 4683 | SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(i: 0)}; | 
|---|
| 4684 | MNode = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VTs, Ops); | 
|---|
| 4685 |  | 
|---|
| 4686 | // Update the chain: users of C's chain (result 1) now use result 1 of the new node. | 
|---|
| 4687 | ReplaceUses(F: C.getValue(R: 1), T: SDValue(MNode, 1)); | 
|---|
| 4688 | // Record the mem-refs: reuse the memory operand of the folded node C. | 
|---|
| 4689 | CurDAG->setNodeMemRefs(N: MNode, NewMemRefs: {cast<MemSDNode>(Val&: C)->getMemOperand()}); | 
|---|
| 4690 | } else { | 
|---|
| 4691 | bool UseD = NVT.getVectorElementType() == MVT::i32; | 
|---|
| 4692 | unsigned Opc; | 
|---|
| 4693 | if (NVT.is128BitVector()) | 
|---|
| 4694 | Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri; | 
|---|
| 4695 | else if (NVT.is256BitVector()) | 
|---|
| 4696 | Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri; | 
|---|
| 4697 | else if (NVT.is512BitVector()) | 
|---|
| 4698 | Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri; | 
|---|
| 4699 | else | 
|---|
| 4700 | llvm_unreachable( "Unexpected vector size!"); | 
|---|
| 4701 |  | 
|---|
| 4702 | MNode = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: NVT, Ops: {A, B, C, TImm}); | 
|---|
| 4703 | } | 
|---|
| 4704 |  | 
|---|
| 4705 | ReplaceUses(F: SDValue(Root, 0), T: SDValue(MNode, 0)); | 
|---|
| 4706 | CurDAG->RemoveDeadNode(N: Root); | 
|---|
| 4707 | return true; | 
|---|
| 4708 | } | 
|---|
| 4709 |  | 
|---|
| 4710 | // Try to match two logic ops to a VPTERNLOG. One operand of N must be a single-use AND/OR/XOR/ANDNP (possibly behind a single-use bitcast) supplying B and C; the other operand of N is A. Requires AVX512, a vector type whose element type is not i1, and VLX unless the vector is 512 bits wide. Returns false if these conditions are not met, otherwise the result of matchVPTERNLOG. | 
|---|
| 4711 | // FIXME: Handle more complex patterns that use an operand more than once? | 
|---|
| 4712 | bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { | 
|---|
| 4713 | MVT NVT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 4714 |  | 
|---|
| 4715 | // Make sure we support VPTERNLOG. | 
|---|
| 4716 | if (!NVT.isVector() || !Subtarget->hasAVX512() || | 
|---|
| 4717 | NVT.getVectorElementType() == MVT::i1) | 
|---|
| 4718 | return false; | 
|---|
| 4719 |  | 
|---|
| 4720 | // We need VLX for 128/256-bit. | 
|---|
| 4721 | if (!(Subtarget->hasVLX() || NVT.is512BitVector())) | 
|---|
| 4722 | return false; | 
|---|
| 4723 |  | 
|---|
| 4724 | SDValue N0 = N->getOperand(Num: 0); | 
|---|
| 4725 | SDValue N1 = N->getOperand(Num: 1); | 
|---|
| 4726 |  | 
|---|
| 4727 | auto getFoldableLogicOp = [](SDValue Op) { | 
|---|
| 4728 | // Peek through single use bitcast. | 
|---|
| 4729 | if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) | 
|---|
| 4730 | Op = Op.getOperand(i: 0); | 
|---|
| 4731 |  | 
|---|
| 4732 | if (!Op.hasOneUse()) | 
|---|
| 4733 | return SDValue(); | 
|---|
| 4734 |  | 
|---|
| 4735 | unsigned Opc = Op.getOpcode(); | 
|---|
| 4736 | if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR || | 
|---|
| 4737 | Opc == X86ISD::ANDNP) | 
|---|
| 4738 | return Op; | 
|---|
| 4739 |  | 
|---|
| 4740 | return SDValue(); | 
|---|
| 4741 | }; | 
|---|
| 4742 | /* Operand 1 is tried first as the inner logic op; whichever operand is not the inner op becomes A. */ | 
|---|
| 4743 | SDValue A, FoldableOp; | 
|---|
| 4744 | if ((FoldableOp = getFoldableLogicOp(N1))) { | 
|---|
| 4745 | A = N0; | 
|---|
| 4746 | } else if ((FoldableOp = getFoldableLogicOp(N0))) { | 
|---|
| 4747 | A = N1; | 
|---|
| 4748 | } else | 
|---|
| 4749 | return false; | 
|---|
| 4750 |  | 
|---|
| 4751 | SDValue B = FoldableOp.getOperand(i: 0); | 
|---|
| 4752 | SDValue C = FoldableOp.getOperand(i: 1); | 
|---|
| 4753 | SDNode *ParentA = N; | 
|---|
| 4754 | SDNode *ParentB = FoldableOp.getNode(); | 
|---|
| 4755 | SDNode *ParentC = FoldableOp.getNode(); | 
|---|
| 4756 |  | 
|---|
| 4757 | // We can build the appropriate control immediate by performing the logic | 
|---|
| 4758 | // operation we're matching using these constants for A, B, and C. | 
|---|
| 4759 | uint8_t TernlogMagicA = 0xf0; | 
|---|
| 4760 | uint8_t TernlogMagicB = 0xcc; | 
|---|
| 4761 | uint8_t TernlogMagicC = 0xaa; | 
|---|
| 4762 |  | 
|---|
| 4763 | // Some of the inputs may be inverted, peek through them and invert the | 
|---|
| 4764 | // magic values accordingly. | 
|---|
| 4765 | // TODO: There may be a bitcast before the xor that we should peek through. | 
|---|
| 4766 | auto PeekThroughNot = [](SDValue &Op, SDNode *&Parent, uint8_t &Magic) { | 
|---|
| 4767 | if (Op.getOpcode() == ISD::XOR && Op.hasOneUse() && | 
|---|
| 4768 | ISD::isBuildVectorAllOnes(N: Op.getOperand(i: 1).getNode())) { | 
|---|
| 4769 | Magic = ~Magic; | 
|---|
| 4770 | Parent = Op.getNode(); | 
|---|
| 4771 | Op = Op.getOperand(i: 0); | 
|---|
| 4772 | } | 
|---|
| 4773 | }; | 
|---|
| 4774 |  | 
|---|
| 4775 | PeekThroughNot(A, ParentA, TernlogMagicA); | 
|---|
| 4776 | PeekThroughNot(B, ParentB, TernlogMagicB); | 
|---|
| 4777 | PeekThroughNot(C, ParentC, TernlogMagicC); | 
|---|
| 4778 | /* First evaluate the inner logic op on the B and C magic values. */ | 
|---|
| 4779 | uint8_t Imm; | 
|---|
| 4780 | switch (FoldableOp.getOpcode()) { | 
|---|
| 4781 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 4782 | case ISD::AND:      Imm = TernlogMagicB & TernlogMagicC; break; | 
|---|
| 4783 | case ISD::OR:       Imm = TernlogMagicB | TernlogMagicC; break; | 
|---|
| 4784 | case ISD::XOR:      Imm = TernlogMagicB ^ TernlogMagicC; break; | 
|---|
| 4785 | case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break; | 
|---|
| 4786 | } | 
|---|
| 4787 | /* Then combine that result with the A magic value according to N's own opcode; for ANDNP the inverted side depends on which operand of N is A. */ | 
|---|
| 4788 | switch (N->getOpcode()) { | 
|---|
| 4789 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 4790 | case X86ISD::ANDNP: | 
|---|
| 4791 | if (A == N0) | 
|---|
| 4792 | Imm &= ~TernlogMagicA; | 
|---|
| 4793 | else | 
|---|
| 4794 | Imm = ~(Imm) & TernlogMagicA; | 
|---|
| 4795 | break; | 
|---|
| 4796 | case ISD::AND: Imm &= TernlogMagicA; break; | 
|---|
| 4797 | case ISD::OR:  Imm |= TernlogMagicA; break; | 
|---|
| 4798 | case ISD::XOR: Imm ^= TernlogMagicA; break; | 
|---|
| 4799 | } | 
|---|
| 4800 |  | 
|---|
| 4801 | return matchVPTERNLOG(Root: N, ParentA, ParentB, ParentC, A, B, C, Imm); | 
|---|
| 4802 | } | 
|---|
| 4803 |  | 
|---|
| 4804 | /// If the high bits of an 'and' operand are known zero, try setting the | 
|---|
| 4805 | /// high bits of an 'and' constant operand to produce a smaller encoding by | 
|---|
| 4806 | /// creating a small, sign-extended negative immediate rather than a large | 
|---|
| 4807 | /// positive one. This reverses a transform in SimplifyDemandedBits that | 
|---|
| 4808 | /// shrinks mask constants by clearing bits. There is also a possibility that | 
|---|
| 4809 | /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that | 
|---|
| 4810 | /// case, just replace the 'and'. Return 'true' if the node is replaced. | 
|---|
| 4811 | bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) { | 
|---|
| 4812 | // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't | 
|---|
| 4813 | // have immediate operands. | 
|---|
| 4814 | MVT VT = And->getSimpleValueType(ResNo: 0); | 
|---|
| 4815 | if (VT != MVT::i32 && VT != MVT::i64) | 
|---|
| 4816 | return false; | 
|---|
| 4817 |  | 
|---|
| 4818 | auto *And1C = dyn_cast<ConstantSDNode>(Val: And->getOperand(Num: 1)); | 
|---|
| 4819 | if (!And1C) | 
|---|
| 4820 | return false; | 
|---|
| 4821 |  | 
|---|
| 4822 | // Bail out if the mask constant is already negative. It can't shrink more. | 
|---|
| 4823 | // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel | 
|---|
| 4824 | // patterns to use a 32-bit and instead of a 64-bit and by relying on the | 
|---|
| 4825 | // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits | 
|---|
| 4826 | // are negative too. | 
|---|
| 4827 | APInt MaskVal = And1C->getAPIntValue(); | 
|---|
| 4828 | unsigned MaskLZ = MaskVal.countl_zero(); | 
|---|
| 4829 | if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32)) | 
|---|
| 4830 | return false; | 
|---|
| 4831 |  | 
|---|
| 4832 | // Don't extend into the upper 32 bits of a 64 bit mask. | 
|---|
| 4833 | if (VT == MVT::i64 && MaskLZ >= 32) { | 
|---|
| 4834 | MaskLZ -= 32; | 
|---|
| 4835 | MaskVal = MaskVal.trunc(width: 32); | 
|---|
| 4836 | } | 
|---|
| 4837 | /* HighZeros marks the leading zero bits of the mask; NegMaskVal is the mask with those bits set. */ | 
|---|
| 4838 | SDValue And0 = And->getOperand(Num: 0); | 
|---|
| 4839 | APInt HighZeros = APInt::getHighBitsSet(numBits: MaskVal.getBitWidth(), hiBitsSet: MaskLZ); | 
|---|
| 4840 | APInt NegMaskVal = MaskVal | HighZeros; | 
|---|
| 4841 |  | 
|---|
| 4842 | // If a negative constant would not allow a smaller encoding, there's no need | 
|---|
| 4843 | // to continue. Only change the constant when we know it's a win. | 
|---|
| 4844 | unsigned MinWidth = NegMaskVal.getSignificantBits(); | 
|---|
| 4845 | if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getSignificantBits() <= 32)) | 
|---|
| 4846 | return false; | 
|---|
| 4847 |  | 
|---|
| 4848 | // Extend masks if we truncated above. | 
|---|
| 4849 | if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) { | 
|---|
| 4850 | NegMaskVal = NegMaskVal.zext(width: 64); | 
|---|
| 4851 | HighZeros = HighZeros.zext(width: 64); | 
|---|
| 4852 | } | 
|---|
| 4853 |  | 
|---|
| 4854 | // The variable operand must be all zeros in the top bits to allow using the | 
|---|
| 4855 | // new, negative constant as the mask. | 
|---|
| 4856 | // TODO: Handle constant folding? | 
|---|
| 4857 | KnownBits Known0 = CurDAG->computeKnownBits(Op: And0); | 
|---|
| 4858 | if (Known0.isConstant() || !HighZeros.isSubsetOf(RHS: Known0.Zero)) | 
|---|
| 4859 | return false; | 
|---|
| 4860 |  | 
|---|
| 4861 | // Check if the mask is -1. In that case, this is an unnecessary instruction | 
|---|
| 4862 | // that escaped earlier analysis. | 
|---|
| 4863 | if (NegMaskVal.isAllOnes()) { | 
|---|
| 4864 | ReplaceNode(F: And, T: And0.getNode()); | 
|---|
| 4865 | return true; | 
|---|
| 4866 | } | 
|---|
| 4867 |  | 
|---|
| 4868 | // A negative mask allows a smaller encoding. Create a new 'and' node. | 
|---|
| 4869 | SDValue NewMask = CurDAG->getConstant(Val: NegMaskVal, DL: SDLoc(And), VT); | 
|---|
| 4870 | insertDAGNode(DAG&: *CurDAG, Pos: SDValue(And, 0), N: NewMask); | 
|---|
| 4871 | SDValue NewAnd = CurDAG->getNode(Opcode: ISD::AND, DL: SDLoc(And), VT, N1: And0, N2: NewMask); | 
|---|
| 4872 | ReplaceNode(F: And, T: NewAnd.getNode()); | 
|---|
| 4873 | SelectCode(N: NewAnd.getNode()); | 
|---|
| 4874 | return true; | 
|---|
| 4875 | } | 
|---|
| 4876 | /* Map TestVT and the IsTestN / FoldedLoad / FoldedBCast / Masked flags to a VPTESTM or VPTESTNM opcode. FoldedBCast is checked first and selects the rmb forms, FoldedLoad is checked next and selects the rm forms, and otherwise the rr form is returned. Masked selects the k-suffixed variant and IsTestN selects VPTESTNM. An unlisted TestVT hits llvm_unreachable. */ | 
|---|
| 4877 | static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad, | 
|---|
| 4878 | bool FoldedBCast, bool Masked) { | 
|---|
| 4879 | #define VPTESTM_CASE(VT, SUFFIX) \ | 
|---|
| 4880 | case MVT::VT: \ | 
|---|
| 4881 | if (Masked) \ | 
|---|
| 4882 | return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \ | 
|---|
| 4883 | return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX; | 
|---|
| 4884 |  | 
|---|
| 4885 |  | 
|---|
| 4886 | #define VPTESTM_BROADCAST_CASES(SUFFIX) \ | 
|---|
| 4887 | default: llvm_unreachable("Unexpected VT!"); \ | 
|---|
| 4888 | VPTESTM_CASE(v4i32, DZ128##SUFFIX) \ | 
|---|
| 4889 | VPTESTM_CASE(v2i64, QZ128##SUFFIX) \ | 
|---|
| 4890 | VPTESTM_CASE(v8i32, DZ256##SUFFIX) \ | 
|---|
| 4891 | VPTESTM_CASE(v4i64, QZ256##SUFFIX) \ | 
|---|
| 4892 | VPTESTM_CASE(v16i32, DZ##SUFFIX) \ | 
|---|
| 4893 | VPTESTM_CASE(v8i64, QZ##SUFFIX) | 
|---|
| 4894 |  | 
|---|
| 4895 | #define VPTESTM_FULL_CASES(SUFFIX) \ | 
|---|
| 4896 | VPTESTM_BROADCAST_CASES(SUFFIX) \ | 
|---|
| 4897 | VPTESTM_CASE(v16i8, BZ128##SUFFIX) \ | 
|---|
| 4898 | VPTESTM_CASE(v8i16, WZ128##SUFFIX) \ | 
|---|
| 4899 | VPTESTM_CASE(v32i8, BZ256##SUFFIX) \ | 
|---|
| 4900 | VPTESTM_CASE(v16i16, WZ256##SUFFIX) \ | 
|---|
| 4901 | VPTESTM_CASE(v64i8, BZ##SUFFIX) \ | 
|---|
| 4902 | VPTESTM_CASE(v32i16, WZ##SUFFIX) | 
|---|
| 4903 | /* Broadcast forms are only listed for the 32-bit and 64-bit element vector types of VPTESTM_BROADCAST_CASES; every case returns, so each switch below is final for its branch. */ | 
|---|
| 4904 | if (FoldedBCast) { | 
|---|
| 4905 | switch (TestVT.SimpleTy) { | 
|---|
| 4906 | VPTESTM_BROADCAST_CASES(rmb) | 
|---|
| 4907 | } | 
|---|
| 4908 | } | 
|---|
| 4909 |  | 
|---|
| 4910 | if (FoldedLoad) { | 
|---|
| 4911 | switch (TestVT.SimpleTy) { | 
|---|
| 4912 | VPTESTM_FULL_CASES(rm) | 
|---|
| 4913 | } | 
|---|
| 4914 | } | 
|---|
| 4915 |  | 
|---|
| 4916 | switch (TestVT.SimpleTy) { | 
|---|
| 4917 | VPTESTM_FULL_CASES(rr) | 
|---|
| 4918 | } | 
|---|
| 4919 |  | 
|---|
| 4920 | #undef VPTESTM_FULL_CASES | 
|---|
| 4921 | #undef VPTESTM_BROADCAST_CASES | 
|---|
| 4922 | #undef VPTESTM_CASE | 
|---|
| 4923 | } | 
|---|
| 4924 |  | 
|---|
| 4925 | // Try to create VPTESTM instruction. If InMask is not null, it will be used | 
|---|
| 4926 | // to form a masked operation. | 
|---|
| 4927 | bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, | 
|---|
| 4928 | SDValue InMask) { | 
|---|
| 4929 | assert(Subtarget->hasAVX512() && "Expected AVX512!"); | 
|---|
| 4930 | assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && | 
|---|
| 4931 | "Unexpected VT!"); | 
|---|
| 4932 |  | 
|---|
| 4933 | // Look for equal and not equal compares. | 
|---|
| 4934 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: Setcc.getOperand(i: 2))->get(); | 
|---|
| 4935 | if (CC != ISD::SETEQ && CC != ISD::SETNE) | 
|---|
| 4936 | return false; | 
|---|
| 4937 |  | 
|---|
| 4938 | SDValue SetccOp0 = Setcc.getOperand(i: 0); | 
|---|
| 4939 | SDValue SetccOp1 = Setcc.getOperand(i: 1); | 
|---|
| 4940 |  | 
|---|
| 4941 | // Canonicalize the all zero vector to the RHS. | 
|---|
| 4942 | if (ISD::isBuildVectorAllZeros(N: SetccOp0.getNode())) | 
|---|
| 4943 | std::swap(a&: SetccOp0, b&: SetccOp1); | 
|---|
| 4944 |  | 
|---|
| 4945 | // See if we're comparing against zero. | 
|---|
| 4946 | if (!ISD::isBuildVectorAllZeros(N: SetccOp1.getNode())) | 
|---|
| 4947 | return false; | 
|---|
| 4948 |  | 
|---|
| 4949 | SDValue N0 = SetccOp0; | 
|---|
| 4950 |  | 
|---|
| 4951 | MVT CmpVT = N0.getSimpleValueType(); | 
|---|
| 4952 | MVT CmpSVT = CmpVT.getVectorElementType(); | 
|---|
| 4953 |  | 
|---|
| 4954 | // Start with both operands the same. We'll try to refine this. | 
|---|
| 4955 | SDValue Src0 = N0; | 
|---|
| 4956 | SDValue Src1 = N0; | 
|---|
| 4957 |  | 
|---|
| 4958 | { | 
|---|
| 4959 | // Look through single use bitcasts. | 
|---|
| 4960 | SDValue N0Temp = N0; | 
|---|
| 4961 | if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse()) | 
|---|
| 4962 | N0Temp = N0.getOperand(i: 0); | 
|---|
| 4963 |  | 
|---|
| 4964 | // Look for single use AND. | 
|---|
| 4965 | if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) { | 
|---|
| 4966 | Src0 = N0Temp.getOperand(i: 0); | 
|---|
| 4967 | Src1 = N0Temp.getOperand(i: 1); | 
|---|
| 4968 | } | 
|---|
| 4969 | } | 
|---|
| 4970 |  | 
|---|
| 4971 | // Without VLX we need to widen the operation. | 
|---|
| 4972 | bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector(); | 
|---|
| 4973 |  | 
|---|
| 4974 | auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L, | 
|---|
| 4975 | SDValue &Base, SDValue &Scale, SDValue &Index, | 
|---|
| 4976 | SDValue &Disp, SDValue &Segment) { | 
|---|
| 4977 | // If we need to widen, we can't fold the load. | 
|---|
| 4978 | if (!Widen) | 
|---|
| 4979 | if (tryFoldLoad(Root, P, N: L, Base, Scale, Index, Disp, Segment)) | 
|---|
| 4980 | return true; | 
|---|
| 4981 |  | 
|---|
| 4982 | // If we didn't fold a load, try to match broadcast. No widening limitation | 
|---|
| 4983 | // for this. But only 32 and 64 bit types are supported. | 
|---|
| 4984 | if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64) | 
|---|
| 4985 | return false; | 
|---|
| 4986 |  | 
|---|
| 4987 | // Look through single use bitcasts. | 
|---|
| 4988 | if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) { | 
|---|
| 4989 | P = L.getNode(); | 
|---|
| 4990 | L = L.getOperand(i: 0); | 
|---|
| 4991 | } | 
|---|
| 4992 |  | 
|---|
| 4993 | if (L.getOpcode() != X86ISD::VBROADCAST_LOAD) | 
|---|
| 4994 | return false; | 
|---|
| 4995 |  | 
|---|
| 4996 | auto *MemIntr = cast<MemIntrinsicSDNode>(Val&: L); | 
|---|
| 4997 | if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits()) | 
|---|
| 4998 | return false; | 
|---|
| 4999 |  | 
|---|
| 5000 | return tryFoldBroadcast(Root, P, N: L, Base, Scale, Index, Disp, Segment); | 
|---|
| 5001 | }; | 
|---|
| 5002 |  | 
|---|
| 5003 | // We can only fold loads if the sources are unique. | 
|---|
| 5004 | bool CanFoldLoads = Src0 != Src1; | 
|---|
| 5005 |  | 
|---|
| 5006 | bool FoldedLoad = false; | 
|---|
| 5007 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 5008 | if (CanFoldLoads) { | 
|---|
| 5009 | FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2, | 
|---|
| 5010 | Tmp3, Tmp4); | 
|---|
| 5011 | if (!FoldedLoad) { | 
|---|
| 5012 | // And is commutative. | 
|---|
| 5013 | FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1, | 
|---|
| 5014 | Tmp2, Tmp3, Tmp4); | 
|---|
| 5015 | if (FoldedLoad) | 
|---|
| 5016 | std::swap(a&: Src0, b&: Src1); | 
|---|
| 5017 | } | 
|---|
| 5018 | } | 
|---|
| 5019 |  | 
|---|
| 5020 | bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD; | 
|---|
| 5021 |  | 
|---|
| 5022 | bool IsMasked = InMask.getNode() != nullptr; | 
|---|
| 5023 |  | 
|---|
| 5024 | SDLoc dl(Root); | 
|---|
| 5025 |  | 
|---|
| 5026 | MVT ResVT = Setcc.getSimpleValueType(); | 
|---|
| 5027 | MVT MaskVT = ResVT; | 
|---|
| 5028 | if (Widen) { | 
|---|
| 5029 | // Widen the inputs using insert_subreg or copy_to_regclass. | 
|---|
| 5030 | unsigned Scale = CmpVT.is128BitVector() ? 4 : 2; | 
|---|
| 5031 | unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm; | 
|---|
| 5032 | unsigned NumElts = CmpVT.getVectorNumElements() * Scale; | 
|---|
| 5033 | CmpVT = MVT::getVectorVT(VT: CmpSVT, NumElements: NumElts); | 
|---|
| 5034 | MaskVT = MVT::getVectorVT(VT: MVT::i1, NumElements: NumElts); | 
|---|
| 5035 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(Opcode: X86::IMPLICIT_DEF, dl, | 
|---|
| 5036 | VT: CmpVT), 0); | 
|---|
| 5037 | Src0 = CurDAG->getTargetInsertSubreg(SRIdx: SubReg, DL: dl, VT: CmpVT, Operand: ImplDef, Subreg: Src0); | 
|---|
| 5038 |  | 
|---|
| 5039 | if (!FoldedBCast) | 
|---|
| 5040 | Src1 = CurDAG->getTargetInsertSubreg(SRIdx: SubReg, DL: dl, VT: CmpVT, Operand: ImplDef, Subreg: Src1); | 
|---|
| 5041 |  | 
|---|
| 5042 | if (IsMasked) { | 
|---|
| 5043 | // Widen the mask. | 
|---|
| 5044 | unsigned RegClass = TLI->getRegClassFor(VT: MaskVT)->getID(); | 
|---|
| 5045 | SDValue RC = CurDAG->getTargetConstant(Val: RegClass, DL: dl, VT: MVT::i32); | 
|---|
| 5046 | InMask = SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, | 
|---|
| 5047 | dl, VT: MaskVT, Op1: InMask, Op2: RC), 0); | 
|---|
| 5048 | } | 
|---|
| 5049 | } | 
|---|
| 5050 |  | 
|---|
| 5051 | bool IsTestN = CC == ISD::SETEQ; | 
|---|
| 5052 | unsigned Opc = getVPTESTMOpc(TestVT: CmpVT, IsTestN, FoldedLoad, FoldedBCast, | 
|---|
| 5053 | Masked: IsMasked); | 
|---|
| 5054 |  | 
|---|
| 5055 | MachineSDNode *CNode; | 
|---|
| 5056 | if (FoldedLoad) { | 
|---|
| 5057 | SDVTList VTs = CurDAG->getVTList(VT1: MaskVT, VT2: MVT::Other); | 
|---|
| 5058 |  | 
|---|
| 5059 | if (IsMasked) { | 
|---|
| 5060 | SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, | 
|---|
| 5061 | Src1.getOperand(i: 0) }; | 
|---|
| 5062 | CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VTs, Ops); | 
|---|
| 5063 | } else { | 
|---|
| 5064 | SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, | 
|---|
| 5065 | Src1.getOperand(i: 0) }; | 
|---|
| 5066 | CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VTs, Ops); | 
|---|
| 5067 | } | 
|---|
| 5068 |  | 
|---|
| 5069 | // Update the chain. | 
|---|
| 5070 | ReplaceUses(F: Src1.getValue(R: 1), T: SDValue(CNode, 1)); | 
|---|
| 5071 | // Record the mem-refs | 
|---|
| 5072 | CurDAG->setNodeMemRefs(N: CNode, NewMemRefs: {cast<MemSDNode>(Val&: Src1)->getMemOperand()}); | 
|---|
| 5073 | } else { | 
|---|
| 5074 | if (IsMasked) | 
|---|
| 5075 | CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MaskVT, Op1: InMask, Op2: Src0, Op3: Src1); | 
|---|
| 5076 | else | 
|---|
| 5077 | CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MaskVT, Op1: Src0, Op2: Src1); | 
|---|
| 5078 | } | 
|---|
| 5079 |  | 
|---|
| 5080 | // If we widened, we need to shrink the mask VT. | 
|---|
| 5081 | if (Widen) { | 
|---|
| 5082 | unsigned RegClass = TLI->getRegClassFor(VT: ResVT)->getID(); | 
|---|
| 5083 | SDValue RC = CurDAG->getTargetConstant(Val: RegClass, DL: dl, VT: MVT::i32); | 
|---|
| 5084 | CNode = CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, | 
|---|
| 5085 | dl, VT: ResVT, Op1: SDValue(CNode, 0), Op2: RC); | 
|---|
| 5086 | } | 
|---|
| 5087 |  | 
|---|
| 5088 | ReplaceUses(F: SDValue(Root, 0), T: SDValue(CNode, 0)); | 
|---|
| 5089 | CurDAG->RemoveDeadNode(N: Root); | 
|---|
| 5090 | return true; | 
|---|
| 5091 | } | 
|---|
| 5092 |  | 
|---|
| 5093 | // Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it | 
|---|
| 5094 | // into vpternlog. | 
|---|
| 5095 | bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { | 
|---|
| 5096 | assert(N->getOpcode() == ISD::OR && "Unexpected opcode!"); | 
|---|
| 5097 |  | 
|---|
| 5098 | MVT NVT = N->getSimpleValueType(ResNo: 0); | 
|---|
| 5099 |  | 
|---|
| 5100 | // Make sure we support VPTERNLOG. | 
|---|
| 5101 | if (!NVT.isVector() || !Subtarget->hasAVX512()) | 
|---|
| 5102 | return false; | 
|---|
| 5103 |  | 
|---|
| 5104 | // We need VLX for 128/256-bit. | 
|---|
| 5105 | if (!(Subtarget->hasVLX() || NVT.is512BitVector())) | 
|---|
| 5106 | return false; | 
|---|
| 5107 |  | 
|---|
| 5108 | SDValue N0 = N->getOperand(Num: 0); | 
|---|
| 5109 | SDValue N1 = N->getOperand(Num: 1); | 
|---|
| 5110 |  | 
|---|
| 5111 | // Canonicalize AND to LHS. | 
|---|
| 5112 | if (N1.getOpcode() == ISD::AND) | 
|---|
| 5113 | std::swap(a&: N0, b&: N1); | 
|---|
| 5114 |  | 
|---|
| 5115 | if (N0.getOpcode() != ISD::AND || | 
|---|
| 5116 | N1.getOpcode() != X86ISD::ANDNP || | 
|---|
| 5117 | !N0.hasOneUse() || !N1.hasOneUse()) | 
|---|
| 5118 | return false; | 
|---|
| 5119 |  | 
|---|
| 5120 | // ANDN is not commutable, use it to pick down A and C. | 
|---|
| 5121 | SDValue A = N1.getOperand(i: 0); | 
|---|
| 5122 | SDValue C = N1.getOperand(i: 1); | 
|---|
| 5123 |  | 
|---|
| 5124 | // AND is commutable, if one operand matches A, the other operand is B. | 
|---|
| 5125 | // Otherwise this isn't a match. | 
|---|
| 5126 | SDValue B; | 
|---|
| 5127 | if (N0.getOperand(i: 0) == A) | 
|---|
| 5128 | B = N0.getOperand(i: 1); | 
|---|
| 5129 | else if (N0.getOperand(i: 1) == A) | 
|---|
| 5130 | B = N0.getOperand(i: 0); | 
|---|
| 5131 | else | 
|---|
| 5132 | return false; | 
|---|
| 5133 |  | 
|---|
| 5134 | SDLoc dl(N); | 
|---|
| 5135 | SDValue Imm = CurDAG->getTargetConstant(Val: 0xCA, DL: dl, VT: MVT::i8); | 
|---|
| 5136 | SDValue Ternlog = CurDAG->getNode(Opcode: X86ISD::VPTERNLOG, DL: dl, VT: NVT, N1: A, N2: B, N3: C, N4: Imm); | 
|---|
| 5137 | ReplaceNode(F: N, T: Ternlog.getNode()); | 
|---|
| 5138 |  | 
|---|
| 5139 | return matchVPTERNLOG(Root: Ternlog.getNode(), ParentA: Ternlog.getNode(), ParentB: Ternlog.getNode(), | 
|---|
| 5140 | ParentC: Ternlog.getNode(), A, B, C, Imm: 0xCA); | 
|---|
| 5141 | } | 
|---|
| 5142 |  | 
|---|
| 5143 | void X86DAGToDAGISel::Select(SDNode *Node) { | 
|---|
| 5144 | MVT NVT = Node->getSimpleValueType(ResNo: 0); | 
|---|
| 5145 | unsigned Opcode = Node->getOpcode(); | 
|---|
| 5146 | SDLoc dl(Node); | 
|---|
| 5147 |  | 
|---|
| 5148 | if (Node->isMachineOpcode()) { | 
|---|
| 5149 | LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); | 
|---|
| 5150 | Node->setNodeId(-1); | 
|---|
| 5151 | return;   // Already selected. | 
|---|
| 5152 | } | 
|---|
| 5153 |  | 
|---|
| 5154 | switch (Opcode) { | 
|---|
| 5155 | default: break; | 
|---|
| 5156 | case ISD::INTRINSIC_W_CHAIN: { | 
|---|
| 5157 | unsigned IntNo = Node->getConstantOperandVal(Num: 1); | 
|---|
| 5158 | switch (IntNo) { | 
|---|
| 5159 | default: break; | 
|---|
| 5160 | case Intrinsic::x86_encodekey128: | 
|---|
| 5161 | case Intrinsic::x86_encodekey256: { | 
|---|
| 5162 | if (!Subtarget->hasKL()) | 
|---|
| 5163 | break; | 
|---|
| 5164 |  | 
|---|
| 5165 | unsigned Opcode; | 
|---|
| 5166 | switch (IntNo) { | 
|---|
| 5167 | default: llvm_unreachable( "Impossible intrinsic"); | 
|---|
| 5168 | case Intrinsic::x86_encodekey128: | 
|---|
| 5169 | Opcode = X86::ENCODEKEY128; | 
|---|
| 5170 | break; | 
|---|
| 5171 | case Intrinsic::x86_encodekey256: | 
|---|
| 5172 | Opcode = X86::ENCODEKEY256; | 
|---|
| 5173 | break; | 
|---|
| 5174 | } | 
|---|
| 5175 |  | 
|---|
| 5176 | SDValue Chain = Node->getOperand(Num: 0); | 
|---|
| 5177 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM0, N: Node->getOperand(Num: 3), | 
|---|
| 5178 | Glue: SDValue()); | 
|---|
| 5179 | if (Opcode == X86::ENCODEKEY256) | 
|---|
| 5180 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM1, N: Node->getOperand(Num: 4), | 
|---|
| 5181 | Glue: Chain.getValue(R: 1)); | 
|---|
| 5182 |  | 
|---|
| 5183 | MachineSDNode *Res = CurDAG->getMachineNode( | 
|---|
| 5184 | Opcode, dl, VTs: Node->getVTList(), | 
|---|
| 5185 | Ops: {Node->getOperand(Num: 2), Chain, Chain.getValue(R: 1)}); | 
|---|
| 5186 | ReplaceNode(F: Node, T: Res); | 
|---|
| 5187 | return; | 
|---|
| 5188 | } | 
|---|
| 5189 | case Intrinsic::x86_tileloaddrs64_internal: | 
|---|
| 5190 | case Intrinsic::x86_tileloaddrst164_internal: | 
|---|
| 5191 | if (!Subtarget->hasAMXMOVRS()) | 
|---|
| 5192 | break; | 
|---|
| 5193 | [[fallthrough]]; | 
|---|
| 5194 | case Intrinsic::x86_tileloadd64_internal: | 
|---|
| 5195 | case Intrinsic::x86_tileloaddt164_internal: { | 
|---|
| 5196 | if (!Subtarget->hasAMXTILE()) | 
|---|
| 5197 | break; | 
|---|
| 5198 | auto *MFI = | 
|---|
| 5199 | CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); | 
|---|
| 5200 | MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA); | 
|---|
| 5201 | unsigned Opc; | 
|---|
| 5202 | switch (IntNo) { | 
|---|
| 5203 | default: | 
|---|
| 5204 | llvm_unreachable( "Unexpected intrinsic!"); | 
|---|
| 5205 | case Intrinsic::x86_tileloaddrs64_internal: | 
|---|
| 5206 | Opc = X86::PTILELOADDRSV; | 
|---|
| 5207 | break; | 
|---|
| 5208 | case Intrinsic::x86_tileloaddrst164_internal: | 
|---|
| 5209 | Opc = X86::PTILELOADDRST1V; | 
|---|
| 5210 | break; | 
|---|
| 5211 | case Intrinsic::x86_tileloadd64_internal: | 
|---|
| 5212 | Opc = X86::PTILELOADDV; | 
|---|
| 5213 | break; | 
|---|
| 5214 | case Intrinsic::x86_tileloaddt164_internal: | 
|---|
| 5215 | Opc = X86::PTILELOADDT1V; | 
|---|
| 5216 | break; | 
|---|
| 5217 | } | 
|---|
| 5218 | // _tile_loadd_internal(row, col, buf, STRIDE) | 
|---|
| 5219 | SDValue Base = Node->getOperand(Num: 4); | 
|---|
| 5220 | SDValue Scale = getI8Imm(Imm: 1, DL: dl); | 
|---|
| 5221 | SDValue Index = Node->getOperand(Num: 5); | 
|---|
| 5222 | SDValue Disp = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32); | 
|---|
| 5223 | SDValue Segment = CurDAG->getRegister(Reg: 0, VT: MVT::i16); | 
|---|
| 5224 | SDValue Chain = Node->getOperand(Num: 0); | 
|---|
| 5225 | MachineSDNode *CNode; | 
|---|
| 5226 | SDValue Ops[] = {Node->getOperand(Num: 2), | 
|---|
| 5227 | Node->getOperand(Num: 3), | 
|---|
| 5228 | Base, | 
|---|
| 5229 | Scale, | 
|---|
| 5230 | Index, | 
|---|
| 5231 | Disp, | 
|---|
| 5232 | Segment, | 
|---|
| 5233 | Chain}; | 
|---|
| 5234 | CNode = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: {MVT::x86amx, MVT::Other}, Ops); | 
|---|
| 5235 | ReplaceNode(F: Node, T: CNode); | 
|---|
| 5236 | return; | 
|---|
| 5237 | } | 
|---|
| 5238 | } | 
|---|
| 5239 | break; | 
|---|
| 5240 | } | 
|---|
| 5241 | case ISD::INTRINSIC_VOID: { | 
|---|
| 5242 | unsigned IntNo = Node->getConstantOperandVal(Num: 1); | 
|---|
| 5243 | switch (IntNo) { | 
|---|
| 5244 | default: break; | 
|---|
| 5245 | case Intrinsic::x86_sse3_monitor: | 
|---|
| 5246 | case Intrinsic::x86_monitorx: | 
|---|
| 5247 | case Intrinsic::x86_clzero: { | 
|---|
| 5248 | bool Use64BitPtr = Node->getOperand(Num: 2).getValueType() == MVT::i64; | 
|---|
| 5249 |  | 
|---|
| 5250 | unsigned Opc = 0; | 
|---|
| 5251 | switch (IntNo) { | 
|---|
| 5252 | default: llvm_unreachable( "Unexpected intrinsic!"); | 
|---|
| 5253 | case Intrinsic::x86_sse3_monitor: | 
|---|
| 5254 | if (!Subtarget->hasSSE3()) | 
|---|
| 5255 | break; | 
|---|
| 5256 | Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr; | 
|---|
| 5257 | break; | 
|---|
| 5258 | case Intrinsic::x86_monitorx: | 
|---|
| 5259 | if (!Subtarget->hasMWAITX()) | 
|---|
| 5260 | break; | 
|---|
| 5261 | Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr; | 
|---|
| 5262 | break; | 
|---|
| 5263 | case Intrinsic::x86_clzero: | 
|---|
| 5264 | if (!Subtarget->hasCLZERO()) | 
|---|
| 5265 | break; | 
|---|
| 5266 | Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r; | 
|---|
| 5267 | break; | 
|---|
| 5268 | } | 
|---|
| 5269 |  | 
|---|
| 5270 | if (Opc) { | 
|---|
| 5271 | unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX; | 
|---|
| 5272 | SDValue Chain = CurDAG->getCopyToReg(Chain: Node->getOperand(Num: 0), dl, Reg: PtrReg, | 
|---|
| 5273 | N: Node->getOperand(Num: 2), Glue: SDValue()); | 
|---|
| 5274 | SDValue InGlue = Chain.getValue(R: 1); | 
|---|
| 5275 |  | 
|---|
| 5276 | if (IntNo == Intrinsic::x86_sse3_monitor || | 
|---|
| 5277 | IntNo == Intrinsic::x86_monitorx) { | 
|---|
| 5278 | // Copy the other two operands to ECX and EDX. | 
|---|
| 5279 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::ECX, N: Node->getOperand(Num: 3), | 
|---|
| 5280 | Glue: InGlue); | 
|---|
| 5281 | InGlue = Chain.getValue(R: 1); | 
|---|
| 5282 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::EDX, N: Node->getOperand(Num: 4), | 
|---|
| 5283 | Glue: InGlue); | 
|---|
| 5284 | InGlue = Chain.getValue(R: 1); | 
|---|
| 5285 | } | 
|---|
| 5286 |  | 
|---|
| 5287 | MachineSDNode *CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, | 
|---|
| 5288 | Ops: { Chain, InGlue}); | 
|---|
| 5289 | ReplaceNode(F: Node, T: CNode); | 
|---|
| 5290 | return; | 
|---|
| 5291 | } | 
|---|
| 5292 |  | 
|---|
| 5293 | break; | 
|---|
| 5294 | } | 
|---|
| 5295 | case Intrinsic::x86_tilestored64_internal: { | 
|---|
| 5296 | auto *MFI = | 
|---|
| 5297 | CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); | 
|---|
| 5298 | MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA); | 
|---|
| 5299 | unsigned Opc = X86::PTILESTOREDV; | 
|---|
| 5300 | // _tile_stored_internal(row, col, buf, STRIDE, c) | 
|---|
| 5301 | SDValue Base = Node->getOperand(Num: 4); | 
|---|
| 5302 | SDValue Scale = getI8Imm(Imm: 1, DL: dl); | 
|---|
| 5303 | SDValue Index = Node->getOperand(Num: 5); | 
|---|
| 5304 | SDValue Disp = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32); | 
|---|
| 5305 | SDValue Segment = CurDAG->getRegister(Reg: 0, VT: MVT::i16); | 
|---|
| 5306 | SDValue Chain = Node->getOperand(Num: 0); | 
|---|
| 5307 | MachineSDNode *CNode; | 
|---|
| 5308 | SDValue Ops[] = {Node->getOperand(Num: 2), | 
|---|
| 5309 | Node->getOperand(Num: 3), | 
|---|
| 5310 | Base, | 
|---|
| 5311 | Scale, | 
|---|
| 5312 | Index, | 
|---|
| 5313 | Disp, | 
|---|
| 5314 | Segment, | 
|---|
| 5315 | Node->getOperand(Num: 6), | 
|---|
| 5316 | Chain}; | 
|---|
| 5317 | CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops); | 
|---|
| 5318 | ReplaceNode(F: Node, T: CNode); | 
|---|
| 5319 | return; | 
|---|
| 5320 | } | 
|---|
| 5321 | case Intrinsic::x86_tileloaddrs64: | 
|---|
| 5322 | case Intrinsic::x86_tileloaddrst164: | 
|---|
| 5323 | if (!Subtarget->hasAMXMOVRS()) | 
|---|
| 5324 | break; | 
|---|
| 5325 | [[fallthrough]]; | 
|---|
| 5326 | case Intrinsic::x86_tileloadd64: | 
|---|
| 5327 | case Intrinsic::x86_tileloaddt164: | 
|---|
| 5328 | case Intrinsic::x86_tilestored64: { | 
|---|
| 5329 | if (!Subtarget->hasAMXTILE()) | 
|---|
| 5330 | break; | 
|---|
| 5331 | auto *MFI = | 
|---|
| 5332 | CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); | 
|---|
| 5333 | MFI->setAMXProgModel(AMXProgModelEnum::DirectReg); | 
|---|
| 5334 | unsigned Opc; | 
|---|
| 5335 | switch (IntNo) { | 
|---|
| 5336 | default: llvm_unreachable( "Unexpected intrinsic!"); | 
|---|
| 5337 | case Intrinsic::x86_tileloadd64:   Opc = X86::PTILELOADD; break; | 
|---|
| 5338 | case Intrinsic::x86_tileloaddrs64: | 
|---|
| 5339 | Opc = X86::PTILELOADDRS; | 
|---|
| 5340 | break; | 
|---|
| 5341 | case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break; | 
|---|
| 5342 | case Intrinsic::x86_tileloaddrst164: | 
|---|
| 5343 | Opc = X86::PTILELOADDRST1; | 
|---|
| 5344 | break; | 
|---|
| 5345 | case Intrinsic::x86_tilestored64:  Opc = X86::PTILESTORED; break; | 
|---|
| 5346 | } | 
|---|
| 5347 | // FIXME: Match displacement and scale. | 
|---|
| 5348 | unsigned TIndex = Node->getConstantOperandVal(Num: 2); | 
|---|
| 5349 | SDValue TReg = getI8Imm(Imm: TIndex, DL: dl); | 
|---|
| 5350 | SDValue Base = Node->getOperand(Num: 3); | 
|---|
| 5351 | SDValue Scale = getI8Imm(Imm: 1, DL: dl); | 
|---|
| 5352 | SDValue Index = Node->getOperand(Num: 4); | 
|---|
| 5353 | SDValue Disp = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32); | 
|---|
| 5354 | SDValue Segment = CurDAG->getRegister(Reg: 0, VT: MVT::i16); | 
|---|
| 5355 | SDValue Chain = Node->getOperand(Num: 0); | 
|---|
| 5356 | MachineSDNode *CNode; | 
|---|
| 5357 | if (Opc == X86::PTILESTORED) { | 
|---|
| 5358 | SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain }; | 
|---|
| 5359 | CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops); | 
|---|
| 5360 | } else { | 
|---|
| 5361 | SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain }; | 
|---|
| 5362 | CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops); | 
|---|
| 5363 | } | 
|---|
| 5364 | ReplaceNode(F: Node, T: CNode); | 
|---|
| 5365 | return; | 
|---|
| 5366 | } | 
|---|
| 5367 | case Intrinsic::x86_t2rpntlvwz0rs: | 
|---|
| 5368 | case Intrinsic::x86_t2rpntlvwz0rst1: | 
|---|
| 5369 | case Intrinsic::x86_t2rpntlvwz1rs: | 
|---|
| 5370 | case Intrinsic::x86_t2rpntlvwz1rst1: | 
|---|
| 5371 | if (!Subtarget->hasAMXMOVRS()) | 
|---|
| 5372 | break; | 
|---|
| 5373 | [[fallthrough]]; | 
|---|
| 5374 | case Intrinsic::x86_t2rpntlvwz0: | 
|---|
| 5375 | case Intrinsic::x86_t2rpntlvwz0t1: | 
|---|
| 5376 | case Intrinsic::x86_t2rpntlvwz1: | 
|---|
| 5377 | case Intrinsic::x86_t2rpntlvwz1t1: { | 
|---|
| 5378 | if (!Subtarget->hasAMXTRANSPOSE()) | 
|---|
| 5379 | break; | 
|---|
| 5380 | auto *MFI = | 
|---|
| 5381 | CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); | 
|---|
| 5382 | MFI->setAMXProgModel(AMXProgModelEnum::DirectReg); | 
|---|
| 5383 | unsigned Opc; | 
|---|
| 5384 | switch (IntNo) { | 
|---|
| 5385 | default: | 
|---|
| 5386 | llvm_unreachable( "Unexpected intrinsic!"); | 
|---|
| 5387 | case Intrinsic::x86_t2rpntlvwz0: | 
|---|
| 5388 | Opc = X86::PT2RPNTLVWZ0; | 
|---|
| 5389 | break; | 
|---|
| 5390 | case Intrinsic::x86_t2rpntlvwz0t1: | 
|---|
| 5391 | Opc = X86::PT2RPNTLVWZ0T1; | 
|---|
| 5392 | break; | 
|---|
| 5393 | case Intrinsic::x86_t2rpntlvwz1: | 
|---|
| 5394 | Opc = X86::PT2RPNTLVWZ1; | 
|---|
| 5395 | break; | 
|---|
| 5396 | case Intrinsic::x86_t2rpntlvwz1t1: | 
|---|
| 5397 | Opc = X86::PT2RPNTLVWZ1T1; | 
|---|
| 5398 | break; | 
|---|
| 5399 | case Intrinsic::x86_t2rpntlvwz0rs: | 
|---|
| 5400 | Opc = X86::PT2RPNTLVWZ0RS; | 
|---|
| 5401 | break; | 
|---|
| 5402 | case Intrinsic::x86_t2rpntlvwz0rst1: | 
|---|
| 5403 | Opc = X86::PT2RPNTLVWZ0RST1; | 
|---|
| 5404 | break; | 
|---|
| 5405 | case Intrinsic::x86_t2rpntlvwz1rs: | 
|---|
| 5406 | Opc = X86::PT2RPNTLVWZ1RS; | 
|---|
| 5407 | break; | 
|---|
| 5408 | case Intrinsic::x86_t2rpntlvwz1rst1: | 
|---|
| 5409 | Opc = X86::PT2RPNTLVWZ1RST1; | 
|---|
| 5410 | break; | 
|---|
| 5411 | } | 
|---|
| 5412 | // FIXME: Match displacement and scale. | 
|---|
| 5413 | unsigned TIndex = Node->getConstantOperandVal(Num: 2); | 
|---|
| 5414 | SDValue TReg = getI8Imm(Imm: TIndex, DL: dl); | 
|---|
| 5415 | SDValue Base = Node->getOperand(Num: 3); | 
|---|
| 5416 | SDValue Scale = getI8Imm(Imm: 1, DL: dl); | 
|---|
| 5417 | SDValue Index = Node->getOperand(Num: 4); | 
|---|
| 5418 | SDValue Disp = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32); | 
|---|
| 5419 | SDValue Segment = CurDAG->getRegister(Reg: 0, VT: MVT::i16); | 
|---|
| 5420 | SDValue Chain = Node->getOperand(Num: 0); | 
|---|
| 5421 | SDValue Ops[] = {TReg, Base, Scale, Index, Disp, Segment, Chain}; | 
|---|
| 5422 | MachineSDNode *CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops); | 
|---|
| 5423 | ReplaceNode(F: Node, T: CNode); | 
|---|
| 5424 | return; | 
|---|
| 5425 | } | 
|---|
| 5426 | } | 
|---|
| 5427 | break; | 
|---|
| 5428 | } | 
|---|
| 5429 | case ISD::BRIND: | 
|---|
| 5430 | case X86ISD::NT_BRIND: { | 
|---|
| 5431 | if (Subtarget->isTargetNaCl()) | 
|---|
| 5432 | // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We | 
|---|
| 5433 | // leave the instruction alone. | 
|---|
| 5434 | break; | 
|---|
| 5435 | if (Subtarget->isTarget64BitILP32()) { | 
|---|
| 5436 | // Converts a 32-bit register to a 64-bit, zero-extended version of | 
|---|
| 5437 | // it. This is needed because x86-64 can do many things, but jmp %r32 | 
|---|
| 5438 | // ain't one of them. | 
|---|
| 5439 | SDValue Target = Node->getOperand(Num: 1); | 
|---|
| 5440 | assert(Target.getValueType() == MVT::i32 && "Unexpected VT!"); | 
|---|
| 5441 | SDValue ZextTarget = CurDAG->getZExtOrTrunc(Op: Target, DL: dl, VT: MVT::i64); | 
|---|
| 5442 | SDValue Brind = CurDAG->getNode(Opcode, DL: dl, VT: MVT::Other, | 
|---|
| 5443 | N1: Node->getOperand(Num: 0), N2: ZextTarget); | 
|---|
| 5444 | ReplaceNode(F: Node, T: Brind.getNode()); | 
|---|
| 5445 | SelectCode(N: ZextTarget.getNode()); | 
|---|
| 5446 | SelectCode(N: Brind.getNode()); | 
|---|
| 5447 | return; | 
|---|
| 5448 | } | 
|---|
| 5449 | break; | 
|---|
| 5450 | } | 
|---|
| 5451 | case X86ISD::GlobalBaseReg: | 
|---|
| 5452 | ReplaceNode(F: Node, T: getGlobalBaseReg()); | 
|---|
| 5453 | return; | 
|---|
| 5454 |  | 
|---|
| 5455 | case ISD::BITCAST: | 
|---|
| 5456 | // Just drop all 128/256/512-bit bitcasts. | 
|---|
| 5457 | if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() || | 
|---|
| 5458 | NVT == MVT::f128) { | 
|---|
| 5459 | ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0)); | 
|---|
| 5460 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 5461 | return; | 
|---|
| 5462 | } | 
|---|
| 5463 | break; | 
|---|
| 5464 |  | 
|---|
| 5465 | case ISD::SRL: | 
|---|
| 5466 | if (matchBitExtract(Node)) | 
|---|
| 5467 | return; | 
|---|
| 5468 | [[fallthrough]]; | 
|---|
| 5469 | case ISD::SRA: | 
|---|
| 5470 | case ISD::SHL: | 
|---|
| 5471 | if (tryShiftAmountMod(N: Node)) | 
|---|
| 5472 | return; | 
|---|
| 5473 | break; | 
|---|
| 5474 |  | 
|---|
| 5475 | case X86ISD::VPTERNLOG: { | 
|---|
| 5476 | uint8_t Imm = Node->getConstantOperandVal(Num: 3); | 
|---|
| 5477 | if (matchVPTERNLOG(Root: Node, ParentA: Node, ParentB: Node, ParentC: Node, A: Node->getOperand(Num: 0), | 
|---|
| 5478 | B: Node->getOperand(Num: 1), C: Node->getOperand(Num: 2), Imm)) | 
|---|
| 5479 | return; | 
|---|
| 5480 | break; | 
|---|
| 5481 | } | 
|---|
| 5482 |  | 
|---|
| 5483 | case X86ISD::ANDNP: | 
|---|
| 5484 | if (tryVPTERNLOG(N: Node)) | 
|---|
| 5485 | return; | 
|---|
| 5486 | break; | 
|---|
| 5487 |  | 
|---|
| 5488 | case ISD::AND: | 
|---|
| 5489 | if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) { | 
|---|
| 5490 | // Try to form a masked VPTESTM. Operands can be in either order. | 
|---|
| 5491 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 5492 | SDValue N1 = Node->getOperand(Num: 1); | 
|---|
| 5493 | if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() && | 
|---|
| 5494 | tryVPTESTM(Root: Node, Setcc: N0, InMask: N1)) | 
|---|
| 5495 | return; | 
|---|
| 5496 | if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && | 
|---|
| 5497 | tryVPTESTM(Root: Node, Setcc: N1, InMask: N0)) | 
|---|
| 5498 | return; | 
|---|
| 5499 | } | 
|---|
| 5500 |  | 
|---|
| 5501 | if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) { | 
|---|
| 5502 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(NewNode, 0)); | 
|---|
| 5503 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 5504 | return; | 
|---|
| 5505 | } | 
|---|
| 5506 | if (matchBitExtract(Node)) | 
|---|
| 5507 | return; | 
|---|
| 5508 | if (AndImmShrink && shrinkAndImmediate(And: Node)) | 
|---|
| 5509 | return; | 
|---|
| 5510 |  | 
|---|
| 5511 | [[fallthrough]]; | 
|---|
| 5512 | case ISD::OR: | 
|---|
| 5513 | case ISD::XOR: | 
|---|
| 5514 | if (tryShrinkShlLogicImm(N: Node)) | 
|---|
| 5515 | return; | 
|---|
| 5516 | if (Opcode == ISD::OR && tryMatchBitSelect(N: Node)) | 
|---|
| 5517 | return; | 
|---|
| 5518 | if (tryVPTERNLOG(N: Node)) | 
|---|
| 5519 | return; | 
|---|
| 5520 |  | 
|---|
| 5521 | [[fallthrough]]; | 
|---|
| 5522 | case ISD::ADD: | 
|---|
| 5523 | if (Opcode == ISD::ADD && matchBitExtract(Node)) | 
|---|
| 5524 | return; | 
|---|
| 5525 | [[fallthrough]]; | 
|---|
| 5526 | case ISD::SUB: { | 
|---|
| 5527 | // Try to avoid folding immediates with multiple uses for optsize. | 
|---|
| 5528 | // This code tries to select to register form directly to avoid going | 
|---|
| 5529 | // through the isel table which might fold the immediate. We can't change | 
|---|
| 5530 | // the patterns on the add/sub/and/or/xor with immediate patterns in the | 
|---|
| 5531 | // tablegen files to check immediate use count without making the patterns | 
|---|
| 5532 | // unavailable to the fast-isel table. | 
|---|
| 5533 | if (!CurDAG->shouldOptForSize()) | 
|---|
| 5534 | break; | 
|---|
| 5535 |  | 
|---|
| 5536 | // Only handle i8/i16/i32/i64. | 
|---|
| 5537 | if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64) | 
|---|
| 5538 | break; | 
|---|
| 5539 |  | 
|---|
| 5540 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 5541 | SDValue N1 = Node->getOperand(Num: 1); | 
|---|
| 5542 |  | 
|---|
| 5543 | auto *Cst = dyn_cast<ConstantSDNode>(Val&: N1); | 
|---|
| 5544 | if (!Cst) | 
|---|
| 5545 | break; | 
|---|
| 5546 |  | 
|---|
| 5547 | int64_t Val = Cst->getSExtValue(); | 
|---|
| 5548 |  | 
|---|
| 5549 | // Make sure it's an immediate that is considered foldable. | 
|---|
| 5550 | // FIXME: Handle unsigned 32 bit immediates for 64-bit AND. | 
|---|
| 5551 | if (!isInt<8>(x: Val) && !isInt<32>(x: Val)) | 
|---|
| 5552 | break; | 
|---|
| 5553 |  | 
|---|
| 5554 | // If this can match to INC/DEC, let it go. | 
|---|
| 5555 | if (Opcode == ISD::ADD && (Val == 1 || Val == -1)) | 
|---|
| 5556 | break; | 
|---|
| 5557 |  | 
|---|
| 5558 | // Check if we should avoid folding this immediate. | 
|---|
| 5559 | if (!shouldAvoidImmediateInstFormsForSize(N: N1.getNode())) | 
|---|
| 5560 | break; | 
|---|
| 5561 |  | 
|---|
| 5562 | // We should not fold the immediate. So we need a register form instead. | 
|---|
| 5563 | unsigned ROpc, MOpc; | 
|---|
| 5564 | switch (NVT.SimpleTy) { | 
|---|
| 5565 | default: llvm_unreachable( "Unexpected VT!"); | 
|---|
| 5566 | case MVT::i8: | 
|---|
| 5567 | switch (Opcode) { | 
|---|
| 5568 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 5569 | case ISD::ADD: | 
|---|
| 5570 | ROpc = GET_ND_IF_ENABLED(X86::ADD8rr); | 
|---|
| 5571 | MOpc = GET_ND_IF_ENABLED(X86::ADD8rm); | 
|---|
| 5572 | break; | 
|---|
| 5573 | case ISD::SUB: | 
|---|
| 5574 | ROpc = GET_ND_IF_ENABLED(X86::SUB8rr); | 
|---|
| 5575 | MOpc = GET_ND_IF_ENABLED(X86::SUB8rm); | 
|---|
| 5576 | break; | 
|---|
| 5577 | case ISD::AND: | 
|---|
| 5578 | ROpc = GET_ND_IF_ENABLED(X86::AND8rr); | 
|---|
| 5579 | MOpc = GET_ND_IF_ENABLED(X86::AND8rm); | 
|---|
| 5580 | break; | 
|---|
| 5581 | case ISD::OR: | 
|---|
| 5582 | ROpc = GET_ND_IF_ENABLED(X86::OR8rr); | 
|---|
| 5583 | MOpc = GET_ND_IF_ENABLED(X86::OR8rm); | 
|---|
| 5584 | break; | 
|---|
| 5585 | case ISD::XOR: | 
|---|
| 5586 | ROpc = GET_ND_IF_ENABLED(X86::XOR8rr); | 
|---|
| 5587 | MOpc = GET_ND_IF_ENABLED(X86::XOR8rm); | 
|---|
| 5588 | break; | 
|---|
| 5589 | } | 
|---|
| 5590 | break; | 
|---|
| 5591 | case MVT::i16: | 
|---|
| 5592 | switch (Opcode) { | 
|---|
| 5593 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 5594 | case ISD::ADD: | 
|---|
| 5595 | ROpc = GET_ND_IF_ENABLED(X86::ADD16rr); | 
|---|
| 5596 | MOpc = GET_ND_IF_ENABLED(X86::ADD16rm); | 
|---|
| 5597 | break; | 
|---|
| 5598 | case ISD::SUB: | 
|---|
| 5599 | ROpc = GET_ND_IF_ENABLED(X86::SUB16rr); | 
|---|
| 5600 | MOpc = GET_ND_IF_ENABLED(X86::SUB16rm); | 
|---|
| 5601 | break; | 
|---|
| 5602 | case ISD::AND: | 
|---|
| 5603 | ROpc = GET_ND_IF_ENABLED(X86::AND16rr); | 
|---|
| 5604 | MOpc = GET_ND_IF_ENABLED(X86::AND16rm); | 
|---|
| 5605 | break; | 
|---|
| 5606 | case ISD::OR: | 
|---|
| 5607 | ROpc = GET_ND_IF_ENABLED(X86::OR16rr); | 
|---|
| 5608 | MOpc = GET_ND_IF_ENABLED(X86::OR16rm); | 
|---|
| 5609 | break; | 
|---|
| 5610 | case ISD::XOR: | 
|---|
| 5611 | ROpc = GET_ND_IF_ENABLED(X86::XOR16rr); | 
|---|
| 5612 | MOpc = GET_ND_IF_ENABLED(X86::XOR16rm); | 
|---|
| 5613 | break; | 
|---|
| 5614 | } | 
|---|
| 5615 | break; | 
|---|
| 5616 | case MVT::i32: | 
|---|
| 5617 | switch (Opcode) { | 
|---|
| 5618 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 5619 | case ISD::ADD: | 
|---|
| 5620 | ROpc = GET_ND_IF_ENABLED(X86::ADD32rr); | 
|---|
| 5621 | MOpc = GET_ND_IF_ENABLED(X86::ADD32rm); | 
|---|
| 5622 | break; | 
|---|
| 5623 | case ISD::SUB: | 
|---|
| 5624 | ROpc = GET_ND_IF_ENABLED(X86::SUB32rr); | 
|---|
| 5625 | MOpc = GET_ND_IF_ENABLED(X86::SUB32rm); | 
|---|
| 5626 | break; | 
|---|
| 5627 | case ISD::AND: | 
|---|
| 5628 | ROpc = GET_ND_IF_ENABLED(X86::AND32rr); | 
|---|
| 5629 | MOpc = GET_ND_IF_ENABLED(X86::AND32rm); | 
|---|
| 5630 | break; | 
|---|
| 5631 | case ISD::OR: | 
|---|
| 5632 | ROpc = GET_ND_IF_ENABLED(X86::OR32rr); | 
|---|
| 5633 | MOpc = GET_ND_IF_ENABLED(X86::OR32rm); | 
|---|
| 5634 | break; | 
|---|
| 5635 | case ISD::XOR: | 
|---|
| 5636 | ROpc = GET_ND_IF_ENABLED(X86::XOR32rr); | 
|---|
| 5637 | MOpc = GET_ND_IF_ENABLED(X86::XOR32rm); | 
|---|
| 5638 | break; | 
|---|
| 5639 | } | 
|---|
| 5640 | break; | 
|---|
| 5641 | case MVT::i64: | 
|---|
| 5642 | switch (Opcode) { | 
|---|
| 5643 | default: llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 5644 | case ISD::ADD: | 
|---|
| 5645 | ROpc = GET_ND_IF_ENABLED(X86::ADD64rr); | 
|---|
| 5646 | MOpc = GET_ND_IF_ENABLED(X86::ADD64rm); | 
|---|
| 5647 | break; | 
|---|
| 5648 | case ISD::SUB: | 
|---|
| 5649 | ROpc = GET_ND_IF_ENABLED(X86::SUB64rr); | 
|---|
| 5650 | MOpc = GET_ND_IF_ENABLED(X86::SUB64rm); | 
|---|
| 5651 | break; | 
|---|
| 5652 | case ISD::AND: | 
|---|
| 5653 | ROpc = GET_ND_IF_ENABLED(X86::AND64rr); | 
|---|
| 5654 | MOpc = GET_ND_IF_ENABLED(X86::AND64rm); | 
|---|
| 5655 | break; | 
|---|
| 5656 | case ISD::OR: | 
|---|
| 5657 | ROpc = GET_ND_IF_ENABLED(X86::OR64rr); | 
|---|
| 5658 | MOpc = GET_ND_IF_ENABLED(X86::OR64rm); | 
|---|
| 5659 | break; | 
|---|
| 5660 | case ISD::XOR: | 
|---|
| 5661 | ROpc = GET_ND_IF_ENABLED(X86::XOR64rr); | 
|---|
| 5662 | MOpc = GET_ND_IF_ENABLED(X86::XOR64rm); | 
|---|
| 5663 | break; | 
|---|
| 5664 | } | 
|---|
| 5665 | break; | 
|---|
| 5666 | } | 
|---|
| 5667 |  | 
|---|
| 5668 | // Ok, this is an AND/OR/XOR/ADD/SUB with a constant. | 
|---|
| 5669 |  | 
|---|
| 5670 | // If this is not a subtract, we can still try to fold a load. | 
|---|
| 5671 | if (Opcode != ISD::SUB) { | 
|---|
| 5672 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 5673 | if (tryFoldLoad(P: Node, N: N0, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4)) { | 
|---|
| 5674 | SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(i: 0) }; | 
|---|
| 5675 | SDVTList VTs = CurDAG->getVTList(VT1: NVT, VT2: MVT::i32, VT3: MVT::Other); | 
|---|
| 5676 | MachineSDNode *CNode = CurDAG->getMachineNode(Opcode: MOpc, dl, VTs, Ops); | 
|---|
| 5677 | // Update the chain. | 
|---|
| 5678 | ReplaceUses(F: N0.getValue(R: 1), T: SDValue(CNode, 2)); | 
|---|
| 5679 | // Record the mem-refs | 
|---|
| 5680 | CurDAG->setNodeMemRefs(N: CNode, NewMemRefs: {cast<LoadSDNode>(Val&: N0)->getMemOperand()}); | 
|---|
| 5681 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(CNode, 0)); | 
|---|
| 5682 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 5683 | return; | 
|---|
| 5684 | } | 
|---|
| 5685 | } | 
|---|
| 5686 |  | 
|---|
| 5687 | CurDAG->SelectNodeTo(N: Node, MachineOpc: ROpc, VT1: NVT, VT2: MVT::i32, Op1: N0, Op2: N1); | 
|---|
| 5688 | return; | 
|---|
| 5689 | } | 
|---|
| 5690 |  | 
|---|
| 5691 | case X86ISD::SMUL: | 
|---|
| 5692 | // i16/i32/i64 are handled with isel patterns. | 
|---|
| 5693 | if (NVT != MVT::i8) | 
|---|
| 5694 | break; | 
|---|
| 5695 | [[fallthrough]]; | 
|---|
| 5696 | case X86ISD::UMUL: { | 
|---|
| 5697 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 5698 | SDValue N1 = Node->getOperand(Num: 1); | 
|---|
| 5699 |  | 
|---|
| 5700 | unsigned LoReg, ROpc, MOpc; | 
|---|
| 5701 | switch (NVT.SimpleTy) { | 
|---|
| 5702 | default: llvm_unreachable( "Unsupported VT!"); | 
|---|
| 5703 | case MVT::i8: | 
|---|
| 5704 | LoReg = X86::AL; | 
|---|
| 5705 | ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r; | 
|---|
| 5706 | MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m; | 
|---|
| 5707 | break; | 
|---|
| 5708 | case MVT::i16: | 
|---|
| 5709 | LoReg = X86::AX; | 
|---|
| 5710 | ROpc = X86::MUL16r; | 
|---|
| 5711 | MOpc = X86::MUL16m; | 
|---|
| 5712 | break; | 
|---|
| 5713 | case MVT::i32: | 
|---|
| 5714 | LoReg = X86::EAX; | 
|---|
| 5715 | ROpc = X86::MUL32r; | 
|---|
| 5716 | MOpc = X86::MUL32m; | 
|---|
| 5717 | break; | 
|---|
| 5718 | case MVT::i64: | 
|---|
| 5719 | LoReg = X86::RAX; | 
|---|
| 5720 | ROpc = X86::MUL64r; | 
|---|
| 5721 | MOpc = X86::MUL64m; | 
|---|
| 5722 | break; | 
|---|
| 5723 | } | 
|---|
| 5724 |  | 
|---|
| 5725 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 5726 | bool FoldedLoad = tryFoldLoad(P: Node, N: N1, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4); | 
|---|
| 5727 | // Multiply is commutative. | 
|---|
| 5728 | if (!FoldedLoad) { | 
|---|
| 5729 | FoldedLoad = tryFoldLoad(P: Node, N: N0, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4); | 
|---|
| 5730 | if (FoldedLoad) | 
|---|
| 5731 | std::swap(a&: N0, b&: N1); | 
|---|
| 5732 | } | 
|---|
| 5733 |  | 
|---|
| 5734 | SDValue InGlue = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, Reg: LoReg, | 
|---|
| 5735 | N: N0, Glue: SDValue()).getValue(R: 1); | 
|---|
| 5736 |  | 
|---|
| 5737 | MachineSDNode *CNode; | 
|---|
| 5738 | if (FoldedLoad) { | 
|---|
| 5739 | // i16/i32/i64 use an instruction that produces a low and high result even | 
|---|
| 5740 | // though only the low result is used. | 
|---|
| 5741 | SDVTList VTs; | 
|---|
| 5742 | if (NVT == MVT::i8) | 
|---|
| 5743 | VTs = CurDAG->getVTList(VT1: NVT, VT2: MVT::i32, VT3: MVT::Other); | 
|---|
| 5744 | else | 
|---|
| 5745 | VTs = CurDAG->getVTList(VT1: NVT, VT2: NVT, VT3: MVT::i32, VT4: MVT::Other); | 
|---|
| 5746 |  | 
|---|
| 5747 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(i: 0), | 
|---|
| 5748 | InGlue }; | 
|---|
| 5749 | CNode = CurDAG->getMachineNode(Opcode: MOpc, dl, VTs, Ops); | 
|---|
| 5750 |  | 
|---|
| 5751 | // Update the chain. | 
|---|
| 5752 | ReplaceUses(F: N1.getValue(R: 1), T: SDValue(CNode, NVT == MVT::i8 ? 2 : 3)); | 
|---|
| 5753 | // Record the mem-refs | 
|---|
| 5754 | CurDAG->setNodeMemRefs(N: CNode, NewMemRefs: {cast<LoadSDNode>(Val&: N1)->getMemOperand()}); | 
|---|
| 5755 | } else { | 
|---|
| 5756 | // i16/i32/i64 use an instruction that produces a low and high result even | 
|---|
| 5757 | // though only the low result is used. | 
|---|
| 5758 | SDVTList VTs; | 
|---|
| 5759 | if (NVT == MVT::i8) | 
|---|
| 5760 | VTs = CurDAG->getVTList(VT1: NVT, VT2: MVT::i32); | 
|---|
| 5761 | else | 
|---|
| 5762 | VTs = CurDAG->getVTList(VT1: NVT, VT2: NVT, VT3: MVT::i32); | 
|---|
| 5763 |  | 
|---|
| 5764 | CNode = CurDAG->getMachineNode(Opcode: ROpc, dl, VTs, Ops: {N1, InGlue}); | 
|---|
| 5765 | } | 
|---|
| 5766 |  | 
|---|
| 5767 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(CNode, 0)); | 
|---|
| 5768 | ReplaceUses(F: SDValue(Node, 1), T: SDValue(CNode, NVT == MVT::i8 ? 1 : 2)); | 
|---|
| 5769 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 5770 | return; | 
|---|
| 5771 | } | 
|---|
| 5772 |  | 
|---|
| 5773 | case ISD::SMUL_LOHI: | 
|---|
| 5774 | case ISD::UMUL_LOHI: { | 
|---|
| 5775 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 5776 | SDValue N1 = Node->getOperand(Num: 1); | 
|---|
| 5777 |  | 
|---|
| 5778 | unsigned Opc, MOpc; | 
|---|
| 5779 | unsigned LoReg, HiReg; | 
|---|
| 5780 | bool IsSigned = Opcode == ISD::SMUL_LOHI; | 
|---|
| 5781 | bool UseMULX = !IsSigned && Subtarget->hasBMI2(); | 
|---|
| 5782 | bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty(); | 
|---|
| 5783 | switch (NVT.SimpleTy) { | 
|---|
| 5784 | default: llvm_unreachable( "Unsupported VT!"); | 
|---|
| 5785 | case MVT::i32: | 
|---|
| 5786 | Opc = UseMULXHi  ? X86::MULX32Hrr | 
|---|
| 5787 | : UseMULX  ? GET_EGPR_IF_ENABLED(X86::MULX32rr) | 
|---|
| 5788 | : IsSigned ? X86::IMUL32r | 
|---|
| 5789 | : X86::MUL32r; | 
|---|
| 5790 | MOpc = UseMULXHi  ? X86::MULX32Hrm | 
|---|
| 5791 | : UseMULX  ? GET_EGPR_IF_ENABLED(X86::MULX32rm) | 
|---|
| 5792 | : IsSigned ? X86::IMUL32m | 
|---|
| 5793 | : X86::MUL32m; | 
|---|
| 5794 | LoReg = UseMULX ? X86::EDX : X86::EAX; | 
|---|
| 5795 | HiReg = X86::EDX; | 
|---|
| 5796 | break; | 
|---|
| 5797 | case MVT::i64: | 
|---|
| 5798 | Opc = UseMULXHi  ? X86::MULX64Hrr | 
|---|
| 5799 | : UseMULX  ? GET_EGPR_IF_ENABLED(X86::MULX64rr) | 
|---|
| 5800 | : IsSigned ? X86::IMUL64r | 
|---|
| 5801 | : X86::MUL64r; | 
|---|
| 5802 | MOpc = UseMULXHi  ? X86::MULX64Hrm | 
|---|
| 5803 | : UseMULX  ? GET_EGPR_IF_ENABLED(X86::MULX64rm) | 
|---|
| 5804 | : IsSigned ? X86::IMUL64m | 
|---|
| 5805 | : X86::MUL64m; | 
|---|
| 5806 | LoReg = UseMULX ? X86::RDX : X86::RAX; | 
|---|
| 5807 | HiReg = X86::RDX; | 
|---|
| 5808 | break; | 
|---|
| 5809 | } | 
|---|
| 5810 |  | 
|---|
| 5811 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 5812 | bool foldedLoad = tryFoldLoad(P: Node, N: N1, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4); | 
|---|
| 5813 | // Multiply is commutative. | 
|---|
| 5814 | if (!foldedLoad) { | 
|---|
| 5815 | foldedLoad = tryFoldLoad(P: Node, N: N0, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4); | 
|---|
| 5816 | if (foldedLoad) | 
|---|
| 5817 | std::swap(a&: N0, b&: N1); | 
|---|
| 5818 | } | 
|---|
| 5819 |  | 
|---|
| 5820 | SDValue InGlue = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, Reg: LoReg, | 
|---|
| 5821 | N: N0, Glue: SDValue()).getValue(R: 1); | 
|---|
| 5822 | SDValue ResHi, ResLo; | 
|---|
| 5823 | if (foldedLoad) { | 
|---|
| 5824 | SDValue Chain; | 
|---|
| 5825 | MachineSDNode *CNode = nullptr; | 
|---|
| 5826 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(i: 0), | 
|---|
| 5827 | InGlue }; | 
|---|
| 5828 | if (UseMULXHi) { | 
|---|
| 5829 | SDVTList VTs = CurDAG->getVTList(VT1: NVT, VT2: MVT::Other); | 
|---|
| 5830 | CNode = CurDAG->getMachineNode(Opcode: MOpc, dl, VTs, Ops); | 
|---|
| 5831 | ResHi = SDValue(CNode, 0); | 
|---|
| 5832 | Chain = SDValue(CNode, 1); | 
|---|
| 5833 | } else if (UseMULX) { | 
|---|
| 5834 | SDVTList VTs = CurDAG->getVTList(VT1: NVT, VT2: NVT, VT3: MVT::Other); | 
|---|
| 5835 | CNode = CurDAG->getMachineNode(Opcode: MOpc, dl, VTs, Ops); | 
|---|
| 5836 | ResHi = SDValue(CNode, 0); | 
|---|
| 5837 | ResLo = SDValue(CNode, 1); | 
|---|
| 5838 | Chain = SDValue(CNode, 2); | 
|---|
| 5839 | } else { | 
|---|
| 5840 | SDVTList VTs = CurDAG->getVTList(VT1: MVT::Other, VT2: MVT::Glue); | 
|---|
| 5841 | CNode = CurDAG->getMachineNode(Opcode: MOpc, dl, VTs, Ops); | 
|---|
| 5842 | Chain = SDValue(CNode, 0); | 
|---|
| 5843 | InGlue = SDValue(CNode, 1); | 
|---|
| 5844 | } | 
|---|
| 5845 |  | 
|---|
| 5846 | // Update the chain. | 
|---|
| 5847 | ReplaceUses(F: N1.getValue(R: 1), T: Chain); | 
|---|
| 5848 | // Record the mem-refs | 
|---|
| 5849 | CurDAG->setNodeMemRefs(N: CNode, NewMemRefs: {cast<LoadSDNode>(Val&: N1)->getMemOperand()}); | 
|---|
| 5850 | } else { | 
|---|
| 5851 | SDValue Ops[] = { N1, InGlue }; | 
|---|
| 5852 | if (UseMULXHi) { | 
|---|
| 5853 | SDVTList VTs = CurDAG->getVTList(VT: NVT); | 
|---|
| 5854 | SDNode *CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VTs, Ops); | 
|---|
| 5855 | ResHi = SDValue(CNode, 0); | 
|---|
| 5856 | } else if (UseMULX) { | 
|---|
| 5857 | SDVTList VTs = CurDAG->getVTList(VT1: NVT, VT2: NVT); | 
|---|
| 5858 | SDNode *CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VTs, Ops); | 
|---|
| 5859 | ResHi = SDValue(CNode, 0); | 
|---|
| 5860 | ResLo = SDValue(CNode, 1); | 
|---|
| 5861 | } else { | 
|---|
| 5862 | SDVTList VTs = CurDAG->getVTList(VT: MVT::Glue); | 
|---|
| 5863 | SDNode *CNode = CurDAG->getMachineNode(Opcode: Opc, dl, VTs, Ops); | 
|---|
| 5864 | InGlue = SDValue(CNode, 0); | 
|---|
| 5865 | } | 
|---|
| 5866 | } | 
|---|
| 5867 |  | 
|---|
| 5868 | // Copy the low half of the result, if it is needed. | 
|---|
| 5869 | if (!SDValue(Node, 0).use_empty()) { | 
|---|
| 5870 | if (!ResLo) { | 
|---|
| 5871 | assert(LoReg && "Register for low half is not defined!"); | 
|---|
| 5872 | ResLo = CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl, Reg: LoReg, | 
|---|
| 5873 | VT: NVT, Glue: InGlue); | 
|---|
| 5874 | InGlue = ResLo.getValue(R: 2); | 
|---|
| 5875 | } | 
|---|
| 5876 | ReplaceUses(F: SDValue(Node, 0), T: ResLo); | 
|---|
| 5877 | LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); | 
|---|
| 5878 | dbgs() << '\n'); | 
|---|
| 5879 | } | 
|---|
| 5880 | // Copy the high half of the result, if it is needed. | 
|---|
| 5881 | if (!SDValue(Node, 1).use_empty()) { | 
|---|
| 5882 | if (!ResHi) { | 
|---|
| 5883 | assert(HiReg && "Register for high half is not defined!"); | 
|---|
| 5884 | ResHi = CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl, Reg: HiReg, | 
|---|
| 5885 | VT: NVT, Glue: InGlue); | 
|---|
| 5886 | InGlue = ResHi.getValue(R: 2); | 
|---|
| 5887 | } | 
|---|
| 5888 | ReplaceUses(F: SDValue(Node, 1), T: ResHi); | 
|---|
| 5889 | LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); | 
|---|
| 5890 | dbgs() << '\n'); | 
|---|
| 5891 | } | 
|---|
| 5892 |  | 
|---|
| 5893 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 5894 | return; | 
|---|
| 5895 | } | 
|---|
| 5896 |  | 
|---|
| 5897 | case ISD::SDIVREM: | 
|---|
| 5898 | case ISD::UDIVREM: { | 
|---|
| 5899 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 5900 | SDValue N1 = Node->getOperand(Num: 1); | 
|---|
| 5901 |  | 
|---|
| 5902 | unsigned ROpc, MOpc; | 
|---|
| 5903 | bool isSigned = Opcode == ISD::SDIVREM; | 
|---|
| 5904 | if (!isSigned) { | 
|---|
| 5905 | switch (NVT.SimpleTy) { | 
|---|
| 5906 | default: llvm_unreachable( "Unsupported VT!"); | 
|---|
| 5907 | case MVT::i8:  ROpc = X86::DIV8r;  MOpc = X86::DIV8m;  break; | 
|---|
| 5908 | case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break; | 
|---|
| 5909 | case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break; | 
|---|
| 5910 | case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break; | 
|---|
| 5911 | } | 
|---|
| 5912 | } else { | 
|---|
| 5913 | switch (NVT.SimpleTy) { | 
|---|
| 5914 | default: llvm_unreachable( "Unsupported VT!"); | 
|---|
| 5915 | case MVT::i8:  ROpc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break; | 
|---|
| 5916 | case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break; | 
|---|
| 5917 | case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break; | 
|---|
| 5918 | case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break; | 
|---|
| 5919 | } | 
|---|
| 5920 | } | 
|---|
| 5921 |  | 
|---|
| 5922 | unsigned LoReg, HiReg, ClrReg; | 
|---|
| 5923 | unsigned SExtOpcode; | 
|---|
| 5924 | switch (NVT.SimpleTy) { | 
|---|
| 5925 | default: llvm_unreachable( "Unsupported VT!"); | 
|---|
| 5926 | case MVT::i8: | 
|---|
| 5927 | LoReg = X86::AL;  ClrReg = HiReg = X86::AH; | 
|---|
| 5928 | SExtOpcode = 0; // Not used. | 
|---|
| 5929 | break; | 
|---|
| 5930 | case MVT::i16: | 
|---|
| 5931 | LoReg = X86::AX;  HiReg = X86::DX; | 
|---|
| 5932 | ClrReg = X86::DX; | 
|---|
| 5933 | SExtOpcode = X86::CWD; | 
|---|
| 5934 | break; | 
|---|
| 5935 | case MVT::i32: | 
|---|
| 5936 | LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; | 
|---|
| 5937 | SExtOpcode = X86::CDQ; | 
|---|
| 5938 | break; | 
|---|
| 5939 | case MVT::i64: | 
|---|
| 5940 | LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; | 
|---|
| 5941 | SExtOpcode = X86::CQO; | 
|---|
| 5942 | break; | 
|---|
| 5943 | } | 
|---|
| 5944 |  | 
|---|
| 5945 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 5946 | bool foldedLoad = tryFoldLoad(P: Node, N: N1, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4); | 
|---|
| 5947 | bool signBitIsZero = CurDAG->SignBitIsZero(Op: N0); | 
|---|
| 5948 |  | 
|---|
| 5949 | SDValue InGlue; | 
|---|
| 5950 | if (NVT == MVT::i8) { | 
|---|
| 5951 | // Special case for div8, just use a move with zero extension to AX to | 
|---|
| 5952 | // clear the upper 8 bits (AH). | 
|---|
| 5953 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain; | 
|---|
| 5954 | MachineSDNode *Move; | 
|---|
| 5955 | if (tryFoldLoad(P: Node, N: N0, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4)) { | 
|---|
| 5956 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(i: 0) }; | 
|---|
| 5957 | unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8 | 
|---|
| 5958 | : X86::MOVZX16rm8; | 
|---|
| 5959 | Move = CurDAG->getMachineNode(Opcode: Opc, dl, VT1: MVT::i16, VT2: MVT::Other, Ops); | 
|---|
| 5960 | Chain = SDValue(Move, 1); | 
|---|
| 5961 | ReplaceUses(F: N0.getValue(R: 1), T: Chain); | 
|---|
| 5962 | // Record the mem-refs | 
|---|
| 5963 | CurDAG->setNodeMemRefs(N: Move, NewMemRefs: {cast<LoadSDNode>(Val&: N0)->getMemOperand()}); | 
|---|
| 5964 | } else { | 
|---|
| 5965 | unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8 | 
|---|
| 5966 | : X86::MOVZX16rr8; | 
|---|
| 5967 | Move = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i16, Op1: N0); | 
|---|
| 5968 | Chain = CurDAG->getEntryNode(); | 
|---|
| 5969 | } | 
|---|
| 5970 | Chain  = CurDAG->getCopyToReg(Chain, dl, Reg: X86::AX, N: SDValue(Move, 0), | 
|---|
| 5971 | Glue: SDValue()); | 
|---|
| 5972 | InGlue = Chain.getValue(R: 1); | 
|---|
| 5973 | } else { | 
|---|
| 5974 | InGlue = | 
|---|
| 5975 | CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, | 
|---|
| 5976 | Reg: LoReg, N: N0, Glue: SDValue()).getValue(R: 1); | 
|---|
| 5977 | if (isSigned && !signBitIsZero) { | 
|---|
| 5978 | // Sign extend the low part into the high part. | 
|---|
| 5979 | InGlue = | 
|---|
| 5980 | SDValue(CurDAG->getMachineNode(Opcode: SExtOpcode, dl, VT: MVT::Glue, Op1: InGlue),0); | 
|---|
| 5981 | } else { | 
|---|
| 5982 | // Zero out the high part, effectively zero extending the input. | 
|---|
| 5983 | SDVTList VTs = CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::i32); | 
|---|
| 5984 | SDValue ClrNode = | 
|---|
| 5985 | SDValue(CurDAG->getMachineNode(Opcode: X86::MOV32r0, dl, VTs, Ops: {}), 0); | 
|---|
| 5986 | switch (NVT.SimpleTy) { | 
|---|
| 5987 | case MVT::i16: | 
|---|
| 5988 | ClrNode = | 
|---|
| 5989 | SDValue(CurDAG->getMachineNode( | 
|---|
| 5990 | Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i16, Op1: ClrNode, | 
|---|
| 5991 | Op2: CurDAG->getTargetConstant(Val: X86::sub_16bit, DL: dl, | 
|---|
| 5992 | VT: MVT::i32)), | 
|---|
| 5993 | 0); | 
|---|
| 5994 | break; | 
|---|
| 5995 | case MVT::i32: | 
|---|
| 5996 | break; | 
|---|
| 5997 | case MVT::i64: | 
|---|
| 5998 | ClrNode = | 
|---|
| 5999 | SDValue(CurDAG->getMachineNode( | 
|---|
| 6000 | Opcode: TargetOpcode::SUBREG_TO_REG, dl, VT: MVT::i64, | 
|---|
| 6001 | Op1: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64), Op2: ClrNode, | 
|---|
| 6002 | Op3: CurDAG->getTargetConstant(Val: X86::sub_32bit, DL: dl, | 
|---|
| 6003 | VT: MVT::i32)), | 
|---|
| 6004 | 0); | 
|---|
| 6005 | break; | 
|---|
| 6006 | default: | 
|---|
| 6007 | llvm_unreachable( "Unexpected division source"); | 
|---|
| 6008 | } | 
|---|
| 6009 |  | 
|---|
| 6010 | InGlue = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, Reg: ClrReg, | 
|---|
| 6011 | N: ClrNode, Glue: InGlue).getValue(R: 1); | 
|---|
| 6012 | } | 
|---|
| 6013 | } | 
|---|
| 6014 |  | 
|---|
| 6015 | if (foldedLoad) { | 
|---|
| 6016 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(i: 0), | 
|---|
| 6017 | InGlue }; | 
|---|
| 6018 | MachineSDNode *CNode = | 
|---|
| 6019 | CurDAG->getMachineNode(Opcode: MOpc, dl, VT1: MVT::Other, VT2: MVT::Glue, Ops); | 
|---|
| 6020 | InGlue = SDValue(CNode, 1); | 
|---|
| 6021 | // Update the chain. | 
|---|
| 6022 | ReplaceUses(F: N1.getValue(R: 1), T: SDValue(CNode, 0)); | 
|---|
| 6023 | // Record the mem-refs | 
|---|
| 6024 | CurDAG->setNodeMemRefs(N: CNode, NewMemRefs: {cast<LoadSDNode>(Val&: N1)->getMemOperand()}); | 
|---|
| 6025 | } else { | 
|---|
| 6026 | InGlue = | 
|---|
| 6027 | SDValue(CurDAG->getMachineNode(Opcode: ROpc, dl, VT: MVT::Glue, Op1: N1, Op2: InGlue), 0); | 
|---|
| 6028 | } | 
|---|
| 6029 |  | 
|---|
| 6030 | // Prevent use of AH in a REX instruction by explicitly copying it to | 
|---|
| 6031 | // an ABCD_L register. | 
|---|
| 6032 | // | 
|---|
| 6033 | // The current assumption of the register allocator is that isel | 
|---|
| 6034 | // won't generate explicit references to the GR8_ABCD_H registers. If | 
|---|
| 6035 | // the allocator and/or the backend get enhanced to be more robust in | 
|---|
| 6036 | // that regard, this can be, and should be, removed. | 
|---|
| 6037 | if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) { | 
|---|
| 6038 | SDValue AHCopy = CurDAG->getRegister(Reg: X86::AH, VT: MVT::i8); | 
|---|
| 6039 | unsigned AHExtOpcode = | 
|---|
| 6040 | isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX; | 
|---|
| 6041 |  | 
|---|
| 6042 | SDNode *RNode = CurDAG->getMachineNode(Opcode: AHExtOpcode, dl, VT1: MVT::i32, | 
|---|
| 6043 | VT2: MVT::Glue, Op1: AHCopy, Op2: InGlue); | 
|---|
| 6044 | SDValue Result(RNode, 0); | 
|---|
| 6045 | InGlue = SDValue(RNode, 1); | 
|---|
| 6046 |  | 
|---|
| 6047 | Result = | 
|---|
| 6048 | CurDAG->getTargetExtractSubreg(SRIdx: X86::sub_8bit, DL: dl, VT: MVT::i8, Operand: Result); | 
|---|
| 6049 |  | 
|---|
| 6050 | ReplaceUses(F: SDValue(Node, 1), T: Result); | 
|---|
| 6051 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); | 
|---|
| 6052 | dbgs() << '\n'); | 
|---|
| 6053 | } | 
|---|
| 6054 | // Copy the division (low) result, if it is needed. | 
|---|
| 6055 | if (!SDValue(Node, 0).use_empty()) { | 
|---|
| 6056 | SDValue Result = CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl, | 
|---|
| 6057 | Reg: LoReg, VT: NVT, Glue: InGlue); | 
|---|
| 6058 | InGlue = Result.getValue(R: 2); | 
|---|
| 6059 | ReplaceUses(F: SDValue(Node, 0), T: Result); | 
|---|
| 6060 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); | 
|---|
| 6061 | dbgs() << '\n'); | 
|---|
| 6062 | } | 
|---|
| 6063 | // Copy the remainder (high) result, if it is needed. | 
|---|
| 6064 | if (!SDValue(Node, 1).use_empty()) { | 
|---|
| 6065 | SDValue Result = CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl, | 
|---|
| 6066 | Reg: HiReg, VT: NVT, Glue: InGlue); | 
|---|
| 6067 | InGlue = Result.getValue(R: 2); | 
|---|
| 6068 | ReplaceUses(F: SDValue(Node, 1), T: Result); | 
|---|
| 6069 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); | 
|---|
| 6070 | dbgs() << '\n'); | 
|---|
| 6071 | } | 
|---|
| 6072 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6073 | return; | 
|---|
| 6074 | } | 
|---|
| 6075 |  | 
|---|
| 6076 | case X86ISD::FCMP: | 
|---|
| 6077 | case X86ISD::STRICT_FCMP: | 
|---|
| 6078 | case X86ISD::STRICT_FCMPS: { | 
|---|
| 6079 | bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP || | 
|---|
| 6080 | Node->getOpcode() == X86ISD::STRICT_FCMPS; | 
|---|
| 6081 | SDValue N0 = Node->getOperand(Num: IsStrictCmp ? 1 : 0); | 
|---|
| 6082 | SDValue N1 = Node->getOperand(Num: IsStrictCmp ? 2 : 1); | 
|---|
| 6083 |  | 
|---|
| 6084 | // Save the original VT of the compare. | 
|---|
| 6085 | MVT CmpVT = N0.getSimpleValueType(); | 
|---|
| 6086 |  | 
|---|
| 6087 | // Floating point needs special handling if we don't have FCOMI. | 
|---|
| 6088 | if (Subtarget->canUseCMOV()) | 
|---|
| 6089 | break; | 
|---|
| 6090 |  | 
|---|
| 6091 | bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS; | 
|---|
| 6092 |  | 
|---|
| 6093 | unsigned Opc; | 
|---|
| 6094 | switch (CmpVT.SimpleTy) { | 
|---|
| 6095 | default: llvm_unreachable( "Unexpected type!"); | 
|---|
| 6096 | case MVT::f32: | 
|---|
| 6097 | Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32; | 
|---|
| 6098 | break; | 
|---|
| 6099 | case MVT::f64: | 
|---|
| 6100 | Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64; | 
|---|
| 6101 | break; | 
|---|
| 6102 | case MVT::f80: | 
|---|
| 6103 | Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80; | 
|---|
| 6104 | break; | 
|---|
| 6105 | } | 
|---|
| 6106 |  | 
|---|
| 6107 | SDValue Chain = | 
|---|
| 6108 | IsStrictCmp ? Node->getOperand(Num: 0) : CurDAG->getEntryNode(); | 
|---|
| 6109 | SDValue Glue; | 
|---|
| 6110 | if (IsStrictCmp) { | 
|---|
| 6111 | SDVTList VTs = CurDAG->getVTList(VT1: MVT::Other, VT2: MVT::Glue); | 
|---|
| 6112 | Chain = SDValue(CurDAG->getMachineNode(Opcode: Opc, dl, VTs, Ops: {N0, N1, Chain}), 0); | 
|---|
| 6113 | Glue = Chain.getValue(R: 1); | 
|---|
| 6114 | } else { | 
|---|
| 6115 | Glue = SDValue(CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Glue, Op1: N0, Op2: N1), 0); | 
|---|
| 6116 | } | 
|---|
| 6117 |  | 
|---|
| 6118 | // Move FPSW to AX. | 
|---|
| 6119 | SDValue FNSTSW = | 
|---|
| 6120 | SDValue(CurDAG->getMachineNode(Opcode: X86::FNSTSW16r, dl, VT: MVT::i16, Op1: Glue), 0); | 
|---|
| 6121 |  | 
|---|
| 6122 | // Extract upper 8-bits of AX. | 
|---|
| 6123 | SDValue Extract = | 
|---|
| 6124 | CurDAG->getTargetExtractSubreg(SRIdx: X86::sub_8bit_hi, DL: dl, VT: MVT::i8, Operand: FNSTSW); | 
|---|
| 6125 |  | 
|---|
| 6126 | // Move AH into flags. | 
|---|
| 6127 | // Some 64-bit targets lack SAHF support, but they do support FCOMI. | 
|---|
| 6128 | assert(Subtarget->canUseLAHFSAHF() && | 
|---|
| 6129 | "Target doesn't support SAHF or FCOMI?"); | 
|---|
| 6130 | SDValue AH = CurDAG->getCopyToReg(Chain, dl, Reg: X86::AH, N: Extract, Glue: SDValue()); | 
|---|
| 6131 | Chain = AH; | 
|---|
| 6132 | SDValue SAHF = SDValue( | 
|---|
| 6133 | CurDAG->getMachineNode(Opcode: X86::SAHF, dl, VT: MVT::i32, Op1: AH.getValue(R: 1)), 0); | 
|---|
| 6134 |  | 
|---|
| 6135 | if (IsStrictCmp) | 
|---|
| 6136 | ReplaceUses(F: SDValue(Node, 1), T: Chain); | 
|---|
| 6137 |  | 
|---|
| 6138 | ReplaceUses(F: SDValue(Node, 0), T: SAHF); | 
|---|
| 6139 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6140 | return; | 
|---|
| 6141 | } | 
|---|
| 6142 |  | 
|---|
| 6143 | case X86ISD::CMP: { | 
|---|
| 6144 | SDValue N0 = Node->getOperand(Num: 0); | 
|---|
| 6145 | SDValue N1 = Node->getOperand(Num: 1); | 
|---|
| 6146 |  | 
|---|
| 6147 | // Optimizations for TEST compares. | 
|---|
| 6148 | if (!isNullConstant(V: N1)) | 
|---|
| 6149 | break; | 
|---|
| 6150 |  | 
|---|
| 6151 | // Save the original VT of the compare. | 
|---|
| 6152 | MVT CmpVT = N0.getSimpleValueType(); | 
|---|
| 6153 |  | 
|---|
| 6154 | // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed | 
|---|
| 6155 | // by a test instruction. The test should be removed later by | 
|---|
| 6156 | // analyzeCompare if we are using only the zero flag. | 
|---|
| 6157 | // TODO: Should we check the users and use the BEXTR flags directly? | 
|---|
| 6158 | if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { | 
|---|
| 6159 | if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node: N0.getNode())) { | 
|---|
| 6160 | unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr | 
|---|
| 6161 | : X86::TEST32rr; | 
|---|
| 6162 | SDValue BEXTR = SDValue(NewNode, 0); | 
|---|
| 6163 | NewNode = CurDAG->getMachineNode(Opcode: TestOpc, dl, VT: MVT::i32, Op1: BEXTR, Op2: BEXTR); | 
|---|
| 6164 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(NewNode, 0)); | 
|---|
| 6165 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6166 | return; | 
|---|
| 6167 | } | 
|---|
| 6168 | } | 
|---|
| 6169 |  | 
|---|
| 6170 | // We can peek through truncates, but we need to be careful below. | 
|---|
| 6171 | if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) | 
|---|
| 6172 | N0 = N0.getOperand(i: 0); | 
|---|
| 6173 |  | 
|---|
| 6174 | // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to | 
|---|
| 6175 | // use a smaller encoding. | 
|---|
| 6176 | // Look past the truncate if CMP is the only use of it. | 
|---|
| 6177 | if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && | 
|---|
| 6178 | N0.getValueType() != MVT::i8) { | 
|---|
| 6179 | auto *MaskC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1)); | 
|---|
| 6180 | if (!MaskC) | 
|---|
| 6181 | break; | 
|---|
| 6182 |  | 
|---|
| 6183 | // We may have looked through a truncate so mask off any bits that | 
|---|
| 6184 | // shouldn't be part of the compare. | 
|---|
| 6185 | uint64_t Mask = MaskC->getZExtValue(); | 
|---|
| 6186 | Mask &= maskTrailingOnes<uint64_t>(N: CmpVT.getScalarSizeInBits()); | 
|---|
| 6187 |  | 
|---|
| 6188 | // Check if we can replace AND+IMM{32,64} with a shift. This is possible | 
|---|
| 6189 | // for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the | 
|---|
| 6190 | // zero flag. | 
|---|
| 6191 | if (CmpVT == MVT::i64 && !isInt<8>(x: Mask) && isShiftedMask_64(Value: Mask) && | 
|---|
| 6192 | onlyUsesZeroFlag(Flags: SDValue(Node, 0))) { | 
|---|
| 6193 | unsigned ShiftOpcode = ISD::DELETED_NODE; | 
|---|
| 6194 | unsigned ShiftAmt; | 
|---|
| 6195 | unsigned SubRegIdx; | 
|---|
| 6196 | MVT SubRegVT; | 
|---|
| 6197 | unsigned TestOpcode; | 
|---|
| 6198 | unsigned LeadingZeros = llvm::countl_zero(Val: Mask); | 
|---|
| 6199 | unsigned TrailingZeros = llvm::countr_zero(Val: Mask); | 
|---|
| 6200 |  | 
|---|
| 6201 | // With leading/trailing zeros, the transform is profitable if we can | 
|---|
| 6202 | // eliminate a movabsq or shrink a 32-bit immediate to 8-bit without | 
|---|
| 6203 | // incurring any extra register moves. | 
|---|
| 6204 | bool SavesBytes = !isInt<32>(x: Mask) || N0.getOperand(i: 0).hasOneUse(); | 
|---|
| 6205 | if (LeadingZeros == 0 && SavesBytes) { | 
|---|
| 6206 | // If the mask covers the most significant bit, then we can replace | 
|---|
| 6207 | // TEST+AND with a SHR and check eflags. | 
|---|
| 6208 | // This emits a redundant TEST which is subsequently eliminated. | 
|---|
| 6209 | ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri); | 
|---|
| 6210 | ShiftAmt = TrailingZeros; | 
|---|
| 6211 | SubRegIdx = 0; | 
|---|
| 6212 | TestOpcode = X86::TEST64rr; | 
|---|
| 6213 | } else if (TrailingZeros == 0 && SavesBytes) { | 
|---|
| 6214 | // If the mask covers the least significant bit, then we can replace | 
|---|
| 6215 | // TEST+AND with a SHL and check eflags. | 
|---|
| 6216 | // This emits a redundant TEST which is subsequently eliminated. | 
|---|
| 6217 | ShiftOpcode = GET_ND_IF_ENABLED(X86::SHL64ri); | 
|---|
| 6218 | ShiftAmt = LeadingZeros; | 
|---|
| 6219 | SubRegIdx = 0; | 
|---|
| 6220 | TestOpcode = X86::TEST64rr; | 
|---|
| 6221 | } else if (MaskC->hasOneUse() && !isInt<32>(x: Mask)) { | 
|---|
| 6222 | // If the shifted mask extends into the high half and is 8/16/32 bits | 
|---|
| 6223 | // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr. | 
|---|
| 6224 | unsigned PopCount = 64 - LeadingZeros - TrailingZeros; | 
|---|
| 6225 | if (PopCount == 8) { | 
|---|
| 6226 | ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri); | 
|---|
| 6227 | ShiftAmt = TrailingZeros; | 
|---|
| 6228 | SubRegIdx = X86::sub_8bit; | 
|---|
| 6229 | SubRegVT = MVT::i8; | 
|---|
| 6230 | TestOpcode = X86::TEST8rr; | 
|---|
| 6231 | } else if (PopCount == 16) { | 
|---|
| 6232 | ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri); | 
|---|
| 6233 | ShiftAmt = TrailingZeros; | 
|---|
| 6234 | SubRegIdx = X86::sub_16bit; | 
|---|
| 6235 | SubRegVT = MVT::i16; | 
|---|
| 6236 | TestOpcode = X86::TEST16rr; | 
|---|
| 6237 | } else if (PopCount == 32) { | 
|---|
| 6238 | ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri); | 
|---|
| 6239 | ShiftAmt = TrailingZeros; | 
|---|
| 6240 | SubRegIdx = X86::sub_32bit; | 
|---|
| 6241 | SubRegVT = MVT::i32; | 
|---|
| 6242 | TestOpcode = X86::TEST32rr; | 
|---|
| 6243 | } | 
|---|
| 6244 | } | 
|---|
| 6245 | if (ShiftOpcode != ISD::DELETED_NODE) { | 
|---|
| 6246 | SDValue ShiftC = CurDAG->getTargetConstant(Val: ShiftAmt, DL: dl, VT: MVT::i64); | 
|---|
| 6247 | SDValue Shift = SDValue( | 
|---|
| 6248 | CurDAG->getMachineNode(Opcode: ShiftOpcode, dl, VT1: MVT::i64, VT2: MVT::i32, | 
|---|
| 6249 | Op1: N0.getOperand(i: 0), Op2: ShiftC), | 
|---|
| 6250 | 0); | 
|---|
| 6251 | if (SubRegIdx != 0) { | 
|---|
| 6252 | Shift = | 
|---|
| 6253 | CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL: dl, VT: SubRegVT, Operand: Shift); | 
|---|
| 6254 | } | 
|---|
| 6255 | MachineSDNode *Test = | 
|---|
| 6256 | CurDAG->getMachineNode(Opcode: TestOpcode, dl, VT: MVT::i32, Op1: Shift, Op2: Shift); | 
|---|
| 6257 | ReplaceNode(F: Node, T: Test); | 
|---|
| 6258 | return; | 
|---|
| 6259 | } | 
|---|
| 6260 | } | 
|---|
| 6261 |  | 
|---|
| 6262 | MVT VT; | 
|---|
| 6263 | int SubRegOp; | 
|---|
| 6264 | unsigned ROpc, MOpc; | 
|---|
| 6265 |  | 
|---|
| 6266 | // For each of these checks we need to be careful if the sign flag is | 
|---|
| 6267 | // being used. It is only safe to use the sign flag in two conditions, | 
|---|
| 6268 | // either the sign bit in the shrunken mask is zero or the final test | 
|---|
| 6269 | // size is equal to the original compare size. | 
|---|
| 6270 |  | 
|---|
| 6271 | if (isUInt<8>(x: Mask) && | 
|---|
| 6272 | (!(Mask & 0x80) || CmpVT == MVT::i8 || | 
|---|
| 6273 | hasNoSignFlagUses(Flags: SDValue(Node, 0)))) { | 
|---|
| 6274 | // For example, convert "testl %eax, $8" to "testb %al, $8" | 
|---|
| 6275 | VT = MVT::i8; | 
|---|
| 6276 | SubRegOp = X86::sub_8bit; | 
|---|
| 6277 | ROpc = X86::TEST8ri; | 
|---|
| 6278 | MOpc = X86::TEST8mi; | 
|---|
| 6279 | } else if (OptForMinSize && isUInt<16>(x: Mask) && | 
|---|
| 6280 | (!(Mask & 0x8000) || CmpVT == MVT::i16 || | 
|---|
| 6281 | hasNoSignFlagUses(Flags: SDValue(Node, 0)))) { | 
|---|
| 6282 | // For example, "testl %eax, $32776" to "testw %ax, $32776". | 
|---|
| 6283 | // NOTE: We only want to form TESTW instructions if optimizing for | 
|---|
| 6284 | // min size. Otherwise we only save one byte and possibly get a length | 
|---|
| 6285 | // changing prefix penalty in the decoders. | 
|---|
| 6286 | VT = MVT::i16; | 
|---|
| 6287 | SubRegOp = X86::sub_16bit; | 
|---|
| 6288 | ROpc = X86::TEST16ri; | 
|---|
| 6289 | MOpc = X86::TEST16mi; | 
|---|
| 6290 | } else if (isUInt<32>(x: Mask) && N0.getValueType() != MVT::i16 && | 
|---|
| 6291 | ((!(Mask & 0x80000000) && | 
|---|
| 6292 | // Without minsize 16-bit Cmps can get here so we need to | 
|---|
| 6293 | // be sure we calculate the correct sign flag if needed. | 
|---|
| 6294 | (CmpVT != MVT::i16 || !(Mask & 0x8000))) || | 
|---|
| 6295 | CmpVT == MVT::i32 || | 
|---|
| 6296 | hasNoSignFlagUses(Flags: SDValue(Node, 0)))) { | 
|---|
| 6297 | // For example, "testq %rax, $268468232" to "testl %eax, $268468232". | 
|---|
| 6298 | // NOTE: We only want to run that transform if N0 is 32 or 64 bits. | 
|---|
| 6299 | // Otherwise, we find ourselves in a position where we have to do | 
|---|
| 6300 | // promotion. If previous passes did not promote the and, we assume | 
|---|
| 6301 | // they had a good reason not to and do not promote here. | 
|---|
| 6302 | VT = MVT::i32; | 
|---|
| 6303 | SubRegOp = X86::sub_32bit; | 
|---|
| 6304 | ROpc = X86::TEST32ri; | 
|---|
| 6305 | MOpc = X86::TEST32mi; | 
|---|
| 6306 | } else { | 
|---|
| 6307 | // No eligible transformation was found. | 
|---|
| 6308 | break; | 
|---|
| 6309 | } | 
|---|
| 6310 |  | 
|---|
| 6311 | SDValue Imm = CurDAG->getTargetConstant(Val: Mask, DL: dl, VT); | 
|---|
| 6312 | SDValue Reg = N0.getOperand(i: 0); | 
|---|
| 6313 |  | 
|---|
| 6314 | // Emit a testb, testw, or testl. | 
|---|
| 6315 | MachineSDNode *NewNode; | 
|---|
| 6316 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | 
|---|
| 6317 | if (tryFoldLoad(Root: Node, P: N0.getNode(), N: Reg, Base&: Tmp0, Scale&: Tmp1, Index&: Tmp2, Disp&: Tmp3, Segment&: Tmp4)) { | 
|---|
| 6318 | if (auto *LoadN = dyn_cast<LoadSDNode>(Val: N0.getOperand(i: 0).getNode())) { | 
|---|
| 6319 | if (!LoadN->isSimple()) { | 
|---|
| 6320 | unsigned NumVolBits = LoadN->getValueType(ResNo: 0).getSizeInBits(); | 
|---|
| 6321 | if ((MOpc == X86::TEST8mi && NumVolBits != 8) || | 
|---|
| 6322 | (MOpc == X86::TEST16mi && NumVolBits != 16) || | 
|---|
| 6323 | (MOpc == X86::TEST32mi && NumVolBits != 32)) | 
|---|
| 6324 | break; | 
|---|
| 6325 | } | 
|---|
| 6326 | } | 
|---|
| 6327 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, | 
|---|
| 6328 | Reg.getOperand(i: 0) }; | 
|---|
| 6329 | NewNode = CurDAG->getMachineNode(Opcode: MOpc, dl, VT1: MVT::i32, VT2: MVT::Other, Ops); | 
|---|
| 6330 | // Update the chain. | 
|---|
| 6331 | ReplaceUses(F: Reg.getValue(R: 1), T: SDValue(NewNode, 1)); | 
|---|
| 6332 | // Record the mem-refs | 
|---|
| 6333 | CurDAG->setNodeMemRefs(N: NewNode, | 
|---|
| 6334 | NewMemRefs: {cast<LoadSDNode>(Val&: Reg)->getMemOperand()}); | 
|---|
| 6335 | } else { | 
|---|
| 6336 | // Extract the subregister if necessary. | 
|---|
| 6337 | if (N0.getValueType() != VT) | 
|---|
| 6338 | Reg = CurDAG->getTargetExtractSubreg(SRIdx: SubRegOp, DL: dl, VT, Operand: Reg); | 
|---|
| 6339 |  | 
|---|
| 6340 | NewNode = CurDAG->getMachineNode(Opcode: ROpc, dl, VT: MVT::i32, Op1: Reg, Op2: Imm); | 
|---|
| 6341 | } | 
|---|
| 6342 | // Replace CMP with TEST. | 
|---|
| 6343 | ReplaceNode(F: Node, T: NewNode); | 
|---|
| 6344 | return; | 
|---|
| 6345 | } | 
|---|
| 6346 | break; | 
|---|
| 6347 | } | 
|---|
| 6348 | case X86ISD::PCMPISTR: { | 
|---|
| 6349 | if (!Subtarget->hasSSE42()) | 
|---|
| 6350 | break; | 
|---|
| 6351 |  | 
|---|
| 6352 | bool NeedIndex = !SDValue(Node, 0).use_empty(); | 
|---|
| 6353 | bool NeedMask = !SDValue(Node, 1).use_empty(); | 
|---|
| 6354 | // We can't fold a load if we are going to make two instructions. | 
|---|
| 6355 | bool MayFoldLoad = !NeedIndex || !NeedMask; | 
|---|
| 6356 |  | 
|---|
| 6357 | MachineSDNode *CNode; | 
|---|
| 6358 | if (NeedMask) { | 
|---|
| 6359 | unsigned ROpc = | 
|---|
| 6360 | Subtarget->hasAVX() ? X86::VPCMPISTRMrri : X86::PCMPISTRMrri; | 
|---|
| 6361 | unsigned MOpc = | 
|---|
| 6362 | Subtarget->hasAVX() ? X86::VPCMPISTRMrmi : X86::PCMPISTRMrmi; | 
|---|
| 6363 | CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, VT: MVT::v16i8, Node); | 
|---|
| 6364 | ReplaceUses(F: SDValue(Node, 1), T: SDValue(CNode, 0)); | 
|---|
| 6365 | } | 
|---|
| 6366 | if (NeedIndex || !NeedMask) { | 
|---|
| 6367 | unsigned ROpc = | 
|---|
| 6368 | Subtarget->hasAVX() ? X86::VPCMPISTRIrri : X86::PCMPISTRIrri; | 
|---|
| 6369 | unsigned MOpc = | 
|---|
| 6370 | Subtarget->hasAVX() ? X86::VPCMPISTRIrmi : X86::PCMPISTRIrmi; | 
|---|
| 6371 | CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, VT: MVT::i32, Node); | 
|---|
| 6372 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(CNode, 0)); | 
|---|
| 6373 | } | 
|---|
| 6374 |  | 
|---|
| 6375 | // Connect the flag usage to the last instruction created. | 
|---|
| 6376 | ReplaceUses(F: SDValue(Node, 2), T: SDValue(CNode, 1)); | 
|---|
| 6377 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6378 | return; | 
|---|
| 6379 | } | 
|---|
| 6380 | case X86ISD::PCMPESTR: { | 
|---|
| 6381 | if (!Subtarget->hasSSE42()) | 
|---|
| 6382 | break; | 
|---|
| 6383 |  | 
|---|
| 6384 | // Copy the two implicit register inputs. | 
|---|
| 6385 | SDValue InGlue = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, Reg: X86::EAX, | 
|---|
| 6386 | N: Node->getOperand(Num: 1), | 
|---|
| 6387 | Glue: SDValue()).getValue(R: 1); | 
|---|
| 6388 | InGlue = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, Reg: X86::EDX, | 
|---|
| 6389 | N: Node->getOperand(Num: 3), Glue: InGlue).getValue(R: 1); | 
|---|
| 6390 |  | 
|---|
| 6391 | bool NeedIndex = !SDValue(Node, 0).use_empty(); | 
|---|
| 6392 | bool NeedMask = !SDValue(Node, 1).use_empty(); | 
|---|
| 6393 | // We can't fold a load if we are going to make two instructions. | 
|---|
| 6394 | bool MayFoldLoad = !NeedIndex || !NeedMask; | 
|---|
| 6395 |  | 
|---|
| 6396 | MachineSDNode *CNode; | 
|---|
| 6397 | if (NeedMask) { | 
|---|
| 6398 | unsigned ROpc = | 
|---|
| 6399 | Subtarget->hasAVX() ? X86::VPCMPESTRMrri : X86::PCMPESTRMrri; | 
|---|
| 6400 | unsigned MOpc = | 
|---|
| 6401 | Subtarget->hasAVX() ? X86::VPCMPESTRMrmi : X86::PCMPESTRMrmi; | 
|---|
| 6402 | CNode = | 
|---|
| 6403 | emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, VT: MVT::v16i8, Node, InGlue); | 
|---|
| 6404 | ReplaceUses(F: SDValue(Node, 1), T: SDValue(CNode, 0)); | 
|---|
| 6405 | } | 
|---|
| 6406 | if (NeedIndex || !NeedMask) { | 
|---|
| 6407 | unsigned ROpc = | 
|---|
| 6408 | Subtarget->hasAVX() ? X86::VPCMPESTRIrri : X86::PCMPESTRIrri; | 
|---|
| 6409 | unsigned MOpc = | 
|---|
| 6410 | Subtarget->hasAVX() ? X86::VPCMPESTRIrmi : X86::PCMPESTRIrmi; | 
|---|
| 6411 | CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, VT: MVT::i32, Node, InGlue); | 
|---|
| 6412 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(CNode, 0)); | 
|---|
| 6413 | } | 
|---|
| 6414 | // Connect the flag usage to the last instruction created. | 
|---|
| 6415 | ReplaceUses(F: SDValue(Node, 2), T: SDValue(CNode, 1)); | 
|---|
| 6416 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6417 | return; | 
|---|
| 6418 | } | 
|---|
| 6419 |  | 
|---|
| 6420 | case ISD::SETCC: { | 
|---|
| 6421 | if (NVT.isVector() && tryVPTESTM(Root: Node, Setcc: SDValue(Node, 0), InMask: SDValue())) | 
|---|
| 6422 | return; | 
|---|
| 6423 |  | 
|---|
| 6424 | break; | 
|---|
| 6425 | } | 
|---|
| 6426 |  | 
|---|
| 6427 | case ISD::STORE: | 
|---|
| 6428 | if (foldLoadStoreIntoMemOperand(Node)) | 
|---|
| 6429 | return; | 
|---|
| 6430 | break; | 
|---|
| 6431 |  | 
|---|
| 6432 | case X86ISD::SETCC_CARRY: { | 
|---|
| 6433 | MVT VT = Node->getSimpleValueType(ResNo: 0); | 
|---|
| 6434 | SDValue Result; | 
|---|
| 6435 | if (Subtarget->hasSBBDepBreaking()) { | 
|---|
| 6436 | // We have to do this manually because tblgen will put the eflags copy in | 
|---|
| 6437 | // the wrong place if we use an extract_subreg in the pattern. | 
|---|
| 6438 | // Copy flags to the EFLAGS register and glue it to next node. | 
|---|
| 6439 | SDValue EFLAGS = | 
|---|
| 6440 | CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, Reg: X86::EFLAGS, | 
|---|
| 6441 | N: Node->getOperand(Num: 1), Glue: SDValue()); | 
|---|
| 6442 |  | 
|---|
| 6443 | // Create a 64-bit instruction if the result is 64-bits otherwise use the | 
|---|
| 6444 | // 32-bit version. | 
|---|
| 6445 | unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r; | 
|---|
| 6446 | MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; | 
|---|
| 6447 | Result = SDValue( | 
|---|
| 6448 | CurDAG->getMachineNode(Opcode: Opc, dl, VT: SetVT, Op1: EFLAGS, Op2: EFLAGS.getValue(R: 1)), | 
|---|
| 6449 | 0); | 
|---|
| 6450 | } else { | 
|---|
| 6451 | // The target does not recognize sbb with the same reg operand as a | 
|---|
| 6452 | // no-source idiom, so we explicitly zero the input values. | 
|---|
| 6453 | Result = getSBBZero(N: Node); | 
|---|
| 6454 | } | 
|---|
| 6455 |  | 
|---|
| 6456 | // For less than 32-bits we need to extract from the 32-bit node. | 
|---|
| 6457 | if (VT == MVT::i8 || VT == MVT::i16) { | 
|---|
| 6458 | int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; | 
|---|
| 6459 | Result = CurDAG->getTargetExtractSubreg(SRIdx: SubIndex, DL: dl, VT, Operand: Result); | 
|---|
| 6460 | } | 
|---|
| 6461 |  | 
|---|
| 6462 | ReplaceUses(F: SDValue(Node, 0), T: Result); | 
|---|
| 6463 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6464 | return; | 
|---|
| 6465 | } | 
|---|
| 6466 | case X86ISD::SBB: { | 
|---|
| 6467 | if (isNullConstant(V: Node->getOperand(Num: 0)) && | 
|---|
| 6468 | isNullConstant(V: Node->getOperand(Num: 1))) { | 
|---|
| 6469 | SDValue Result = getSBBZero(N: Node); | 
|---|
| 6470 |  | 
|---|
| 6471 | // Replace the flag use. | 
|---|
| 6472 | ReplaceUses(F: SDValue(Node, 1), T: Result.getValue(R: 1)); | 
|---|
| 6473 |  | 
|---|
| 6474 | // Replace the result use. | 
|---|
| 6475 | if (!SDValue(Node, 0).use_empty()) { | 
|---|
| 6476 | // For less than 32-bits we need to extract from the 32-bit node. | 
|---|
| 6477 | MVT VT = Node->getSimpleValueType(ResNo: 0); | 
|---|
| 6478 | if (VT == MVT::i8 || VT == MVT::i16) { | 
|---|
| 6479 | int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; | 
|---|
| 6480 | Result = CurDAG->getTargetExtractSubreg(SRIdx: SubIndex, DL: dl, VT, Operand: Result); | 
|---|
| 6481 | } | 
|---|
| 6482 | ReplaceUses(F: SDValue(Node, 0), T: Result); | 
|---|
| 6483 | } | 
|---|
| 6484 |  | 
|---|
| 6485 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6486 | return; | 
|---|
| 6487 | } | 
|---|
| 6488 | break; | 
|---|
| 6489 | } | 
|---|
| 6490 | case X86ISD::MGATHER: { | 
|---|
| 6491 | auto *Mgt = cast<X86MaskedGatherSDNode>(Val: Node); | 
|---|
| 6492 | SDValue IndexOp = Mgt->getIndex(); | 
|---|
| 6493 | SDValue Mask = Mgt->getMask(); | 
|---|
| 6494 | MVT IndexVT = IndexOp.getSimpleValueType(); | 
|---|
| 6495 | MVT ValueVT = Node->getSimpleValueType(ResNo: 0); | 
|---|
| 6496 | MVT MaskVT = Mask.getSimpleValueType(); | 
|---|
| 6497 |  | 
|---|
| 6498 | // This is just to prevent crashes if the nodes are malformed somehow. We're | 
|---|
| 6499 | // otherwise only doing loose type checking in here, based on what a type | 
|---|
| 6500 | // constraint would say, just like table-based isel. | 
|---|
| 6501 | if (!ValueVT.isVector() || !MaskVT.isVector()) | 
|---|
| 6502 | break; | 
|---|
| 6503 |  | 
|---|
| 6504 | unsigned NumElts = ValueVT.getVectorNumElements(); | 
|---|
| 6505 | MVT ValueSVT = ValueVT.getVectorElementType(); | 
|---|
| 6506 |  | 
|---|
| 6507 | bool IsFP = ValueSVT.isFloatingPoint(); | 
|---|
| 6508 | unsigned EltSize = ValueSVT.getSizeInBits(); | 
|---|
| 6509 |  | 
|---|
| 6510 | unsigned Opc = 0; | 
|---|
| 6511 | bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1; | 
|---|
| 6512 | if (AVX512Gather) { | 
|---|
| 6513 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) | 
|---|
| 6514 | Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm; | 
|---|
| 6515 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) | 
|---|
| 6516 | Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm; | 
|---|
| 6517 | else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) | 
|---|
| 6518 | Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm; | 
|---|
| 6519 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) | 
|---|
| 6520 | Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm; | 
|---|
| 6521 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) | 
|---|
| 6522 | Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm; | 
|---|
| 6523 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) | 
|---|
| 6524 | Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm; | 
|---|
| 6525 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) | 
|---|
| 6526 | Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm; | 
|---|
| 6527 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) | 
|---|
| 6528 | Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm; | 
|---|
| 6529 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) | 
|---|
| 6530 | Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm; | 
|---|
| 6531 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) | 
|---|
| 6532 | Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm; | 
|---|
| 6533 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) | 
|---|
| 6534 | Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm; | 
|---|
| 6535 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) | 
|---|
| 6536 | Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm; | 
|---|
| 6537 | } else { | 
|---|
| 6538 | assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() && | 
|---|
| 6539 | "Unexpected mask VT!"); | 
|---|
| 6540 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) | 
|---|
| 6541 | Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm; | 
|---|
| 6542 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) | 
|---|
| 6543 | Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm; | 
|---|
| 6544 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) | 
|---|
| 6545 | Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm; | 
|---|
| 6546 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) | 
|---|
| 6547 | Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm; | 
|---|
| 6548 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) | 
|---|
| 6549 | Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm; | 
|---|
| 6550 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) | 
|---|
| 6551 | Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm; | 
|---|
| 6552 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) | 
|---|
| 6553 | Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm; | 
|---|
| 6554 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) | 
|---|
| 6555 | Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm; | 
|---|
| 6556 | } | 
|---|
| 6557 |  | 
|---|
| 6558 | if (!Opc) | 
|---|
| 6559 | break; | 
|---|
| 6560 |  | 
|---|
| 6561 | SDValue Base, Scale, Index, Disp, Segment; | 
|---|
| 6562 | if (!selectVectorAddr(Parent: Mgt, BasePtr: Mgt->getBasePtr(), IndexOp, ScaleOp: Mgt->getScale(), | 
|---|
| 6563 | Base, Scale, Index, Disp, Segment)) | 
|---|
| 6564 | break; | 
|---|
| 6565 |  | 
|---|
| 6566 | SDValue PassThru = Mgt->getPassThru(); | 
|---|
| 6567 | SDValue Chain = Mgt->getChain(); | 
|---|
| 6568 | // Gather instructions have a mask output not in the ISD node. | 
|---|
| 6569 | SDVTList VTs = CurDAG->getVTList(VT1: ValueVT, VT2: MaskVT, VT3: MVT::Other); | 
|---|
| 6570 |  | 
|---|
| 6571 | MachineSDNode *NewNode; | 
|---|
| 6572 | if (AVX512Gather) { | 
|---|
| 6573 | SDValue Ops[] = {PassThru, Mask, Base,    Scale, | 
|---|
| 6574 | Index,    Disp, Segment, Chain}; | 
|---|
| 6575 | NewNode = CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(dl), VTs, Ops); | 
|---|
| 6576 | } else { | 
|---|
| 6577 | SDValue Ops[] = {PassThru, Base,    Scale, Index, | 
|---|
| 6578 | Disp,     Segment, Mask,  Chain}; | 
|---|
| 6579 | NewNode = CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(dl), VTs, Ops); | 
|---|
| 6580 | } | 
|---|
| 6581 | CurDAG->setNodeMemRefs(N: NewNode, NewMemRefs: {Mgt->getMemOperand()}); | 
|---|
| 6582 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(NewNode, 0)); | 
|---|
| 6583 | ReplaceUses(F: SDValue(Node, 1), T: SDValue(NewNode, 2)); | 
|---|
| 6584 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6585 | return; | 
|---|
| 6586 | } | 
|---|
| 6587 | case X86ISD::MSCATTER: { | 
|---|
| 6588 | auto *Sc = cast<X86MaskedScatterSDNode>(Val: Node); | 
|---|
| 6589 | SDValue Value = Sc->getValue(); | 
|---|
| 6590 | SDValue IndexOp = Sc->getIndex(); | 
|---|
| 6591 | MVT IndexVT = IndexOp.getSimpleValueType(); | 
|---|
| 6592 | MVT ValueVT = Value.getSimpleValueType(); | 
|---|
| 6593 |  | 
|---|
| 6594 | // This is just to prevent crashes if the nodes are malformed somehow. We're | 
|---|
| 6595 | // otherwise only doing loose type checking in here, based on what a type | 
|---|
| 6596 | // constraint would say, just like table-based isel. | 
|---|
| 6597 | if (!ValueVT.isVector()) | 
|---|
| 6598 | break; | 
|---|
| 6599 |  | 
|---|
| 6600 | unsigned NumElts = ValueVT.getVectorNumElements(); | 
|---|
| 6601 | MVT ValueSVT = ValueVT.getVectorElementType(); | 
|---|
| 6602 |  | 
|---|
| 6603 | bool IsFP = ValueSVT.isFloatingPoint(); | 
|---|
| 6604 | unsigned EltSize = ValueSVT.getSizeInBits(); | 
|---|
| 6605 |  | 
|---|
| 6606 | unsigned Opc; | 
|---|
| 6607 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) | 
|---|
| 6608 | Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr; | 
|---|
| 6609 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) | 
|---|
| 6610 | Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr; | 
|---|
| 6611 | else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) | 
|---|
| 6612 | Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr; | 
|---|
| 6613 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) | 
|---|
| 6614 | Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr; | 
|---|
| 6615 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) | 
|---|
| 6616 | Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr; | 
|---|
| 6617 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) | 
|---|
| 6618 | Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr; | 
|---|
| 6619 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) | 
|---|
| 6620 | Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr; | 
|---|
| 6621 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) | 
|---|
| 6622 | Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr; | 
|---|
| 6623 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) | 
|---|
| 6624 | Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr; | 
|---|
| 6625 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) | 
|---|
| 6626 | Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr; | 
|---|
| 6627 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) | 
|---|
| 6628 | Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr; | 
|---|
| 6629 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) | 
|---|
| 6630 | Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr; | 
|---|
| 6631 | else | 
|---|
| 6632 | break; | 
|---|
| 6633 |  | 
|---|
| 6634 | SDValue Base, Scale, Index, Disp, Segment; | 
|---|
| 6635 | if (!selectVectorAddr(Parent: Sc, BasePtr: Sc->getBasePtr(), IndexOp, ScaleOp: Sc->getScale(), | 
|---|
| 6636 | Base, Scale, Index, Disp, Segment)) | 
|---|
| 6637 | break; | 
|---|
| 6638 |  | 
|---|
| 6639 | SDValue Mask = Sc->getMask(); | 
|---|
| 6640 | SDValue Chain = Sc->getChain(); | 
|---|
| 6641 | // Scatter instructions have a mask output not in the ISD node. | 
|---|
| 6642 | SDVTList VTs = CurDAG->getVTList(VT1: Mask.getValueType(), VT2: MVT::Other); | 
|---|
| 6643 | SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain}; | 
|---|
| 6644 |  | 
|---|
| 6645 | MachineSDNode *NewNode = CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(dl), VTs, Ops); | 
|---|
| 6646 | CurDAG->setNodeMemRefs(N: NewNode, NewMemRefs: {Sc->getMemOperand()}); | 
|---|
| 6647 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(NewNode, 1)); | 
|---|
| 6648 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6649 | return; | 
|---|
| 6650 | } | 
|---|
| 6651 | case ISD::PREALLOCATED_SETUP: { | 
|---|
| 6652 | auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); | 
|---|
| 6653 | auto CallId = MFI->getPreallocatedIdForCallSite( | 
|---|
| 6654 | CS: cast<SrcValueSDNode>(Val: Node->getOperand(Num: 1))->getValue()); | 
|---|
| 6655 | SDValue Chain = Node->getOperand(Num: 0); | 
|---|
| 6656 | SDValue CallIdValue = CurDAG->getTargetConstant(Val: CallId, DL: dl, VT: MVT::i32); | 
|---|
| 6657 | MachineSDNode *New = CurDAG->getMachineNode( | 
|---|
| 6658 | Opcode: TargetOpcode::PREALLOCATED_SETUP, dl, VT: MVT::Other, Op1: CallIdValue, Op2: Chain); | 
|---|
| 6659 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(New, 0)); // Chain | 
|---|
| 6660 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6661 | return; | 
|---|
| 6662 | } | 
|---|
| 6663 | case ISD::PREALLOCATED_ARG: { | 
|---|
| 6664 | auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); | 
|---|
| 6665 | auto CallId = MFI->getPreallocatedIdForCallSite( | 
|---|
| 6666 | CS: cast<SrcValueSDNode>(Val: Node->getOperand(Num: 1))->getValue()); | 
|---|
| 6667 | SDValue Chain = Node->getOperand(Num: 0); | 
|---|
| 6668 | SDValue CallIdValue = CurDAG->getTargetConstant(Val: CallId, DL: dl, VT: MVT::i32); | 
|---|
| 6669 | SDValue ArgIndex = Node->getOperand(Num: 2); | 
|---|
| 6670 | SDValue Ops[3]; | 
|---|
| 6671 | Ops[0] = CallIdValue; | 
|---|
| 6672 | Ops[1] = ArgIndex; | 
|---|
| 6673 | Ops[2] = Chain; | 
|---|
| 6674 | MachineSDNode *New = CurDAG->getMachineNode( | 
|---|
| 6675 | Opcode: TargetOpcode::PREALLOCATED_ARG, dl, | 
|---|
| 6676 | VTs: CurDAG->getVTList(VT1: TLI->getPointerTy(DL: CurDAG->getDataLayout()), | 
|---|
| 6677 | VT2: MVT::Other), | 
|---|
| 6678 | Ops); | 
|---|
| 6679 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(New, 0)); // Arg pointer | 
|---|
| 6680 | ReplaceUses(F: SDValue(Node, 1), T: SDValue(New, 1)); // Chain | 
|---|
| 6681 | CurDAG->RemoveDeadNode(N: Node); | 
|---|
| 6682 | return; | 
|---|
| 6683 | } | 
|---|
| 6684 | case X86ISD::AESENCWIDE128KL: | 
|---|
| 6685 | case X86ISD::AESDECWIDE128KL: | 
|---|
| 6686 | case X86ISD::AESENCWIDE256KL: | 
|---|
| 6687 | case X86ISD::AESDECWIDE256KL: { | 
|---|
| 6688 | if (!Subtarget->hasWIDEKL()) | 
|---|
| 6689 | break; | 
|---|
| 6690 |  | 
|---|
| 6691 | unsigned Opcode; | 
|---|
| 6692 | switch (Node->getOpcode()) { | 
|---|
| 6693 | default: | 
|---|
| 6694 | llvm_unreachable( "Unexpected opcode!"); | 
|---|
| 6695 | case X86ISD::AESENCWIDE128KL: | 
|---|
| 6696 | Opcode = X86::AESENCWIDE128KL; | 
|---|
| 6697 | break; | 
|---|
| 6698 | case X86ISD::AESDECWIDE128KL: | 
|---|
| 6699 | Opcode = X86::AESDECWIDE128KL; | 
|---|
| 6700 | break; | 
|---|
| 6701 | case X86ISD::AESENCWIDE256KL: | 
|---|
| 6702 | Opcode = X86::AESENCWIDE256KL; | 
|---|
| 6703 | break; | 
|---|
| 6704 | case X86ISD::AESDECWIDE256KL: | 
|---|
| 6705 | Opcode = X86::AESDECWIDE256KL; | 
|---|
| 6706 | break; | 
|---|
| 6707 | } | 
|---|
| 6708 |  | 
|---|
| 6709 | SDValue Chain = Node->getOperand(Num: 0); | 
|---|
| 6710 | SDValue Addr = Node->getOperand(Num: 1); | 
|---|
| 6711 |  | 
|---|
| 6712 | SDValue Base, Scale, Index, Disp, Segment; | 
|---|
| 6713 | if (!selectAddr(Parent: Node, N: Addr, Base, Scale, Index, Disp, Segment)) | 
|---|
| 6714 | break; | 
|---|
| 6715 |  | 
|---|
| 6716 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM0, N: Node->getOperand(Num: 2), | 
|---|
| 6717 | Glue: SDValue()); | 
|---|
| 6718 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM1, N: Node->getOperand(Num: 3), | 
|---|
| 6719 | Glue: Chain.getValue(R: 1)); | 
|---|
| 6720 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM2, N: Node->getOperand(Num: 4), | 
|---|
| 6721 | Glue: Chain.getValue(R: 1)); | 
|---|
| 6722 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM3, N: Node->getOperand(Num: 5), | 
|---|
| 6723 | Glue: Chain.getValue(R: 1)); | 
|---|
| 6724 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM4, N: Node->getOperand(Num: 6), | 
|---|
| 6725 | Glue: Chain.getValue(R: 1)); | 
|---|
| 6726 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM5, N: Node->getOperand(Num: 7), | 
|---|
| 6727 | Glue: Chain.getValue(R: 1)); | 
|---|
| 6728 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM6, N: Node->getOperand(Num: 8), | 
|---|
| 6729 | Glue: Chain.getValue(R: 1)); | 
|---|
| 6730 | Chain = CurDAG->getCopyToReg(Chain, dl, Reg: X86::XMM7, N: Node->getOperand(Num: 9), | 
|---|
| 6731 | Glue: Chain.getValue(R: 1)); | 
|---|
| 6732 |  | 
|---|
| 6733 | MachineSDNode *Res = CurDAG->getMachineNode( | 
|---|
| 6734 | Opcode, dl, VTs: Node->getVTList(), | 
|---|
| 6735 | Ops: {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(R: 1)}); | 
|---|
| 6736 | CurDAG->setNodeMemRefs(N: Res, NewMemRefs: cast<MemSDNode>(Val: Node)->getMemOperand()); | 
|---|
| 6737 | ReplaceNode(F: Node, T: Res); | 
|---|
| 6738 | return; | 
|---|
| 6739 | } | 
|---|
| 6740 | case X86ISD::POP_FROM_X87_REG: { | 
|---|
| 6741 | SDValue Chain = Node->getOperand(Num: 0); | 
|---|
| 6742 | Register Reg = cast<RegisterSDNode>(Val: Node->getOperand(Num: 1))->getReg(); | 
|---|
| 6743 | SDValue Glue; | 
|---|
| 6744 | if (Node->getNumValues() == 3) | 
|---|
| 6745 | Glue = Node->getOperand(Num: 2); | 
|---|
| 6746 | SDValue Copy = | 
|---|
| 6747 | CurDAG->getCopyFromReg(Chain, dl, Reg, VT: Node->getValueType(ResNo: 0), Glue); | 
|---|
| 6748 | ReplaceNode(F: Node, T: Copy.getNode()); | 
|---|
| 6749 | return; | 
|---|
| 6750 | } | 
|---|
| 6751 | } | 
|---|
| 6752 |  | 
|---|
| 6753 | SelectCode(N: Node); | 
|---|
| 6754 | } | 
|---|
| 6755 |  | 
|---|
| 6756 | bool X86DAGToDAGISel::SelectInlineAsmMemoryOperand( | 
|---|
| 6757 | const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, | 
|---|
| 6758 | std::vector<SDValue> &OutOps) { | 
|---|
| 6759 | SDValue Op0, Op1, Op2, Op3, Op4; | 
|---|
| 6760 | switch (ConstraintID) { | 
|---|
| 6761 | default: | 
|---|
| 6762 | llvm_unreachable( "Unexpected asm memory constraint"); | 
|---|
| 6763 | case InlineAsm::ConstraintCode::o: // offsetable        ?? | 
|---|
| 6764 | case InlineAsm::ConstraintCode::v: // not offsetable    ?? | 
|---|
| 6765 | case InlineAsm::ConstraintCode::m: // memory | 
|---|
| 6766 | case InlineAsm::ConstraintCode::X: | 
|---|
| 6767 | case InlineAsm::ConstraintCode::p: // address | 
|---|
| 6768 | if (!selectAddr(Parent: nullptr, N: Op, Base&: Op0, Scale&: Op1, Index&: Op2, Disp&: Op3, Segment&: Op4)) | 
|---|
| 6769 | return true; | 
|---|
| 6770 | break; | 
|---|
| 6771 | } | 
|---|
| 6772 |  | 
|---|
| 6773 | OutOps.push_back(x: Op0); | 
|---|
| 6774 | OutOps.push_back(x: Op1); | 
|---|
| 6775 | OutOps.push_back(x: Op2); | 
|---|
| 6776 | OutOps.push_back(x: Op3); | 
|---|
| 6777 | OutOps.push_back(x: Op4); | 
|---|
| 6778 | return false; | 
|---|
| 6779 | } | 
|---|
| 6780 |  | 
|---|
| 6781 | X86ISelDAGToDAGPass::X86ISelDAGToDAGPass(X86TargetMachine &TM) | 
|---|
| 6782 | : SelectionDAGISelPass( | 
|---|
| 6783 | std::make_unique<X86DAGToDAGISel>(args&: TM, args: TM.getOptLevel())) {} | 
|---|
| 6784 |  | 
|---|
| 6785 | /// This pass converts a legalized DAG into a X86-specific DAG, | 
|---|
| 6786 | /// ready for instruction scheduling. | 
|---|
| 6787 | FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, | 
|---|
| 6788 | CodeGenOptLevel OptLevel) { | 
|---|
| 6789 | return new X86DAGToDAGISelLegacy(TM, OptLevel); | 
|---|
| 6790 | } | 
|---|
| 6791 |  | 
|---|