//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "X86LegalizerInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;
using namespace TargetOpcode;
using namespace LegalizeActions;
using namespace LegalityPredicates;

X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                                   const X86TargetMachine &TM)
    : Subtarget(STI) {

  bool Is64Bit = Subtarget.is64Bit();
  bool HasCMOV = Subtarget.canUseCMOV();
  bool HasSSE1 = Subtarget.hasSSE1();
  bool HasSSE2 = Subtarget.hasSSE2();
  bool HasSSE41 = Subtarget.hasSSE41();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX2 = Subtarget.hasAVX2();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool HasVLX = Subtarget.hasVLX();
  bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI();
  bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI();
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
  bool HasPOPCNT = Subtarget.hasPOPCNT();
  bool HasLZCNT = Subtarget.hasLZCNT();
  bool HasBMI = Subtarget.hasBMI();

  const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s80 = LLT::scalar(80);
  const LLT s128 = LLT::scalar(128);
  const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32;
  const LLT v2s32 = LLT::fixed_vector(2, 32);
  const LLT v4s8 = LLT::fixed_vector(4, 8);

  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT v2p0 = LLT::fixed_vector(2, p0);

  const LLT v32s8 = LLT::fixed_vector(32, 8);
  const LLT v16s16 = LLT::fixed_vector(16, 16);
  const LLT v8s32 = LLT::fixed_vector(8, 32);
  const LLT v4s64 = LLT::fixed_vector(4, 64);
  const LLT v4p0 = LLT::fixed_vector(4, p0);

  const LLT v64s8 = LLT::fixed_vector(64, 8);
  const LLT v32s16 = LLT::fixed_vector(32, 16);
  const LLT v16s32 = LLT::fixed_vector(16, 32);
  const LLT v8s64 = LLT::fixed_vector(8, 64);

  const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8;
  const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16;
  const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32;
  const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64;

  // todo: AVX512 bool vector predicate types

  // implicit/constants
  // 32/64-bits needs support for s64/s128 to handle cases:
  // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF
  // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64})
      .legalFor(Is64Bit, {s128});

  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar);

  getActionDefinitionsBuilder(
      {G_LROUND, G_LLROUND, G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH,
       G_FASIN, G_FTAN, G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP,
       G_FEXP2, G_FEXP10, G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS})
      .libcall();

  getActionDefinitionsBuilder(G_FSQRT)
      .legalFor(HasSSE1 || UseX87, {s32})
      .legalFor(HasSSE2 || UseX87, {s64})
      .legalFor(UseX87, {s80});

  // merge/unmerge
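  // A merge/unmerge is legal when both the wide and the narrow type have
  // power-of-two sizes that fit in a register (wide: 16..512 bits, narrow:
  // 8..256 bits); anything smaller is first widened to those bounds.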
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    getActionDefinitionsBuilder(Op)
        .widenScalarToNextPow2(LitTyIdx, /*Min=*/8)
        .widenScalarToNextPow2(BigTyIdx, /*Min=*/16)
        .minScalar(LitTyIdx, s8)
        .minScalar(BigTyIdx, s32)
        .legalIf([=](const LegalityQuery &Q) {
          switch (Q.Types[BigTyIdx].getSizeInBits()) {
          case 16:
          case 32:
          case 64:
          case 128:
          case 256:
          case 512:
            break;
          default:
            return false;
          }
          switch (Q.Types[LitTyIdx].getSizeInBits()) {
          case 8:
          case 16:
          case 32:
          case 64:
          case 128:
          case 256:
            return true;
          default:
            return false;
          }
        });
  }

  // integer addition/subtraction
  getActionDefinitionsBuilder({G_ADD, G_SUB})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX2, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v16s32, v8s64})
      .legalFor(HasBWI, {v64s8, v32s16})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasBWI ? 64 : (HasAVX2 ? 32 : 16))
      .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX2 ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO})
      .legalFor({{s8, s1}, {s16, s1}, {s32, s1}})
      .legalFor(Is64Bit, {{s64, s1}})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .clampScalar(1, s1, s1)
      .scalarize(0);

  // integer multiply
  getActionDefinitionsBuilder(G_MUL)
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v8s16})
      .legalFor(HasSSE41, {v4s32})
      .legalFor(HasAVX2, {v16s16, v8s32})
      .legalFor(HasAVX512, {v16s32})
      .legalFor(HasDQI, {v8s64})
      .legalFor(HasDQI && HasVLX, {v2s64, v4s64})
      .legalFor(HasBWI, {v32s16})
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, HasVLX ? 2 : 8)
      .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
      .clampMaxNumElements(0, s64, 8)
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  // integer divisions
  getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .libcallFor({s64})
      .clampScalar(0, s8, sMaxScalar);

  // integer shifts
  getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
      .legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
      .legalFor(Is64Bit, {{s64, s8}})
      .clampScalar(0, s8, sMaxScalar)
      .clampScalar(1, s8, s8);

  // integer logic
  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
      .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  // integer comparison
  const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0};
  const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0};

  getActionDefinitionsBuilder(G_ICMP)
      .legalForCartesianProduct({s8}, Is64Bit ? IntTypes64 : IntTypes32)
      .clampScalar(0, s8, s8)
      .clampScalar(1, s8, sMaxScalar);

  // bswap
  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s32, sMaxScalar);

  // popcount
  getActionDefinitionsBuilder(G_CTPOP)
      .legalFor(HasPOPCNT, {{s16, s16}, {s32, s32}})
      .legalFor(HasPOPCNT && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // count leading zeros (LZCNT)
  getActionDefinitionsBuilder(G_CTLZ)
      .legalFor(HasLZCNT, {{s16, s16}, {s32, s32}})
      .legalFor(HasLZCNT && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // count trailing zeros
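  // BSF leaves its destination undefined for a zero input, so the ZERO_UNDEF
  // form is always legal; plain G_CTTZ is only legal with BMI's TZCNT.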
  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
      .legalFor({{s16, s16}, {s32, s32}})
      .legalFor(Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  getActionDefinitionsBuilder(G_CTTZ)
      .legalFor(HasBMI, {{s16, s16}, {s32, s32}})
      .legalFor(HasBMI && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // control flow
  getActionDefinitionsBuilder(G_PHI)
      .legalFor({s8, s16, s32, p0})
      .legalFor(UseX87, {s80})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE1, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
      .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});

  // pointer handling
  const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32};
  const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64};

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct(Is64Bit ? PtrTypes64 : PtrTypes32, {p0})
      .maxScalar(0, sMaxScalar)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, sMaxScalar}});

  getActionDefinitionsBuilder(G_CONSTANT_POOL).legalFor({p0});

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s32}})
      .legalFor(Is64Bit, {{p0, s64}})
      .widenScalarToNextPow2(1, /*Min*/ 32)
      .clampScalar(1, s32, sMaxScalar);

  getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0});

  // load/store: add more corner cases
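  // Each mem-desc entry below reads {value type, pointer type, memory type,
  // minimum alignment}.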
  for (unsigned Op : {G_LOAD, G_STORE}) {
    auto &Action = getActionDefinitionsBuilder(Op);
    Action.legalForTypesWithMemDesc({{s8, p0, s8, 1},
                                     {s16, p0, s16, 1},
                                     {s32, p0, s32, 1},
                                     {s80, p0, s80, 1},
                                     {p0, p0, p0, 1},
                                     {v4s8, p0, v4s8, 1}});
    if (Is64Bit)
      Action.legalForTypesWithMemDesc(
          {{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}});

    if (HasSSE1)
      Action.legalForTypesWithMemDesc({{v4s32, p0, v4s32, 1}});
    if (HasSSE2)
      Action.legalForTypesWithMemDesc({{v16s8, p0, v16s8, 1},
                                       {v8s16, p0, v8s16, 1},
                                       {v2s64, p0, v2s64, 1},
                                       {v2p0, p0, v2p0, 1}});
    if (HasAVX)
      Action.legalForTypesWithMemDesc({{v32s8, p0, v32s8, 1},
                                       {v16s16, p0, v16s16, 1},
                                       {v8s32, p0, v8s32, 1},
                                       {v4s64, p0, v4s64, 1},
                                       {v4p0, p0, v4p0, 1}});
    if (HasAVX512)
      Action.legalForTypesWithMemDesc({{v64s8, p0, v64s8, 1},
                                       {v32s16, p0, v32s16, 1},
                                       {v16s32, p0, v16s32, 1},
                                       {v8s64, p0, v8s64, 1}});

    // X86 supports extending loads but not stores for GPRs
    if (Op == G_LOAD) {
      Action.legalForTypesWithMemDesc({{s8, p0, s1, 1},
                                       {s16, p0, s8, 1},
                                       {s32, p0, s8, 1},
                                       {s32, p0, s16, 1}});
      if (Is64Bit)
        Action.legalForTypesWithMemDesc(
            {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}});
    } else {
      Action.customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
      });
    }
    Action.widenScalarToNextPow2(0, /*Min=*/8)
        .clampScalar(0, s8, sMaxScalar)
        .scalarize(0);
  }

  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    auto &Action = getActionDefinitionsBuilder(Op);
    Action.legalForTypesWithMemDesc(
        {{s16, p0, s8, 1}, {s32, p0, s8, 1}, {s32, p0, s16, 1}});
    if (Is64Bit)
      Action.legalForTypesWithMemDesc(
          {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}});
    // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions
  }

  // sext, zext, and anyext
  getActionDefinitionsBuilder(G_ANYEXT)
      .legalFor({s8, s16, s32, s128})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar)
      .widenScalarToNextPow2(1, /*Min=*/8)
      .clampScalar(1, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar)
      .widenScalarToNextPow2(1, /*Min=*/8)
      .clampScalar(1, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  // fp constants
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .legalFor(UseX87, {s80});

  // fp arithmetic
  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
      .legalFor({s32, s64})
      .legalFor(HasSSE1, {v4s32})
      .legalFor(HasSSE2, {v2s64})
      .legalFor(HasAVX, {v8s32, v4s64})
      .legalFor(HasAVX512, {v16s32, v8s64})
      .legalFor(UseX87, {s80});

  getActionDefinitionsBuilder(G_FABS)
      .legalFor(UseX87, {s80})
      .legalFor(UseX87 && !Is64Bit, {s64})
      .lower();

  // fp comparison
  getActionDefinitionsBuilder(G_FCMP)
      .legalFor(HasSSE1 || UseX87, {s8, s32})
      .legalFor(HasSSE2 || UseX87, {s8, s64})
      .legalFor(UseX87, {s8, s80})
      .clampScalar(0, s8, s8)
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // fp conversions
  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(HasSSE2, {{s64, s32}})
      .legalFor(HasAVX, {{v4s64, v4s32}})
      .legalFor(HasAVX512, {{v8s64, v8s32}});

  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(HasSSE2, {{s32, s64}})
      .legalFor(HasAVX, {{v4s32, v4s64}})
      .legalFor(HasAVX512, {{v8s32, v8s64}});

  getActionDefinitionsBuilder(G_SITOFP)
      .legalFor(HasSSE1, {{s32, s32}})
      .legalFor(HasSSE1 && Is64Bit, {{s32, s64}})
      .legalFor(HasSSE2, {{s64, s32}})
      .legalFor(HasSSE2 && Is64Bit, {{s64, s64}})
      .clampScalar(1, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
      .widenScalarToNextPow2(1)
      .customForCartesianProduct(UseX87, {s32, s64, s80}, {s16, s32, s64})
      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_FPTOSI)
      .legalFor(HasSSE1, {{s32, s32}})
      .legalFor(HasSSE1 && Is64Bit, {{s64, s32}})
      .legalFor(HasSSE2, {{s32, s64}})
      .legalFor(HasSSE2 && Is64Bit, {{s64, s64}})
      .clampScalar(0, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
      .widenScalarToNextPow2(0)
      .customForCartesianProduct(UseX87, {s16, s32, s64}, {s32, s64, s80})
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types
  // <= s32 manually. Otherwise, in custom handler there is no way to
  // understand whether s32 is an original type and we need to promote it to
  // s64 or s32 is obtained after widening and we shouldn't widen it to s64.
  //
  // For AVX512 we simply widen types as there is direct mapping from opcodes
  // to asm instructions.
  getActionDefinitionsBuilder(G_UITOFP)
      .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
      .customIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(0, s32)(Query)) ||
                (HasSSE2 && typeIs(0, s64)(Query))) &&
               scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query);
      })
      .lowerIf([=](const LegalityQuery &Query) {
        // Lower conversions from s64
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(0, s32)(Query)) ||
                (HasSSE2 && typeIs(0, s64)(Query))) &&
               (Is64Bit && typeIs(1, s64)(Query));
      })
      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, sMaxScalar)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_FPTOUI)
      .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
      .customIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(1, s32)(Query)) ||
                (HasSSE2 && typeIs(1, s64)(Query))) &&
               scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query);
      })
      // TODO: replace with customized legalization using
      // specifics of cvttsd2si. The selection of this node requires
      // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
      // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
      .lowerIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(1, s32)(Query)) ||
                (HasSSE2 && typeIs(1, s64)(Query))) &&
               (Is64Bit && typeIs(0, s64)(Query));
      })
      .clampScalar(0, s32, sMaxScalar)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // vector ops
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .customIf([=](const LegalityQuery &Query) {
        return (HasSSE1 && typeInSet(0, {v4s32})(Query)) ||
               (HasSSE2 && typeInSet(0, {v2s64, v8s16, v16s8})(Query)) ||
               (HasAVX && typeInSet(0, {v4s64, v8s32, v16s16, v32s8})(Query)) ||
               (HasAVX512 &&
                typeInSet(0, {v8s64, v16s32, v32s16, v64s8})(Query));
      })
      .clampNumElements(0, v16s8, s8MaxVector)
      .clampNumElements(0, v8s16, s16MaxVector)
      .clampNumElements(0, v4s32, s32MaxVector)
      .clampNumElements(0, v2s64, s64MaxVector)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_EXTRACT, G_INSERT})
      .legalIf([=](const LegalityQuery &Query) {
        unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1;
        unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0;
        return (HasAVX && typePairInSet(SubIdx, FullIdx,
                                        {{v16s8, v32s8},
                                         {v8s16, v16s16},
                                         {v4s32, v8s32},
                                         {v2s64, v4s64}})(Query)) ||
               (HasAVX512 && typePairInSet(SubIdx, FullIdx,
                                           {{v16s8, v64s8},
                                            {v32s8, v64s8},
                                            {v8s16, v32s16},
                                            {v16s16, v32s16},
                                            {v4s32, v16s32},
                                            {v8s32, v16s32},
                                            {v2s64, v8s64},
                                            {v4s64, v8s64}})(Query));
      });

  // todo: only permit dst types up to max legal vector register size?
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor(
          HasSSE1,
          {{v32s8, v16s8}, {v16s16, v8s16}, {v8s32, v4s32}, {v4s64, v2s64}})
      .legalFor(HasAVX, {{v64s8, v16s8},
                         {v64s8, v32s8},
                         {v32s16, v8s16},
                         {v32s16, v16s16},
                         {v16s32, v4s32},
                         {v16s32, v8s32},
                         {v8s64, v2s64},
                         {v8s64, v4s64}});

  // todo: vectors and address spaces
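  // Note: CMOV has no 8-bit form, hence the s16 minimum on the result when
  // CMOV is available.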
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
      .clampScalar(1, s32, s32);

  // memory intrinsics
  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
      .lower();

  // fp intrinsics
  getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
      .scalarize(0)
      .minScalar(0, LLT::scalar(32))
      .libcall();

  getActionDefinitionsBuilder({G_FREEZE, G_CONSTANT_FOLD_BARRIER})
      .legalFor({s8, s16, s32, s64, p0})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar);

  getLegacyLegalizerInfo().computeTables();
  verify(*STI.getInstrInfo());
}

bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
                                      LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_BUILD_VECTOR:
    return legalizeBuildVector(MI, MRI, Helper);
  case TargetOpcode::G_FPTOUI:
    return legalizeFPTOUI(MI, MRI, Helper);
  case TargetOpcode::G_UITOFP:
    return legalizeUITOFP(MI, MRI, Helper);
  case TargetOpcode::G_STORE:
    return legalizeNarrowingStore(MI, MRI, Helper);
  case TargetOpcode::G_SITOFP:
    return legalizeSITOFP(MI, MRI, Helper);
  case TargetOpcode::G_FPTOSI:
    return legalizeFPTOSI(MI, MRI, Helper);
  }
  llvm_unreachable("expected switch to return");
}

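// x87 has no instruction to move an integer register directly into an FP
// register, so G_SITOFP is lowered through memory: spill the integer to a
// stack slot and reload it with FILD, which converts on the way in.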
bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineFunction &MF = *MI.getMF();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 ||
          SrcTy.getSizeInBits() == 64) &&
         "Unexpected source type for SITOFP in X87 mode.");

  TypeSize MemSize = SrcTy.getSizeInBytes();
  MachinePointerInfo PtrInfo;
  Align Alignmt = Helper.getStackTemporaryAlignment(SrcTy);
  auto SlotPointer = Helper.createStackTemporary(MemSize, Alignmt, PtrInfo);
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));

  // Store the integer value to the stack slot, then load it onto the FPU
  // stack with FILD.
  MIRBuilder.buildStore(Src, SlotPointer, *StoreMMO);

  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Align(MemSize));
  MIRBuilder.buildInstr(X86::G_FILD)
      .addDef(Dst)
      .addUse(SlotPointer.getReg(0))
      .addMemOperand(LoadMMO);

  MI.eraseFromParent();
  return true;
}

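// The inverse path: FIST stores the FP value from the x87 stack to memory as
// an integer (converting on the way out), and a plain load then moves it into
// a GPR.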
bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineFunction &MF = *MI.getMF();
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  TypeSize MemSize = DstTy.getSizeInBytes();
  MachinePointerInfo PtrInfo;
  Align Alignmt = Helper.getStackTemporaryAlignment(DstTy);
  auto SlotPointer = Helper.createStackTemporary(MemSize, Alignmt, PtrInfo);
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));

  MIRBuilder.buildInstr(X86::G_FIST)
      .addUse(Src)
      .addUse(SlotPointer.getReg(0))
      .addMemOperand(StoreMMO);

  MIRBuilder.buildLoad(Dst, SlotPointer, PtrInfo, Align(MemSize));
  MI.eraseFromParent();
  return true;
}

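// Lower a G_BUILD_VECTOR whose sources are all constants (or implicit-def) to
// a single load from a constant-pool entry holding the materialized vector.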
bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  const auto &BuildVector = cast<GBuildVector>(MI);
  Register Dst = BuildVector.getReg(0);
  LLT DstTy = MRI.getType(Dst);
  MachineFunction &MF = MIRBuilder.getMF();
  LLVMContext &Ctx = MF.getFunction().getContext();
  uint64_t DstTySize = DstTy.getScalarSizeInBits();

  SmallVector<Constant *, 4> CstIdxs;
  for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) {
    Register Source = BuildVector.getSourceReg(i);

    auto ValueAndReg = getIConstantVRegValWithLookThrough(Source, MRI);
    if (ValueAndReg) {
      CstIdxs.emplace_back(ConstantInt::get(Ctx, ValueAndReg->Value));
      continue;
    }

    auto FPValueAndReg = getFConstantVRegValWithLookThrough(Source, MRI);
    if (FPValueAndReg) {
      CstIdxs.emplace_back(ConstantFP::get(Ctx, FPValueAndReg->Value));
      continue;
    }

    if (getOpcodeDef<GImplicitDef>(Source, MRI)) {
      CstIdxs.emplace_back(UndefValue::get(Type::getIntNTy(Ctx, DstTySize)));
      continue;
    }
    return false;
  }

  Constant *ConstVal = ConstantVector::get(CstIdxs);

  const DataLayout &DL = MIRBuilder.getDataLayout();
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
  auto Addr = MIRBuilder.buildConstantPool(
      LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)),
      MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                              MachineMemOperand::MOLoad, DstTy, Alignment);

  MIRBuilder.buildLoad(Dst, Addr, *MMO);
  MI.eraseFromParent();
  return true;
}

bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  // Simply reuse FPTOSI when it is possible to widen the type: any unsigned
  // value of at most 32 bits fits in the non-negative range of the next wider
  // signed type, so the signed conversion plus a truncate is exact.
  if (DstSizeInBits <= 32) {
    auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src);
    MIRBuilder.buildTrunc(Dst, Casted);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  // Simply reuse SITOFP when it is possible to widen the type: zero-extending
  // the source into the next wider type makes it non-negative, so the signed
  // conversion produces the same value.
  if (SrcTy.getSizeInBits() <= 32) {
    auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src);
    MIRBuilder.buildSITOFP(Dst, Ext);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

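// A truncating G_STORE (value type wider than the memory type, e.g. an s8
// value stored as s1) is legalized by widening the memory operand to the full
// value type, turning it into a plain store.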
bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              LegalizerHelper &Helper) const {
  auto &Store = cast<GStore>(MI);
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineMemOperand &MMO = **Store.memoperands_begin();
  MachineFunction &MF = MIRBuilder.getMF();
  LLT ValTy = MRI.getType(Store.getValueReg());
  auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), ValTy);

  Helper.Observer.changingInstr(Store);
  Store.setMemRefs(MF, {NewMMO});
  Helper.Observer.changedInstr(Store);
  return true;
}

bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                         MachineInstr &MI) const {
  return true;
}