//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "X86LegalizerInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Type.h"

using namespace llvm;
using namespace TargetOpcode;
using namespace LegalizeActions;
using namespace LegalityPredicates;

X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                                   const X86TargetMachine &TM)
    : Subtarget(STI) {

  bool Is64Bit = Subtarget.is64Bit();
  bool HasCMOV = Subtarget.canUseCMOV();
  bool HasSSE1 = Subtarget.hasSSE1();
  bool HasSSE2 = Subtarget.hasSSE2();
  bool HasSSE41 = Subtarget.hasSSE41();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX2 = Subtarget.hasAVX2();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool HasVLX = Subtarget.hasVLX();
  bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI();
  bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI();
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
  bool HasPOPCNT = Subtarget.hasPOPCNT();
  bool HasLZCNT = Subtarget.hasLZCNT();
  bool HasBMI = Subtarget.hasBMI();

  const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s80 = LLT::scalar(80);
  const LLT s128 = LLT::scalar(128);
  const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32;
  const LLT v2s32 = LLT::fixed_vector(2, 32);
  const LLT v4s8 = LLT::fixed_vector(4, 8);

  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT v2p0 = LLT::fixed_vector(2, p0);

  const LLT v32s8 = LLT::fixed_vector(32, 8);
  const LLT v16s16 = LLT::fixed_vector(16, 16);
  const LLT v8s32 = LLT::fixed_vector(8, 32);
  const LLT v4s64 = LLT::fixed_vector(4, 64);
  const LLT v4p0 = LLT::fixed_vector(4, p0);

  const LLT v64s8 = LLT::fixed_vector(64, 8);
  const LLT v32s16 = LLT::fixed_vector(32, 16);
  const LLT v16s32 = LLT::fixed_vector(16, 32);
  const LLT v8s64 = LLT::fixed_vector(8, 64);

  const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8;
  const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16;
  const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32;
  const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64;

  // todo: AVX512 bool vector predicate types

  // implicit/constants
  // 32/64-bits needs support for s64/s128 to handle cases:
  // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF
  // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
  getActionDefinitionsBuilder(
      {G_IMPLICIT_DEF, G_PHI, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
      .legalFor({p0, s1, s8, s16, s32, s64})
      .legalFor(UseX87, {s80})
      .legalFor(Is64Bit, {s128})
      .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64})
      .widenScalarOrEltToNextPow2(0, /*Min=*/8)
      .clampScalarOrElt(0, s8, sMaxScalar)
      .moreElementsToNextPow2(0)
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
      .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
      .clampMaxNumElements(0, p0,
                           Is64Bit ? s64MaxVector.getNumElements()
                                   : s32MaxVector.getNumElements())
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar);

  getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
      .widenScalarIf(typeIs(1, s16),
                     [=](const LegalityQuery &) {
                       return std::pair<unsigned, LLT>(1, s32);
                     })
      .libcall();

  getActionDefinitionsBuilder(
      {G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH, G_FASIN, G_FTAN,
       G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP, G_FEXP2, G_FEXP10,
       G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS, G_FCEIL, G_FFLOOR})
      .libcall();

  getActionDefinitionsBuilder(G_FSQRT)
      .legalFor(HasSSE1 || UseX87, {s32})
      .legalFor(HasSSE2 || UseX87, {s64})
      .legalFor(UseX87, {s80});

  getActionDefinitionsBuilder({G_GET_ROUNDING, G_SET_ROUNDING})
      .customFor({s32});

  // merge/unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    getActionDefinitionsBuilder(Op)
        .widenScalarToNextPow2(LitTyIdx, /*Min=*/8)
        .widenScalarToNextPow2(BigTyIdx, /*Min=*/16)
        .minScalar(LitTyIdx, s8)
        .minScalar(BigTyIdx, s32)
        .legalIf([=](const LegalityQuery &Q) {
          switch (Q.Types[BigTyIdx].getSizeInBits()) {
          case 16:
          case 32:
          case 64:
          case 128:
          case 256:
          case 512:
            break;
          default:
            return false;
          }
          switch (Q.Types[LitTyIdx].getSizeInBits()) {
          case 8:
          case 16:
          case 32:
          case 64:
          case 128:
          case 256:
            return true;
          default:
            return false;
          }
        });
  }
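  // For example, the rules above make s64 = G_MERGE_VALUES s32, s32 (and the
  // matching G_UNMERGE_VALUES) legal: 64 is an accepted big-type size and 32
  // an accepted little-type size.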

  getActionDefinitionsBuilder({G_UMIN, G_UMAX, G_SMIN, G_SMAX})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .lower();

  // integer addition/subtraction
  getActionDefinitionsBuilder({G_ADD, G_SUB})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX2, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v16s32, v8s64})
      .legalFor(HasBWI, {v64s8, v32s16})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasBWI ? 64 : (HasAVX2 ? 32 : 16))
      .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX2 ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO})
      .legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
      .legalFor(Is64Bit, {{s64, s8}})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .clampScalar(1, s8, s8)
      .scalarize(0);

  // integer multiply
  getActionDefinitionsBuilder(G_MUL)
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v8s16})
      .legalFor(HasSSE41, {v4s32})
      .legalFor(HasAVX2, {v16s16, v8s32})
      .legalFor(HasAVX512, {v16s32})
      .legalFor(HasDQI, {v8s64})
      .legalFor(HasDQI && HasVLX, {v2s64, v4s64})
      .legalFor(HasBWI, {v32s16})
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, HasVLX ? 2 : 8)
      .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
      .clampMaxNumElements(0, s64, 8)
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  // integer divisions
  getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .libcallFor({s64})
      .clampScalar(0, s8, sMaxScalar);

  // integer shifts
  getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
      .legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
      .legalFor(Is64Bit, {{s64, s8}})
      .clampScalar(0, s8, sMaxScalar)
      .clampScalar(1, s8, s8);

  // integer logic
  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
      .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  // integer comparison
  const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0};
  const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0};

  getActionDefinitionsBuilder(G_ICMP)
      .legalForCartesianProduct({s8}, Is64Bit ? IntTypes64 : IntTypes32)
      .clampScalar(0, s8, s8)
      .widenScalarToNextPow2(1, /*Min=*/8)
      .clampScalar(1, s8, sMaxScalar);

  // bswap
  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s32, sMaxScalar);

  // popcount
  getActionDefinitionsBuilder(G_CTPOP)
      .legalFor(HasPOPCNT, {{s16, s16}, {s32, s32}})
      .legalFor(HasPOPCNT && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // count leading zeros (LZCNT)
  getActionDefinitionsBuilder(G_CTLZ)
      .legalFor(HasLZCNT, {{s16, s16}, {s32, s32}})
      .legalFor(HasLZCNT && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // count trailing zeros
  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
      .legalFor({{s16, s16}, {s32, s32}})
      .legalFor(Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  getActionDefinitionsBuilder(G_CTTZ)
      .legalFor(HasBMI, {{s16, s16}, {s32, s32}})
      .legalFor(HasBMI && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});

  // pointer handling
  const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32};
  const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64};

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct(Is64Bit ? PtrTypes64 : PtrTypes32, {p0})
      .maxScalar(0, sMaxScalar)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, sMaxScalar}});

  getActionDefinitionsBuilder(G_CONSTANT_POOL).legalFor({p0});

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s32}})
      .legalFor(Is64Bit, {{p0, s64}})
      .widenScalarToNextPow2(1, /*Min*/ 32)
      .clampScalar(1, s32, sMaxScalar);

  getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0});

  // load/store: add more corner cases
  for (unsigned Op : {G_LOAD, G_STORE}) {
    auto &Action = getActionDefinitionsBuilder(Op);
    Action.legalForTypesWithMemDesc({{s8, p0, s8, 1},
                                     {s16, p0, s16, 1},
                                     {s32, p0, s32, 1},
                                     {s80, p0, s80, 1},
                                     {p0, p0, p0, 1},
                                     {v4s8, p0, v4s8, 1}});
    if (Is64Bit)
      Action.legalForTypesWithMemDesc(
          {{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}});

    if (HasSSE1)
      Action.legalForTypesWithMemDesc({{v4s32, p0, v4s32, 1}});
    if (HasSSE2)
      Action.legalForTypesWithMemDesc({{v16s8, p0, v16s8, 1},
                                       {v8s16, p0, v8s16, 1},
                                       {v2s64, p0, v2s64, 1},
                                       {v2p0, p0, v2p0, 1}});
    if (HasAVX)
      Action.legalForTypesWithMemDesc({{v32s8, p0, v32s8, 1},
                                       {v16s16, p0, v16s16, 1},
                                       {v8s32, p0, v8s32, 1},
                                       {v4s64, p0, v4s64, 1},
                                       {v4p0, p0, v4p0, 1}});
    if (HasAVX512)
      Action.legalForTypesWithMemDesc({{v64s8, p0, v64s8, 1},
                                       {v32s16, p0, v32s16, 1},
                                       {v16s32, p0, v16s32, 1},
                                       {v8s64, p0, v8s64, 1}});

    // X86 supports extending loads but not stores for GPRs
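    // e.g. {s32, p0, s8, 1} below describes an any-extending load of a
    // single byte from memory into an s32 register.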
    if (Op == G_LOAD) {
      Action.legalForTypesWithMemDesc({{s8, p0, s1, 1},
                                       {s16, p0, s8, 1},
                                       {s32, p0, s8, 1},
                                       {s32, p0, s16, 1}});
      if (Is64Bit)
        Action.legalForTypesWithMemDesc(
            {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}});
    } else {
      Action.customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
      });
    }
    Action.widenScalarToNextPow2(0, /*Min=*/8)
        .clampScalar(0, s8, sMaxScalar)
        .scalarize(0);
  }

  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    auto &Action = getActionDefinitionsBuilder(Op);
    Action.legalForTypesWithMemDesc(
        {{s16, p0, s8, 1}, {s32, p0, s8, 1}, {s32, p0, s16, 1}});
    if (Is64Bit)
      Action.legalForTypesWithMemDesc(
          {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}});
    // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions
  }

  // sext, zext, and anyext
  getActionDefinitionsBuilder(G_ANYEXT)
      .legalFor({s8, s16, s32, s128})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar)
      .widenScalarToNextPow2(1, /*Min=*/8)
      .clampScalar(1, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar)
      .widenScalarToNextPow2(1, /*Min=*/8)
      .clampScalar(1, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  // fp constants
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .legalFor(UseX87, {s80});

  // fp arithmetic
  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
      .legalFor({s32, s64})
      .legalFor(HasSSE1, {v4s32})
      .legalFor(HasSSE2, {v2s64})
      .legalFor(HasAVX, {v8s32, v4s64})
      .legalFor(HasAVX512, {v16s32, v8s64})
      .legalFor(UseX87, {s80});

  getActionDefinitionsBuilder(G_FABS)
      .legalFor(UseX87, {s80})
      .legalFor(UseX87 && !Is64Bit, {s64})
      .lower();

  // fp comparison
  getActionDefinitionsBuilder(G_FCMP)
      .legalFor(HasSSE1 || UseX87, {s8, s32})
      .legalFor(HasSSE2 || UseX87, {s8, s64})
      .legalFor(UseX87, {s8, s80})
      .clampScalar(0, s8, s8)
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // fp conversions
  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(HasSSE2, {{s64, s32}})
      .legalFor(HasAVX, {{v4s64, v4s32}})
      .legalFor(HasAVX512, {{v8s64, v8s32}})
      .libcall();

  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(HasSSE2, {{s32, s64}})
      .legalFor(HasAVX, {{v4s32, v4s64}})
      .legalFor(HasAVX512, {{v8s32, v8s64}});

  getActionDefinitionsBuilder(G_SITOFP)
      .legalFor(HasSSE1, {{s32, s32}})
      .legalFor(HasSSE1 && Is64Bit, {{s32, s64}})
      .legalFor(HasSSE2, {{s64, s32}})
      .legalFor(HasSSE2 && Is64Bit, {{s64, s64}})
      .clampScalar(1, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
      .widenScalarToNextPow2(1)
      .customForCartesianProduct(UseX87, {s32, s64, s80}, {s16, s32, s64})
      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_FPTOSI)
      .legalFor(HasSSE1, {{s32, s32}})
      .legalFor(HasSSE1 && Is64Bit, {{s64, s32}})
      .legalFor(HasSSE2, {{s32, s64}})
      .legalFor(HasSSE2 && Is64Bit, {{s64, s64}})
      .clampScalar(0, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
      .widenScalarToNextPow2(0)
      .customForCartesianProduct(UseX87, {s16, s32, s64}, {s32, s64, s80})
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // For G_UITOFP and G_FPTOUI without AVX512, we have to custom-legalize
  // types <= s32 manually. Otherwise, the custom handler has no way to tell
  // whether an s32 is an original type that must be promoted to s64, or was
  // produced by widening and must not be widened again to s64.
  //
  // For AVX512 we simply widen types, as the opcodes map directly to asm
  // instructions.
  getActionDefinitionsBuilder(G_UITOFP)
      .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
      .customIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(0, s32)(Query)) ||
                (HasSSE2 && typeIs(0, s64)(Query))) &&
               scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query);
      })
      .lowerIf([=](const LegalityQuery &Query) {
        // Lower conversions from s64
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(0, s32)(Query)) ||
                (HasSSE2 && typeIs(0, s64)(Query))) &&
               (Is64Bit && typeIs(1, s64)(Query));
      })
      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, sMaxScalar)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_FPTOUI)
      .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
      .customIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(1, s32)(Query)) ||
                (HasSSE2 && typeIs(1, s64)(Query))) &&
               scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query);
      })
      // TODO: replace with customized legalization using
      // specifics of cvttsd2si. The selection of this node requires
      // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
      // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
      .lowerIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(1, s32)(Query)) ||
                (HasSSE2 && typeIs(1, s64)(Query))) &&
               (Is64Bit && typeIs(0, s64)(Query));
      })
      .clampScalar(0, s32, sMaxScalar)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // vector ops
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .customIf([=](const LegalityQuery &Query) {
        return (HasSSE1 && typeInSet(0, {v4s32})(Query)) ||
               (HasSSE2 && typeInSet(0, {v2s64, v8s16, v16s8})(Query)) ||
               (HasAVX && typeInSet(0, {v4s64, v8s32, v16s16, v32s8})(Query)) ||
               (HasAVX512 &&
                typeInSet(0, {v8s64, v16s32, v32s16, v64s8})(Query));
      })
      .clampNumElements(0, v16s8, s8MaxVector)
      .clampNumElements(0, v8s16, s16MaxVector)
      .clampNumElements(0, v4s32, s32MaxVector)
      .clampNumElements(0, v2s64, s64MaxVector)
      .moreElementsToNextPow2(0);
| 549 | |
| 550 | getActionDefinitionsBuilder(Opcodes: {G_EXTRACT, G_INSERT}) |
| 551 | .legalIf(Predicate: [=](const LegalityQuery &Query) { |
| 552 | unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1; |
| 553 | unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0; |
| 554 | return (HasAVX && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx, |
| 555 | TypesInit: {{v16s8, v32s8}, |
| 556 | {v8s16, v16s16}, |
| 557 | {v4s32, v8s32}, |
| 558 | {v2s64, v4s64}})(Query)) || |
| 559 | (HasAVX512 && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx, |
| 560 | TypesInit: {{v16s8, v64s8}, |
| 561 | {v32s8, v64s8}, |
| 562 | {v8s16, v32s16}, |
| 563 | {v16s16, v32s16}, |
| 564 | {v4s32, v16s32}, |
| 565 | {v8s32, v16s32}, |
| 566 | {v2s64, v8s64}, |
| 567 | {v4s64, v8s64}})(Query)); |
| 568 | }); |
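  // e.g. with AVX this permits extracting or inserting a v4s32 half of a
  // v8s32; with AVX512, a v4s32 quarter or v8s32 half of a v16s32.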

  // todo: only permit dst types up to max legal vector register size?
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor(
          HasSSE1,
          {{v32s8, v16s8}, {v16s16, v8s16}, {v8s32, v4s32}, {v4s64, v2s64}})
      .legalFor(HasAVX, {{v64s8, v16s8},
                         {v64s8, v32s8},
                         {v32s16, v8s16},
                         {v32s16, v16s16},
                         {v16s32, v4s32},
                         {v16s32, v8s32},
                         {v8s64, v2s64},
                         {v8s64, v4s64}});

  // todo: vectors and address spaces
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s16, s32}, {s32, s32}, {p0, s32}})
      .legalFor(!HasCMOV, {{s8, s32}})
      .legalFor(Is64Bit, {{s64, s32}})
      .legalFor(UseX87, {{s80, s32}})
      .clampScalar(1, s32, s32)
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar);

  // memory intrinsics
  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
      .lower();

  // fp intrinsics
  // G_IS_FPCLASS for i686 is disabled due to llvm issue #171992
  getActionDefinitionsBuilder(G_IS_FPCLASS)
      .lowerFor(Is64Bit, {{s1, s32}, {s1, s64}, {s1, s80}});

  getActionDefinitionsBuilder({G_INTRINSIC_ROUNDEVEN, G_INTRINSIC_TRUNC})
      .scalarize(0)
      .minScalar(0, LLT::scalar(32))
      .libcall();

  getLegacyLegalizerInfo().computeTables();
  verify(*STI.getInstrInfo());
}

bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
                                      LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_BUILD_VECTOR:
    return legalizeBuildVector(MI, MRI, Helper);
  case TargetOpcode::G_FPTOUI:
    return legalizeFPTOUI(MI, MRI, Helper);
  case TargetOpcode::G_UITOFP:
    return legalizeUITOFP(MI, MRI, Helper);
  case TargetOpcode::G_STORE:
    return legalizeNarrowingStore(MI, MRI, Helper);
  case TargetOpcode::G_SITOFP:
    return legalizeSITOFP(MI, MRI, Helper);
  case TargetOpcode::G_FPTOSI:
    return legalizeFPTOSI(MI, MRI, Helper);
  case TargetOpcode::G_GET_ROUNDING:
    return legalizeGETROUNDING(MI, MRI, Helper);
  case TargetOpcode::G_SET_ROUNDING:
    return legalizeSETROUNDING(MI, MRI, Helper);
  }
  llvm_unreachable("expected switch to return");
}

bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineFunction &MF = *MI.getMF();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 ||
          SrcTy.getSizeInBits() == 64) &&
         "Unexpected source type for SITOFP in X87 mode.");

  TypeSize MemSize = SrcTy.getSizeInBytes();
  MachinePointerInfo PtrInfo;
  Align Alignment = Helper.getStackTemporaryAlignment(SrcTy);
  auto SlotPointer = Helper.createStackTemporary(MemSize, Alignment, PtrInfo);
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));

  // Spill the integer value to the stack slot, then load it onto the x87
  // stack with FILD, which performs the int-to-fp conversion.
  MIRBuilder.buildStore(Src, SlotPointer, *StoreMMO);

  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Align(MemSize));
  MIRBuilder.buildInstr(X86::G_FILD)
      .addDef(Dst)
      .addUse(SlotPointer.getReg(0))
      .addMemOperand(LoadMMO);

  MI.eraseFromParent();
  return true;
}

bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineFunction &MF = *MI.getMF();
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  TypeSize MemSize = DstTy.getSizeInBytes();
  MachinePointerInfo PtrInfo;
  Align Alignment = Helper.getStackTemporaryAlignment(DstTy);
  auto SlotPointer = Helper.createStackTemporary(MemSize, Alignment, PtrInfo);
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));

  // FIST converts the x87 value and stores the integer result to the stack
  // slot; reload it from there into Dst.
  MIRBuilder.buildInstr(X86::G_FIST)
      .addUse(Src)
      .addUse(SlotPointer.getReg(0))
      .addMemOperand(StoreMMO);

  MIRBuilder.buildLoad(Dst, SlotPointer, PtrInfo, Align(MemSize));
  MI.eraseFromParent();
  return true;
}

bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  const auto &BuildVector = cast<GBuildVector>(MI);
  Register Dst = BuildVector.getReg(0);
  LLT DstTy = MRI.getType(Dst);
  MachineFunction &MF = MIRBuilder.getMF();
  LLVMContext &Ctx = MF.getFunction().getContext();
  uint64_t DstTySize = DstTy.getScalarSizeInBits();

  SmallVector<Constant *, 4> CstIdxs;
  for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) {
    Register Source = BuildVector.getSourceReg(i);

    auto ValueAndReg = getIConstantVRegValWithLookThrough(Source, MRI);
    if (ValueAndReg) {
      CstIdxs.emplace_back(ConstantInt::get(Ctx, ValueAndReg->Value));
      continue;
    }

    auto FPValueAndReg = getFConstantVRegValWithLookThrough(Source, MRI);
    if (FPValueAndReg) {
      CstIdxs.emplace_back(ConstantFP::get(Ctx, FPValueAndReg->Value));
      continue;
    }

    if (getOpcodeDef<GImplicitDef>(Source, MRI)) {
      CstIdxs.emplace_back(UndefValue::get(Type::getIntNTy(Ctx, DstTySize)));
      continue;
    }
    return false;
  }

  Constant *ConstVal = ConstantVector::get(CstIdxs);

  const DataLayout &DL = MIRBuilder.getDataLayout();
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
  auto Addr = MIRBuilder.buildConstantPool(
      LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)),
      MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                              MachineMemOperand::MOLoad, DstTy, Alignment);

  MIRBuilder.buildLoad(Dst, Addr, *MMO);
  MI.eraseFromParent();
  return true;
}

bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  // Simply reuse FPTOSI when it is possible to widen the type
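  // This is exact: an unsigned value of at most 32 bits always fits in the
  // non-negative range of the next wider signed type, so the signed
  // conversion cannot overflow and truncation recovers the unsigned bits.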
  if (DstSizeInBits <= 32) {
    auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src);
    MIRBuilder.buildTrunc(Dst, Casted);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  // Simply reuse SITOFP when it is possible to widen the type
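  // This is exact: after zero-extension the value is non-negative when
  // reinterpreted as the wider signed type, so SITOFP and UITOFP agree.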
  if (SrcTy.getSizeInBits() <= 32) {
    auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src);
    MIRBuilder.buildSITOFP(Dst, Ext);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              LegalizerHelper &Helper) const {
  auto &Store = cast<GStore>(MI);
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineMemOperand &MMO = **Store.memoperands_begin();
  MachineFunction &MF = MIRBuilder.getMF();
  LLT ValTy = MRI.getType(Store.getValueReg());
  auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), ValTy);

  Helper.Observer.changingInstr(Store);
  Store.setMemRefs(MF, {NewMMO});
  Helper.Observer.changedInstr(Store);
  return true;
}

bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           LegalizerHelper &Helper) const {
  /*
    The rounding mode is in bits 11:10 of FPSR, and has the following
    settings:
      00 Round to nearest
      01 Round to -inf
      10 Round to +inf
      11 Round to 0

    GET_ROUNDING, on the other hand, expects the following:
      -1 Undefined
       0 Round to 0
       1 Round to nearest
       2 Round to +inf
       3 Round to -inf

    To perform the conversion, we use a packed lookup table of the four 2-bit
    values that we can index by FPSR[11:10]
      0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]

    (0x2d >> ((FPSR >> 9) & 6)) & 3
  */
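  // Worked example: for FPSR[11:10] = 0b10 (round to +inf), the shift amount
  // is (FPSR >> 9) & 6 = 4, and (0x2d >> 4) & 3 = 0b10 = 2, as GET_ROUNDING
  // expects.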

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineFunction &MF = MIRBuilder.getMF();
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);

  // Save FP Control Word to stack slot
  int MemSize = 2;
  Align Alignment = Align(2);
  MachinePointerInfo PtrInfo;
  auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
                                               Alignment, PtrInfo);
  Register StackPtr = StackTemp.getReg(0);

  auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                          MemSize, Alignment);

  // Store FP Control Word to stack slot using G_FNSTCW16
  MIRBuilder.buildInstr(X86::G_FNSTCW16)
      .addUse(StackPtr)
      .addMemOperand(StoreMMO);

  // Load FP Control Word from stack slot
  auto LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                         MemSize, Alignment);

  auto CWD32 =
      MIRBuilder.buildZExt(s32, MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO));
  auto Shifted8 = MIRBuilder.buildTrunc(
      s8, MIRBuilder.buildLShr(s32, CWD32, MIRBuilder.buildConstant(s8, 9)));
  auto Masked32 = MIRBuilder.buildZExt(
      s32, MIRBuilder.buildAnd(s8, Shifted8, MIRBuilder.buildConstant(s8, 6)));
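  // Masked32 now holds 2 * RM, i.e. the control-word RM field scaled into a
  // shift amount for the packed lookup table below.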

  // LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding
  // mode (from bits 11:10 of the control word) to the values expected by
  // GET_ROUNDING. The mapping is performed by shifting LUT right by the
  // extracted rounding mode and masking the result with 3 to obtain the final
  // value.
  auto LUT = MIRBuilder.buildConstant(s32, 0x2d);
  auto LUTShifted = MIRBuilder.buildLShr(s32, LUT, Masked32);
  auto RetVal =
      MIRBuilder.buildAnd(s32, LUTShifted, MIRBuilder.buildConstant(s32, 3));
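  // RetVal now equals (0x2d >> (2 * RM)) & 3, the table lookup described
  // above.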
  auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal);

  MIRBuilder.buildCopy(Dst, RetValTrunc);

  MI.eraseFromParent();
  return true;
}

bool X86LegalizerInfo::legalizeSETROUNDING(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineFunction &MF = MIRBuilder.getMF();
  Register Src = MI.getOperand(0).getReg();
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);

  // Allocate stack slot for control word and MXCSR (4 bytes).
  int MemSize = 4;
  Align Alignment = Align(4);
  MachinePointerInfo PtrInfo;
  auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
                                               Alignment, PtrInfo);
  Register StackPtr = StackTemp.getReg(0);

  auto StoreMMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 2, Align(2));
  MIRBuilder.buildInstr(X86::G_FNSTCW16)
      .addUse(StackPtr)
      .addMemOperand(StoreMMO);

  auto LoadMMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 2, Align(2));
  auto CWD16 = MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO);

  // Clear RM field (bits 11:10)
  auto ClearedCWD =
      MIRBuilder.buildAnd(s16, CWD16, MIRBuilder.buildConstant(s16, 0xf3ff));

  // Check if Src is a constant
  auto *SrcDef = MRI.getVRegDef(Src);
  Register RMBits;
  Register MXCSRRMBits;

  if (SrcDef && SrcDef->getOpcode() == TargetOpcode::G_CONSTANT) {
    uint64_t RM = getIConstantFromReg(Src, MRI).getZExtValue();
    int FieldVal = X86::getRoundingModeX86(RM);

    if (FieldVal == X86::rmInvalid) {
      FieldVal = X86::rmToNearest;
      LLVMContext &C = MF.getFunction().getContext();
      C.diagnose(DiagnosticInfoUnsupported(
          MF.getFunction(), "rounding mode is not supported by X86 hardware",
          DiagnosticLocation(MI.getDebugLoc()), DS_Error));
      return false;
    }

    FieldVal = FieldVal << 3;
    RMBits = MIRBuilder.buildConstant(s16, FieldVal).getReg(0);
    MXCSRRMBits = MIRBuilder.buildConstant(s32, FieldVal).getReg(0);
  } else {
    // Convert Src (rounding mode) to bits for control word
    // (0xc9 << (2 * Src + 4)) & 0xc00
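    // This maps each SET_ROUNDING operand value onto the RM field:
    //   Src=0 (toward zero): 0xc9 <<  4 = 0xc90,   & 0xc00 = 0xc00 (RM=11)
    //   Src=1 (to nearest):  0xc9 <<  6 = 0x3240,  & 0xc00 = 0x000 (RM=00)
    //   Src=2 (toward +inf): 0xc9 <<  8 = 0xc900,  & 0xc00 = 0x800 (RM=10)
    //   Src=3 (toward -inf): 0xc9 << 10 = 0x32400, & 0xc00 = 0x400 (RM=01)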
    auto Src32 = MIRBuilder.buildZExtOrTrunc(s32, Src);
    auto ShiftAmt = MIRBuilder.buildAdd(
        s32, MIRBuilder.buildShl(s32, Src32, MIRBuilder.buildConstant(s32, 1)),
        MIRBuilder.buildConstant(s32, 4));
    auto ShiftAmt8 = MIRBuilder.buildTrunc(s8, ShiftAmt);
    auto Shifted = MIRBuilder.buildShl(s16, MIRBuilder.buildConstant(s16, 0xc9),
                                       ShiftAmt8);
    RMBits =
        MIRBuilder.buildAnd(s16, Shifted, MIRBuilder.buildConstant(s16, 0xc00))
            .getReg(0);

    // For non-constant case, we still need to compute MXCSR bits dynamically
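    // The control word keeps RM in bits 11:10 while MXCSR keeps it in bits
    // 14:13, so the computed field is shifted left by 3 to line it up.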
    auto RMBits32 = MIRBuilder.buildZExt(s32, RMBits);
    MXCSRRMBits =
        MIRBuilder.buildShl(s32, RMBits32, MIRBuilder.buildConstant(s32, 3))
            .getReg(0);
  }
  // Update rounding mode bits
  auto NewCWD =
      MIRBuilder.buildOr(s16, ClearedCWD, RMBits, MachineInstr::Disjoint);

  // Store new FP Control Word to stack
  auto StoreNewMMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 2, Align(2));
  MIRBuilder.buildStore(NewCWD, StackPtr, *StoreNewMMO);

  // Load FP control word from the slot using G_FLDCW16
  auto LoadNewMMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 2, Align(2));
  MIRBuilder.buildInstr(X86::G_FLDCW16)
      .addUse(StackPtr)
      .addMemOperand(LoadNewMMO);

  if (Subtarget.hasSSE1()) {
    // Store MXCSR to stack (use STMXCSR)
    auto StoreMXCSRMMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOStore, 4, Align(4));
    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
        .addIntrinsicID(Intrinsic::x86_sse_stmxcsr)
        .addUse(StackPtr)
        .addMemOperand(StoreMXCSRMMO);

    // Load MXCSR from stack
    auto LoadMXCSRMMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, 4, Align(4));
    auto MXCSR = MIRBuilder.buildLoad(s32, StackPtr, *LoadMXCSRMMO);

    // Clear RM field (bits 14:13)
    auto ClearedMXCSR = MIRBuilder.buildAnd(
        s32, MXCSR, MIRBuilder.buildConstant(s32, 0xffff9fff));

    // Update rounding mode bits
    auto NewMXCSR = MIRBuilder.buildOr(s32, ClearedMXCSR, MXCSRRMBits);

    // Store new MXCSR to stack
    auto StoreNewMXCSRMMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOStore, 4, Align(4));
    MIRBuilder.buildStore(NewMXCSR, StackPtr, *StoreNewMXCSRMMO);

    // Load MXCSR from stack (use LDMXCSR)
    auto LoadNewMXCSRMMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, 4, Align(4));
    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
        .addIntrinsicID(Intrinsic::x86_sse_ldmxcsr)
        .addUse(StackPtr)
        .addMemOperand(LoadNewMXCSRMMO);
  }

  MI.eraseFromParent();
  return true;
}

bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                         MachineInstr &MI) const {
  return true;
}