| 1 | //===- MipsLegalizerInfo.cpp ------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// This file implements the targeting of the Machinelegalizer class for Mips. |
| 10 | /// \todo This should be generated by TableGen. |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "MipsLegalizerInfo.h" |
| 14 | #include "MipsTargetMachine.h" |
| 15 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
| 16 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" |
| 17 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
| 18 | #include "llvm/IR/IntrinsicsMips.h" |
| 19 | |
| 20 | using namespace llvm; |
| 21 | |
// One supported (value type, pointer type, memory size) combination that a
// load/store legality query can be matched against.
struct TypesAndMemOps {
  LLT ValTy;        // Type of the loaded/stored value (Query.Types[0]).
  LLT PtrTy;        // Type of the pointer operand (Query.Types[1]).
  unsigned MemSize; // Memory access size in bits.
  // True when this access is legal regardless of its alignment on the
  // current subtarget.
  bool SystemSupportsUnalignedAccess;
};
| 28 | |
| 29 | // Assumes power of 2 memory size. Subtargets that have only naturally-aligned |
| 30 | // memory access need to perform additional legalization here. |
| 31 | static bool isUnalignedMemmoryAccess(uint64_t MemSize, uint64_t AlignInBits) { |
| 32 | assert(isPowerOf2_64(MemSize) && "Expected power of 2 memory size" ); |
| 33 | assert(isPowerOf2_64(AlignInBits) && "Expected power of 2 align" ); |
| 34 | if (MemSize > AlignInBits) |
| 35 | return true; |
| 36 | return false; |
| 37 | } |
| 38 | |
| 39 | static bool |
| 40 | CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query, |
| 41 | std::initializer_list<TypesAndMemOps> SupportedValues) { |
| 42 | unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits(); |
| 43 | |
| 44 | // Non power of two memory access is never legal. |
| 45 | if (!isPowerOf2_64(Value: QueryMemSize)) |
| 46 | return false; |
| 47 | |
| 48 | for (auto &Val : SupportedValues) { |
| 49 | if (Val.ValTy != Query.Types[0]) |
| 50 | continue; |
| 51 | if (Val.PtrTy != Query.Types[1]) |
| 52 | continue; |
| 53 | if (Val.MemSize != QueryMemSize) |
| 54 | continue; |
| 55 | if (!Val.SystemSupportsUnalignedAccess && |
| 56 | isUnalignedMemmoryAccess(MemSize: QueryMemSize, AlignInBits: Query.MMODescrs[0].AlignInBits)) |
| 57 | return false; |
| 58 | return true; |
| 59 | } |
| 60 | return false; |
| 61 | } |
| 62 | |
| 63 | static bool CheckTyN(unsigned N, const LegalityQuery &Query, |
| 64 | std::initializer_list<LLT> SupportedValues) { |
| 65 | return llvm::is_contained(Set: SupportedValues, Element: Query.Types[N]); |
| 66 | } |
| 67 | |
// Builds the legalization rule table for MIPS (32-bit, optionally with MSA).
// Rules are keyed by generic opcode; each rule set decides per-type whether an
// operation is legal, custom-lowered, lowered generically, or a libcall.
MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
  using namespace TargetOpcode;

  // Scalar, MSA vector (128-bit) and pointer types used by the rules below.
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT p0 = LLT::pointer(0, 32);

  // Integer add/sub/mul: legal on s32 and, with MSA, on 128-bit vectors;
  // other scalar widths are clamped to s32.
  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
      .legalIf([=, &ST](const LegalityQuery &Query) {
        if (CheckTyN(0, Query, {s32}))
          return true;
        if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
          return true;
        return false;
      })
      .clampScalar(0, s32, s32);

  // Overflow/carry-producing arithmetic has no direct MIPS encoding; expand
  // it into plain ops plus compares.
  getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO})
      .lowerFor({{s32, s1}});

  getActionDefinitionsBuilder(G_UMULH)
      .legalFor({s32})
      .maxScalar(0, s32);

  // MIPS32r6 does not have alignment restrictions for memory access.
  // For MIPS32r5 and older memory access must be naturally-aligned i.e. aligned
  // to at least a multiple of its own size. There is however a two instruction
  // combination that performs 4 byte unaligned access (lwr/lwl and swl/swr)
  // therefore 4 byte load and store are legal and will use NoAlignRequirements.
  bool NoAlignRequirements = true;

  getActionDefinitionsBuilder({G_LOAD, G_STORE})
      .legalIf([=, &ST](const LegalityQuery &Query) {
        // Scalar/pointer accesses; 16-bit and 64-bit sizes are only legal
        // when the subtarget tolerates unaligned accesses.
        if (CheckTy0Ty1MemSizeAlign(
                Query, {{s32, p0, 8, NoAlignRequirements},
                        {s32, p0, 16, ST.systemSupportsUnalignedAccess()},
                        {s32, p0, 32, NoAlignRequirements},
                        {p0, p0, 32, NoAlignRequirements},
                        {s64, p0, 64, ST.systemSupportsUnalignedAccess()}}))
          return true;
        // MSA 128-bit vector load/store have no alignment restriction.
        if (ST.hasMSA() && CheckTy0Ty1MemSizeAlign(
                               Query, {{v16s8, p0, 128, NoAlignRequirements},
                                       {v8s16, p0, 128, NoAlignRequirements},
                                       {v4s32, p0, 128, NoAlignRequirements},
                                       {v2s64, p0, 128, NoAlignRequirements}}))
          return true;
        return false;
      })
      // Custom lower scalar memory access, up to 8 bytes, for:
      // - non-power-of-2 MemSizes
      // - unaligned 2 or 8 byte MemSizes for MIPS32r5 and older
      .customIf([=, &ST](const LegalityQuery &Query) {
        if (!Query.Types[0].isScalar() || Query.Types[1] != p0 ||
            Query.Types[0] == s1)
          return false;

        unsigned Size = Query.Types[0].getSizeInBits();
        unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
        assert(QueryMemSize <= Size && "Scalar can't hold MemSize");

        if (Size > 64 || QueryMemSize > 64)
          return false;

        // Non-power-of-2 sizes are always split by legalizeCustom.
        if (!isPowerOf2_64(Query.MMODescrs[0].MemoryTy.getSizeInBits()))
          return true;

        // Unaligned power-of-2 accesses (other than 32-bit, which lwl/lwr
        // handle) are also custom-lowered on pre-r6 subtargets.
        if (!ST.systemSupportsUnalignedAccess() &&
            isUnalignedMemmoryAccess(QueryMemSize,
                                     Query.MMODescrs[0].AlignInBits)) {
          assert(QueryMemSize != 32 && "4 byte load and store are legal");
          return true;
        }

        return false;
      })
      .minScalar(0, s32)
      .lower();

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({s32, s64});

  // s64 <-> 2 x s32 splits/joins are legal (s64 lives in register pairs).
  getActionDefinitionsBuilder(G_UNMERGE_VALUES)
      .legalFor({{s32, s64}});

  getActionDefinitionsBuilder(G_MERGE_VALUES)
      .legalFor({{s64, s32}});

  getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                 {s32, p0, s16, 8}})
      .clampScalar(0, s32, s32);

  // Extensions/truncations are never "legal" as such; narrowing everything
  // to at most s32 makes them disappear or become register-sized.
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf([](const LegalityQuery &Query) { return false; })
      .maxScalar(0, s32);

  getActionDefinitionsBuilder(G_TRUNC)
      .legalIf([](const LegalityQuery &Query) { return false; })
      .maxScalar(1, s32);

  getActionDefinitionsBuilder(G_SELECT)
      .legalForCartesianProduct({p0, s32, s64}, {s32})
      .minScalar(0, s32)
      .minScalar(1, s32);

  getActionDefinitionsBuilder(G_BRCOND)
      .legalFor({s32})
      .minScalar(0, s32);

  getActionDefinitionsBuilder(G_BRJT)
      .legalFor({{p0, s32}});

  getActionDefinitionsBuilder(G_BRINDIRECT)
      .legalFor({p0});

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s32, s64})
      .minScalar(0, s32);

  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
      .legalFor({s32})
      .clampScalar(0, s32, s32);

  // Division/remainder: s32 (and MSA vectors) in hardware, s64 via libcall.
  getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
      .legalIf([=, &ST](const LegalityQuery &Query) {
        if (CheckTyN(0, Query, {s32}))
          return true;
        if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
          return true;
        return false;
      })
      .minScalar(0, s32)
      .libcallFor({s64});

  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
      .legalFor({{s32, s32}})
      .clampScalar(1, s32, s32)
      .clampScalar(0, s32, s32);

  getActionDefinitionsBuilder(G_ICMP)
      .legalForCartesianProduct({s32}, {s32, p0})
      .clampScalar(1, s32, s32)
      .minScalar(0, s32);

  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({s32})
      .clampScalar(0, s32, s32);

  getActionDefinitionsBuilder({G_PTR_ADD, G_INTTOPTR})
      .legalFor({{p0, s32}});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{s32, p0}});

  getActionDefinitionsBuilder(G_FRAME_INDEX)
      .legalFor({p0});

  getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE})
      .legalFor({p0});

  getActionDefinitionsBuilder(G_DYN_STACKALLOC)
      .lowerFor({{p0, s32}});

  getActionDefinitionsBuilder(G_VASTART)
      .legalFor({p0});

  // Byte swap: a single instruction from MIPS32r2 on; shift/mask expansion
  // on older cores.
  getActionDefinitionsBuilder(G_BSWAP)
      .legalIf([=, &ST](const LegalityQuery &Query) {
        if (ST.hasMips32r2() && CheckTyN(0, Query, {s32}))
          return true;
        return false;
      })
      .lowerIf([=, &ST](const LegalityQuery &Query) {
        if (!ST.hasMips32r2() && CheckTyN(0, Query, {s32}))
          return true;
        return false;
      })
      .maxScalar(0, s32);

  getActionDefinitionsBuilder(G_BITREVERSE)
      .lowerFor({s32})
      .maxScalar(0, s32);

  // Count leading zeros is a native instruction; the *_ZERO_UNDEF and
  // trailing-zero variants are expressed in terms of it.
  getActionDefinitionsBuilder(G_CTLZ)
      .legalFor({{s32, s32}})
      .maxScalar(0, s32)
      .maxScalar(1, s32);
  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
      .lowerFor({{s32, s32}});

  getActionDefinitionsBuilder(G_CTTZ)
      .lowerFor({{s32, s32}})
      .maxScalar(0, s32)
      .maxScalar(1, s32);
  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
      .lowerFor({{s32, s32}, {s64, s64}});

  getActionDefinitionsBuilder(G_CTPOP)
      .lowerFor({{s32, s32}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s32);

  // FP instructions
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT})
      .legalIf([=, &ST](const LegalityQuery &Query) {
        if (CheckTyN(0, Query, {s32, s64}))
          return true;
        if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
          return true;
        return false;
      });

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .minScalar(0, s32);

  getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
      .libcallFor({s32, s64});

  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor({{s64, s32}});

  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor({{s32, s64}});

  // FP to int conversion instructions
  getActionDefinitionsBuilder(G_FPTOSI)
      .legalForCartesianProduct({s32}, {s64, s32})
      .libcallForCartesianProduct({s64}, {s64, s32})
      .minScalar(0, s32);

  getActionDefinitionsBuilder(G_FPTOUI)
      .libcallForCartesianProduct({s64}, {s64, s32})
      .lowerForCartesianProduct({s32}, {s64, s32})
      .minScalar(0, s32);

  // Int to FP conversion instructions
  getActionDefinitionsBuilder(G_SITOFP)
      .legalForCartesianProduct({s64, s32}, {s32})
      .libcallForCartesianProduct({s64, s32}, {s64})
      .minScalar(1, s32);

  // Unsigned s32 -> FP has no hardware instruction; see legalizeCustom for
  // the 2^52 bit-mask trick used in the custom lowering.
  getActionDefinitionsBuilder(G_UITOFP)
      .libcallForCartesianProduct({s64, s32}, {s64})
      .customForCartesianProduct({s64, s32}, {s32})
      .minScalar(1, s32);

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getLegacyLegalizerInfo().computeTables();
  verify(*ST.getInstrInfo());
}
| 332 | |
// Performs the custom lowerings requested by the rule table above:
// - G_LOAD/G_STORE: split a <= 8-byte scalar access that is non-power-of-2
//   sized or unaligned into two smaller accesses.
// - G_UITOFP: convert unsigned s32 to f32/f64 without a hardware instruction.
// Returns true when the instruction was handled, false otherwise.
bool MipsLegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  using namespace TargetOpcode;

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  switch (MI.getOpcode()) {
  case G_LOAD:
  case G_STORE: {
    // MemSize is in bytes here (getSize), unlike the bit sizes used in the
    // legality predicates.
    unsigned MemSize = (**MI.memoperands_begin()).getSize().getValue();
    Register Val = MI.getOperand(0).getReg();
    unsigned Size = MRI.getType(Val).getSizeInBits();

    MachineMemOperand *MMOBase = *MI.memoperands_begin();

    assert(MemSize <= 8 && "MemSize is too large");
    assert(Size <= 64 && "Scalar size is too large");

    // Split MemSize into two, P2HalfMemSize is largest power of two smaller
    // then MemSize. e.g. 8 = 4 + 4 , 6 = 4 + 2, 3 = 2 + 1.
    unsigned P2HalfMemSize, RemMemSize;
    if (isPowerOf2_64(MemSize)) {
      P2HalfMemSize = RemMemSize = MemSize / 2;
    } else {
      P2HalfMemSize = 1 << Log2_32(MemSize);
      RemMemSize = MemSize - P2HalfMemSize;
    }

    Register BaseAddr = MI.getOperand(1).getReg();
    LLT PtrTy = MRI.getType(BaseAddr);
    MachineFunction &MF = MIRBuilder.getMF();

    // Memory operands for the two partial accesses: bytes [0, P2HalfMemSize)
    // and [P2HalfMemSize, MemSize).
    auto P2HalfMemOp = MF.getMachineMemOperand(MMOBase, 0, P2HalfMemSize);
    auto RemMemOp = MF.getMachineMemOperand(MMOBase, P2HalfMemSize, RemMemSize);

    if (MI.getOpcode() == G_STORE) {
      // Widen Val to s32 or s64 in order to create legal G_LSHR or G_UNMERGE.
      if (Size < 32)
        Val = MIRBuilder.buildAnyExt(s32, Val).getReg(0);
      if (Size > 32 && Size < 64)
        Val = MIRBuilder.buildAnyExt(s64, Val).getReg(0);

      auto C_P2HalfMemSize = MIRBuilder.buildConstant(s32, P2HalfMemSize);
      auto Addr = MIRBuilder.buildPtrAdd(PtrTy, BaseAddr, C_P2HalfMemSize);

      if (MI.getOpcode() == G_STORE && MemSize <= 4) {
        // Small store: store the low part, then shift the remaining bits
        // down and store them at the offset address.
        MIRBuilder.buildStore(Val, BaseAddr, *P2HalfMemOp);
        auto C_P2Half_InBits = MIRBuilder.buildConstant(s32, P2HalfMemSize * 8);
        auto Shift = MIRBuilder.buildLShr(s32, Val, C_P2Half_InBits);
        MIRBuilder.buildStore(Shift, Addr, *RemMemOp);
      } else {
        // Larger store: split the (now s64) value into two s32 halves and
        // store each half separately.
        auto Unmerge = MIRBuilder.buildUnmerge(s32, Val);
        MIRBuilder.buildStore(Unmerge.getReg(0), BaseAddr, *P2HalfMemOp);
        MIRBuilder.buildStore(Unmerge.getReg(1), Addr, *RemMemOp);
      }
    }

    if (MI.getOpcode() == G_LOAD) {

      if (MemSize <= 4) {
        // This is anyextending load, use 4 byte lwr/lwl.
        auto *Load4MMO = MF.getMachineMemOperand(MMOBase, 0, 4);

        if (Size == 32)
          MIRBuilder.buildLoad(Val, BaseAddr, *Load4MMO);
        else {
          // Narrower destination: load into s32 and truncate.
          auto Load = MIRBuilder.buildLoad(s32, BaseAddr, *Load4MMO);
          MIRBuilder.buildTrunc(Val, Load.getReg(0));
        }

      } else {
        auto C_P2HalfMemSize = MIRBuilder.buildConstant(s32, P2HalfMemSize);
        auto Addr = MIRBuilder.buildPtrAdd(PtrTy, BaseAddr, C_P2HalfMemSize);

        // Load both halves, then merge them into a single s64 value.
        auto Load_P2Half = MIRBuilder.buildLoad(s32, BaseAddr, *P2HalfMemOp);
        auto Load_Rem = MIRBuilder.buildLoad(s32, Addr, *RemMemOp);

        if (Size == 64)
          MIRBuilder.buildMergeLikeInstr(Val, {Load_P2Half, Load_Rem});
        else {
          auto Merge =
              MIRBuilder.buildMergeLikeInstr(s64, {Load_P2Half, Load_Rem});
          MIRBuilder.buildTrunc(Val, Merge);
        }
      }
    }
    MI.eraseFromParent();
    break;
  }
  case G_UITOFP: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(Dst);
    LLT SrcTy = MRI.getType(Src);

    // Only s32 -> s32/s64 is custom lowered; anything else is unsupported.
    if (SrcTy != s32)
      return false;
    if (DstTy != s32 && DstTy != s64)
      return false;

    // Let 0xABCDEFGH be given unsigned in MI.getOperand(1). First let's convert
    // unsigned to double. Mantissa has 52 bits so we use following trick:
    // First make floating point bit mask 0x43300000ABCDEFGH.
    // Mask represents 2^52 * 0x1.00000ABCDEFGH i.e. 0x100000ABCDEFGH.0 .
    // Next, subtract 2^52 * 0x1.0000000000000 i.e. 0x10000000000000.0 from it.
    // Done. Trunc double to float if needed.

    auto C_HiMask = MIRBuilder.buildConstant(s32, UINT32_C(0x43300000));
    auto Bitcast =
        MIRBuilder.buildMergeLikeInstr(s64, {Src, C_HiMask.getReg(0)});

    // 2^52 as an f64 constant (exponent bits only, zero mantissa).
    MachineInstrBuilder TwoP52FP = MIRBuilder.buildFConstant(
        s64, llvm::bit_cast<double>(UINT64_C(0x4330000000000000)));

    if (DstTy == s64)
      MIRBuilder.buildFSub(Dst, Bitcast, TwoP52FP);
    else {
      // f32 result: subtract in f64, then truncate to f32.
      MachineInstrBuilder ResF64 = MIRBuilder.buildFSub(s64, Bitcast, TwoP52FP);
      MIRBuilder.buildFPTrunc(Dst, ResF64);
    }

    MI.eraseFromParent();
    break;
  }
  default:
    return false;
  }

  return true;
}
| 468 | |
| 469 | static bool SelectMSA3OpIntrinsic(MachineInstr &MI, unsigned Opcode, |
| 470 | MachineIRBuilder &MIRBuilder, |
| 471 | const MipsSubtarget &ST) { |
| 472 | assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA." ); |
| 473 | if (!MIRBuilder.buildInstr(Opcode) |
| 474 | .add(MO: MI.getOperand(i: 0)) |
| 475 | .add(MO: MI.getOperand(i: 2)) |
| 476 | .add(MO: MI.getOperand(i: 3)) |
| 477 | .constrainAllUses(TII: MIRBuilder.getTII(), TRI: *ST.getRegisterInfo(), |
| 478 | RBI: *ST.getRegBankInfo())) |
| 479 | return false; |
| 480 | MI.eraseFromParent(); |
| 481 | return true; |
| 482 | } |
| 483 | |
| 484 | static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, |
| 485 | MachineIRBuilder &MIRBuilder, |
| 486 | const MipsSubtarget &ST) { |
| 487 | assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA." ); |
| 488 | MIRBuilder.buildInstr(Opcode) |
| 489 | .add(MO: MI.getOperand(i: 0)) |
| 490 | .add(MO: MI.getOperand(i: 2)) |
| 491 | .add(MO: MI.getOperand(i: 3)); |
| 492 | MI.eraseFromParent(); |
| 493 | return true; |
| 494 | } |
| 495 | |
| 496 | static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, |
| 497 | MachineIRBuilder &MIRBuilder, |
| 498 | const MipsSubtarget &ST) { |
| 499 | assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA." ); |
| 500 | MIRBuilder.buildInstr(Opcode) |
| 501 | .add(MO: MI.getOperand(i: 0)) |
| 502 | .add(MO: MI.getOperand(i: 2)); |
| 503 | MI.eraseFromParent(); |
| 504 | return true; |
| 505 | } |
| 506 | |
// Legalizes MIPS intrinsics: vacopy is expanded to a load/store of the
// va_list pointer; MSA arithmetic intrinsics are mapped either to generic
// opcodes or directly to target instructions (for the immediate forms and
// fmax_a, which have no generic equivalent).
bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                          MachineInstr &MI) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  const MipsSubtarget &ST = MI.getMF()->getSubtarget<MipsSubtarget>();

  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  case Intrinsic::vacopy: {
    // va_list on MIPS O32 is a single pointer: copy it with a 4-byte
    // aligned load from the source followed by a store to the destination.
    MachinePointerInfo MPO;
    LLT PtrTy = LLT::pointer(0, 32);
    auto Tmp =
        MIRBuilder.buildLoad(PtrTy, MI.getOperand(2),
                             *MI.getMF()->getMachineMemOperand(
                                 MPO, MachineMemOperand::MOLoad, PtrTy, Align(4)));
    MIRBuilder.buildStore(Tmp, MI.getOperand(1),
                          *MI.getMF()->getMachineMemOperand(
                              MPO, MachineMemOperand::MOStore, PtrTy, Align(4)));
    MI.eraseFromParent();
    return true;
  }
  // Register-register MSA ops map to generic opcodes; the *_vi (vector +
  // immediate) forms are selected directly since G_ADD/G_SUB expect two
  // register operands.
  case Intrinsic::mips_addv_b:
  case Intrinsic::mips_addv_h:
  case Intrinsic::mips_addv_w:
  case Intrinsic::mips_addv_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_ADD, MIRBuilder, ST);
  case Intrinsic::mips_addvi_b:
    return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_B, MIRBuilder, ST);
  case Intrinsic::mips_addvi_h:
    return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_H, MIRBuilder, ST);
  case Intrinsic::mips_addvi_w:
    return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_W, MIRBuilder, ST);
  case Intrinsic::mips_addvi_d:
    return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_D, MIRBuilder, ST);
  case Intrinsic::mips_subv_b:
  case Intrinsic::mips_subv_h:
  case Intrinsic::mips_subv_w:
  case Intrinsic::mips_subv_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SUB, MIRBuilder, ST);
  case Intrinsic::mips_subvi_b:
    return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_B, MIRBuilder, ST);
  case Intrinsic::mips_subvi_h:
    return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_H, MIRBuilder, ST);
  case Intrinsic::mips_subvi_w:
    return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_W, MIRBuilder, ST);
  case Intrinsic::mips_subvi_d:
    return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_D, MIRBuilder, ST);
  case Intrinsic::mips_mulv_b:
  case Intrinsic::mips_mulv_h:
  case Intrinsic::mips_mulv_w:
  case Intrinsic::mips_mulv_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_MUL, MIRBuilder, ST);
  case Intrinsic::mips_div_s_b:
  case Intrinsic::mips_div_s_h:
  case Intrinsic::mips_div_s_w:
  case Intrinsic::mips_div_s_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SDIV, MIRBuilder, ST);
  case Intrinsic::mips_mod_s_b:
  case Intrinsic::mips_mod_s_h:
  case Intrinsic::mips_mod_s_w:
  case Intrinsic::mips_mod_s_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SREM, MIRBuilder, ST);
  case Intrinsic::mips_div_u_b:
  case Intrinsic::mips_div_u_h:
  case Intrinsic::mips_div_u_w:
  case Intrinsic::mips_div_u_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_UDIV, MIRBuilder, ST);
  case Intrinsic::mips_mod_u_b:
  case Intrinsic::mips_mod_u_h:
  case Intrinsic::mips_mod_u_w:
  case Intrinsic::mips_mod_u_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_UREM, MIRBuilder, ST);
  case Intrinsic::mips_fadd_w:
  case Intrinsic::mips_fadd_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FADD, MIRBuilder, ST);
  case Intrinsic::mips_fsub_w:
  case Intrinsic::mips_fsub_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FSUB, MIRBuilder, ST);
  case Intrinsic::mips_fmul_w:
  case Intrinsic::mips_fmul_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FMUL, MIRBuilder, ST);
  case Intrinsic::mips_fdiv_w:
  case Intrinsic::mips_fdiv_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FDIV, MIRBuilder, ST);
  // fmax_a (max of absolute values) has no generic opcode; select directly.
  case Intrinsic::mips_fmax_a_w:
    return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_W, MIRBuilder, ST);
  case Intrinsic::mips_fmax_a_d:
    return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_D, MIRBuilder, ST);
  case Intrinsic::mips_fsqrt_w:
    return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
  case Intrinsic::mips_fsqrt_d:
    return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
  default:
    break;
  }
  return true;
}
| 602 | |