| 1 | //===- ARMLegalizerInfo.cpp --------------------------------------*- C++ -*-==// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// This file implements the targeting of the Machinelegalizer class for ARM. |
| 10 | /// \todo This should be generated by TableGen. |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "ARMLegalizerInfo.h" |
| 14 | #include "ARMCallLowering.h" |
| 15 | #include "ARMSubtarget.h" |
| 16 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" |
| 17 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
| 18 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
| 19 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 20 | #include "llvm/CodeGen/TargetOpcodes.h" |
| 21 | #include "llvm/CodeGen/ValueTypes.h" |
| 22 | #include "llvm/IR/DerivedTypes.h" |
| 23 | #include "llvm/IR/Type.h" |
| 24 | |
| 25 | using namespace llvm; |
| 26 | using namespace LegalizeActions; |
| 27 | |
| 28 | static bool AEABI(const ARMSubtarget &ST) { |
| 29 | return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI(); |
| 30 | } |
| 31 | |
ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) : ST(ST) {
  using namespace TargetOpcode;

  // Frequently used types: a 32-bit pointer (address space 0) and the scalar
  // widths referenced by the rules below.
  const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 32);

  const LLT s1 = LLT::scalar(SizeInBits: 1);
  const LLT s8 = LLT::scalar(SizeInBits: 8);
  const LLT s16 = LLT::scalar(SizeInBits: 16);
  const LLT s32 = LLT::scalar(SizeInBits: 32);
  const LLT s64 = LLT::scalar(SizeInBits: 64);

  auto &LegacyInfo = getLegacyLegalizerInfo();
  if (ST.isThumb1Only()) {
    // Thumb1 is not supported yet.
    LegacyInfo.computeTables();
    verify(MII: *ST.getInstrInfo());
    return;
  }

  // Extensions from any of the narrow scalars into anything up to 32 bits
  // are directly selectable.
  getActionDefinitionsBuilder(Opcodes: {G_SEXT, G_ZEXT, G_ANYEXT})
      .legalForCartesianProduct(Types0: {s8, s16, s32}, Types1: {s1, s8, s16});

  getActionDefinitionsBuilder(Opcode: G_SEXT_INREG).lower();

  // Integer arithmetic/logic is only legal on 32-bit scalars; everything
  // else is clamped to that width.
  getActionDefinitionsBuilder(Opcodes: {G_MUL, G_AND, G_OR, G_XOR})
      .legalFor(Types: {s32})
      .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);

  // With NEON, 64-bit add/sub is also legal (handled in the vector unit).
  if (ST.hasNEON())
    getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB})
        .legalFor(Types: {s32, s64})
        .minScalar(TypeIdx: 0, Ty: s32);
  else
    getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB})
        .legalFor(Types: {s32})
        .minScalar(TypeIdx: 0, Ty: s32);

  // Shifts take a 32-bit value and a 32-bit shift amount.
  getActionDefinitionsBuilder(Opcodes: {G_ASHR, G_LSHR, G_SHL})
      .legalFor(Types: {{s32, s32}})
      .minScalar(TypeIdx: 0, Ty: s32)
      .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32);

  // Hardware divide availability depends on the mode we are compiling for
  // (ARM vs Thumb), not just on the subtarget feature bits.
  bool HasHWDivide = (!ST.isThumb() && ST.hasDivideInARMMode()) ||
                     (ST.isThumb() && ST.hasDivideInThumbMode());
  if (HasHWDivide)
    getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_UDIV})
        .legalFor(Types: {s32})
        .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);
  else
    getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_UDIV})
        .libcallFor(Types: {s32})
        .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);

  // Remainder: with HW divide it is lowered (div+mul+sub); on AEABI targets
  // we use the combined divmod libcall via custom legalization (see
  // legalizeCustom); otherwise a plain libcall.
  auto &REMBuilder =
      getActionDefinitionsBuilder(Opcodes: {G_SREM, G_UREM}).minScalar(TypeIdx: 0, Ty: s32);
  if (HasHWDivide)
    REMBuilder.lowerFor(Types: {s32});
  else if (AEABI(ST))
    REMBuilder.customFor(Types: {s32});
  else
    REMBuilder.libcallFor(Types: {s32});

  // Pointers are 32 bits wide, so conversions to/from s32 are legal.
  getActionDefinitionsBuilder(Opcode: G_INTTOPTR)
      .legalFor(Types: {{p0, s32}})
      .minScalar(TypeIdx: 1, Ty: s32);
  getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
      .legalFor(Types: {{s32, p0}})
      .minScalar(TypeIdx: 0, Ty: s32);

  // Constants are custom-legalized so we can turn expensive-to-materialize
  // immediates into constant-pool loads (see legalizeCustom).
  getActionDefinitionsBuilder(Opcode: G_CONSTANT)
      .customFor(Types: {s32, p0})
      .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);

  getActionDefinitionsBuilder(Opcode: G_CONSTANT_POOL).legalFor(Types: {p0});

  getActionDefinitionsBuilder(Opcode: G_ICMP)
      .legalForCartesianProduct(Types0: {s1}, Types1: {s32, p0})
      .minScalar(TypeIdx: 1, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_SELECT)
      .legalForCartesianProduct(Types0: {s32, p0}, Types1: {s1})
      .minScalar(TypeIdx: 0, Ty: s32);

  // We're keeping these builders around because we'll want to add support for
  // floating point to them.
  auto &LoadStoreBuilder = getActionDefinitionsBuilder(Opcodes: {G_LOAD, G_STORE})
                               .legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s8, .Align: 8},
                                                         {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: 8},
                                                         {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 8},
                                                         {.Type0: p0, .Type1: p0, .MemTy: p0, .Align: 8}})
                               .unsupportedIfMemSizeNotPow2();

  getActionDefinitionsBuilder(Opcode: G_FRAME_INDEX).legalFor(Types: {p0});
  getActionDefinitionsBuilder(Opcode: G_GLOBAL_VALUE).legalFor(Types: {p0});

  // PHIs of s64 become legal below when VFP2 is available.
  auto &PhiBuilder =
      getActionDefinitionsBuilder(Opcode: G_PHI)
          .legalFor(Types: {s32, p0})
          .minScalar(TypeIdx: 0, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_PTR_ADD)
      .legalFor(Types: {{p0, s32}})
      .minScalar(TypeIdx: 1, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_BRCOND).legalFor(Types: {s1});

  // Floating point: with VFP2 hardware support most FP operations are legal
  // on s32/s64; otherwise everything goes through libcalls (and G_FCMP is
  // custom-legalized into the soft-float comparison helpers).
  if (!ST.useSoftFloat() && ST.hasVFP2Base()) {
    getActionDefinitionsBuilder(
        Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG})
        .legalFor(Types: {s32, s64});

    LoadStoreBuilder
        .legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 32}})
        .maxScalar(TypeIdx: 0, Ty: s32);
    PhiBuilder.legalFor(Types: {s64});

    getActionDefinitionsBuilder(Opcode: G_FCMP).legalForCartesianProduct(Types0: {s1},
                                                                         Types1: {s32, s64});

    // s64 values are modeled as pairs of s32 (VFP register pairs).
    getActionDefinitionsBuilder(Opcode: G_MERGE_VALUES).legalFor(Types: {{s64, s32}});
    getActionDefinitionsBuilder(Opcode: G_UNMERGE_VALUES).legalFor(Types: {{s32, s64}});

    getActionDefinitionsBuilder(Opcode: G_FPEXT).legalFor(Types: {{s64, s32}});
    getActionDefinitionsBuilder(Opcode: G_FPTRUNC).legalFor(Types: {{s32, s64}});

    getActionDefinitionsBuilder(Opcodes: {G_FPTOSI, G_FPTOUI})
        .legalForCartesianProduct(Types0: {s32}, Types1: {s32, s64});
    getActionDefinitionsBuilder(Opcodes: {G_SITOFP, G_UITOFP})
        .legalForCartesianProduct(Types0: {s32, s64}, Types1: {s32});

    // FP environment/mode: the FPSCR is a 32-bit register; mode updates need
    // masking, so they are custom (see legalizeCustom).
    getActionDefinitionsBuilder(Opcodes: {G_GET_FPENV, G_SET_FPENV, G_GET_FPMODE})
        .legalFor(Types: {s32});
    getActionDefinitionsBuilder(Opcode: G_RESET_FPENV).alwaysLegal();
    getActionDefinitionsBuilder(Opcode: G_SET_FPMODE).customFor(Types: {s32});
    getActionDefinitionsBuilder(Opcode: G_RESET_FPMODE).custom();
  } else {
    getActionDefinitionsBuilder(Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV})
        .libcallFor(Types: {s32, s64});

    LoadStoreBuilder.maxScalar(TypeIdx: 0, Ty: s32);

    getActionDefinitionsBuilder(Opcode: G_FNEG).lowerFor(Types: {s32, s64});

    getActionDefinitionsBuilder(Opcode: G_FCONSTANT).customFor(Types: {s32, s64});

    getActionDefinitionsBuilder(Opcode: G_FCMP).customForCartesianProduct(Types0: {s1},
                                                                          Types1: {s32, s64});

    // Pick the comparison libcall tables matching the runtime's ABI.
    if (AEABI(ST))
      setFCmpLibcallsAEABI();
    else
      setFCmpLibcallsGNU();

    getActionDefinitionsBuilder(Opcode: G_FPEXT).libcallFor(Types: {{s64, s32}});
    getActionDefinitionsBuilder(Opcode: G_FPTRUNC).libcallFor(Types: {{s32, s64}});

    getActionDefinitionsBuilder(Opcodes: {G_FPTOSI, G_FPTOUI})
        .libcallForCartesianProduct(Types0: {s32}, Types1: {s32, s64});
    getActionDefinitionsBuilder(Opcodes: {G_SITOFP, G_UITOFP})
        .libcallForCartesianProduct(Types0: {s32, s64}, Types1: {s32});

    getActionDefinitionsBuilder(Opcodes: {G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV})
        .libcall();
    getActionDefinitionsBuilder(Opcodes: {G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
        .libcall();
  }

  // Just expand whatever loads and stores are left.
  // (This must come after the FP block above, which may have added legal
  // s64 load/store forms to the builder.)
  LoadStoreBuilder.lower();

  // Fused multiply-add needs VFP4; otherwise use the fma libcalls.
  if (!ST.useSoftFloat() && ST.hasVFP4Base())
    getActionDefinitionsBuilder(Opcode: G_FMA).legalFor(Types: {s32, s64});
  else
    getActionDefinitionsBuilder(Opcode: G_FMA).libcallFor(Types: {s32, s64});

  getActionDefinitionsBuilder(Opcodes: {G_FREM, G_FPOW}).libcallFor(Types: {s32, s64});

  // Count-leading-zeros: v5T and later have the CLZ instruction, so G_CTLZ
  // is legal and the zero-undef variant lowers to it. Without CLZ, the
  // zero-undef variant goes to a libcall and G_CTLZ lowers to it.
  if (ST.hasV5TOps()) {
    getActionDefinitionsBuilder(Opcode: G_CTLZ)
        .legalFor(Types: {s32, s32})
        .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
        .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);
    getActionDefinitionsBuilder(Opcode: G_CTLZ_ZERO_UNDEF)
        .lowerFor(Types: {s32, s32})
        .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
        .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);
  } else {
    getActionDefinitionsBuilder(Opcode: G_CTLZ_ZERO_UNDEF)
        .libcallFor(Types: {s32, s32})
        .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
        .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);
    getActionDefinitionsBuilder(Opcode: G_CTLZ)
        .lowerFor(Types: {s32, s32})
        .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
        .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);
  }

  LegacyInfo.computeTables();
  verify(MII: *ST.getInstrInfo());
}
| 232 | |
| 233 | void ARMLegalizerInfo::setFCmpLibcallsAEABI() { |
| 234 | // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be |
| 235 | // default-initialized. |
| 236 | FCmp32Libcalls.resize(s: CmpInst::LAST_FCMP_PREDICATE + 1); |
| 237 | FCmp32Libcalls[CmpInst::FCMP_OEQ] = { |
| 238 | {.LibcallID: RTLIB::OEQ_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 239 | FCmp32Libcalls[CmpInst::FCMP_OGE] = { |
| 240 | {.LibcallID: RTLIB::OGE_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 241 | FCmp32Libcalls[CmpInst::FCMP_OGT] = { |
| 242 | {.LibcallID: RTLIB::OGT_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 243 | FCmp32Libcalls[CmpInst::FCMP_OLE] = { |
| 244 | {.LibcallID: RTLIB::OLE_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 245 | FCmp32Libcalls[CmpInst::FCMP_OLT] = { |
| 246 | {.LibcallID: RTLIB::OLT_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 247 | FCmp32Libcalls[CmpInst::FCMP_ORD] = {{.LibcallID: RTLIB::UO_F32, .Predicate: CmpInst::ICMP_EQ}}; |
| 248 | FCmp32Libcalls[CmpInst::FCMP_UGE] = {{.LibcallID: RTLIB::OLT_F32, .Predicate: CmpInst::ICMP_EQ}}; |
| 249 | FCmp32Libcalls[CmpInst::FCMP_UGT] = {{.LibcallID: RTLIB::OLE_F32, .Predicate: CmpInst::ICMP_EQ}}; |
| 250 | FCmp32Libcalls[CmpInst::FCMP_ULE] = {{.LibcallID: RTLIB::OGT_F32, .Predicate: CmpInst::ICMP_EQ}}; |
| 251 | FCmp32Libcalls[CmpInst::FCMP_ULT] = {{.LibcallID: RTLIB::OGE_F32, .Predicate: CmpInst::ICMP_EQ}}; |
| 252 | FCmp32Libcalls[CmpInst::FCMP_UNE] = {{.LibcallID: RTLIB::UNE_F32, .Predicate: CmpInst::ICMP_EQ}}; |
| 253 | FCmp32Libcalls[CmpInst::FCMP_UNO] = { |
| 254 | {.LibcallID: RTLIB::UO_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 255 | FCmp32Libcalls[CmpInst::FCMP_ONE] = { |
| 256 | {.LibcallID: RTLIB::OGT_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}, |
| 257 | {.LibcallID: RTLIB::OLT_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 258 | FCmp32Libcalls[CmpInst::FCMP_UEQ] = { |
| 259 | {.LibcallID: RTLIB::OEQ_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}, |
| 260 | {.LibcallID: RTLIB::UO_F32, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 261 | |
| 262 | FCmp64Libcalls.resize(s: CmpInst::LAST_FCMP_PREDICATE + 1); |
| 263 | FCmp64Libcalls[CmpInst::FCMP_OEQ] = { |
| 264 | {.LibcallID: RTLIB::OEQ_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 265 | FCmp64Libcalls[CmpInst::FCMP_OGE] = { |
| 266 | {.LibcallID: RTLIB::OGE_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 267 | FCmp64Libcalls[CmpInst::FCMP_OGT] = { |
| 268 | {.LibcallID: RTLIB::OGT_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 269 | FCmp64Libcalls[CmpInst::FCMP_OLE] = { |
| 270 | {.LibcallID: RTLIB::OLE_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 271 | FCmp64Libcalls[CmpInst::FCMP_OLT] = { |
| 272 | {.LibcallID: RTLIB::OLT_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 273 | FCmp64Libcalls[CmpInst::FCMP_ORD] = {{.LibcallID: RTLIB::UO_F64, .Predicate: CmpInst::ICMP_EQ}}; |
| 274 | FCmp64Libcalls[CmpInst::FCMP_UGE] = {{.LibcallID: RTLIB::OLT_F64, .Predicate: CmpInst::ICMP_EQ}}; |
| 275 | FCmp64Libcalls[CmpInst::FCMP_UGT] = {{.LibcallID: RTLIB::OLE_F64, .Predicate: CmpInst::ICMP_EQ}}; |
| 276 | FCmp64Libcalls[CmpInst::FCMP_ULE] = {{.LibcallID: RTLIB::OGT_F64, .Predicate: CmpInst::ICMP_EQ}}; |
| 277 | FCmp64Libcalls[CmpInst::FCMP_ULT] = {{.LibcallID: RTLIB::OGE_F64, .Predicate: CmpInst::ICMP_EQ}}; |
| 278 | FCmp64Libcalls[CmpInst::FCMP_UNE] = {{.LibcallID: RTLIB::UNE_F64, .Predicate: CmpInst::ICMP_EQ}}; |
| 279 | FCmp64Libcalls[CmpInst::FCMP_UNO] = { |
| 280 | {.LibcallID: RTLIB::UO_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 281 | FCmp64Libcalls[CmpInst::FCMP_ONE] = { |
| 282 | {.LibcallID: RTLIB::OGT_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}, |
| 283 | {.LibcallID: RTLIB::OLT_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 284 | FCmp64Libcalls[CmpInst::FCMP_UEQ] = { |
| 285 | {.LibcallID: RTLIB::OEQ_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}, |
| 286 | {.LibcallID: RTLIB::UO_F64, .Predicate: CmpInst::BAD_ICMP_PREDICATE}}; |
| 287 | } |
| 288 | |
| 289 | void ARMLegalizerInfo::setFCmpLibcallsGNU() { |
| 290 | // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be |
| 291 | // default-initialized. |
| 292 | FCmp32Libcalls.resize(s: CmpInst::LAST_FCMP_PREDICATE + 1); |
| 293 | FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{.LibcallID: RTLIB::OEQ_F32, .Predicate: CmpInst::ICMP_EQ}}; |
| 294 | FCmp32Libcalls[CmpInst::FCMP_OGE] = {{.LibcallID: RTLIB::OGE_F32, .Predicate: CmpInst::ICMP_SGE}}; |
| 295 | FCmp32Libcalls[CmpInst::FCMP_OGT] = {{.LibcallID: RTLIB::OGT_F32, .Predicate: CmpInst::ICMP_SGT}}; |
| 296 | FCmp32Libcalls[CmpInst::FCMP_OLE] = {{.LibcallID: RTLIB::OLE_F32, .Predicate: CmpInst::ICMP_SLE}}; |
| 297 | FCmp32Libcalls[CmpInst::FCMP_OLT] = {{.LibcallID: RTLIB::OLT_F32, .Predicate: CmpInst::ICMP_SLT}}; |
| 298 | FCmp32Libcalls[CmpInst::FCMP_ORD] = {{.LibcallID: RTLIB::UO_F32, .Predicate: CmpInst::ICMP_EQ}}; |
| 299 | FCmp32Libcalls[CmpInst::FCMP_UGE] = {{.LibcallID: RTLIB::OLT_F32, .Predicate: CmpInst::ICMP_SGE}}; |
| 300 | FCmp32Libcalls[CmpInst::FCMP_UGT] = {{.LibcallID: RTLIB::OLE_F32, .Predicate: CmpInst::ICMP_SGT}}; |
| 301 | FCmp32Libcalls[CmpInst::FCMP_ULE] = {{.LibcallID: RTLIB::OGT_F32, .Predicate: CmpInst::ICMP_SLE}}; |
| 302 | FCmp32Libcalls[CmpInst::FCMP_ULT] = {{.LibcallID: RTLIB::OGE_F32, .Predicate: CmpInst::ICMP_SLT}}; |
| 303 | FCmp32Libcalls[CmpInst::FCMP_UNE] = {{.LibcallID: RTLIB::UNE_F32, .Predicate: CmpInst::ICMP_NE}}; |
| 304 | FCmp32Libcalls[CmpInst::FCMP_UNO] = {{.LibcallID: RTLIB::UO_F32, .Predicate: CmpInst::ICMP_NE}}; |
| 305 | FCmp32Libcalls[CmpInst::FCMP_ONE] = {{.LibcallID: RTLIB::OGT_F32, .Predicate: CmpInst::ICMP_SGT}, |
| 306 | {.LibcallID: RTLIB::OLT_F32, .Predicate: CmpInst::ICMP_SLT}}; |
| 307 | FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{.LibcallID: RTLIB::OEQ_F32, .Predicate: CmpInst::ICMP_EQ}, |
| 308 | {.LibcallID: RTLIB::UO_F32, .Predicate: CmpInst::ICMP_NE}}; |
| 309 | |
| 310 | FCmp64Libcalls.resize(s: CmpInst::LAST_FCMP_PREDICATE + 1); |
| 311 | FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{.LibcallID: RTLIB::OEQ_F64, .Predicate: CmpInst::ICMP_EQ}}; |
| 312 | FCmp64Libcalls[CmpInst::FCMP_OGE] = {{.LibcallID: RTLIB::OGE_F64, .Predicate: CmpInst::ICMP_SGE}}; |
| 313 | FCmp64Libcalls[CmpInst::FCMP_OGT] = {{.LibcallID: RTLIB::OGT_F64, .Predicate: CmpInst::ICMP_SGT}}; |
| 314 | FCmp64Libcalls[CmpInst::FCMP_OLE] = {{.LibcallID: RTLIB::OLE_F64, .Predicate: CmpInst::ICMP_SLE}}; |
| 315 | FCmp64Libcalls[CmpInst::FCMP_OLT] = {{.LibcallID: RTLIB::OLT_F64, .Predicate: CmpInst::ICMP_SLT}}; |
| 316 | FCmp64Libcalls[CmpInst::FCMP_ORD] = {{.LibcallID: RTLIB::UO_F64, .Predicate: CmpInst::ICMP_EQ}}; |
| 317 | FCmp64Libcalls[CmpInst::FCMP_UGE] = {{.LibcallID: RTLIB::OLT_F64, .Predicate: CmpInst::ICMP_SGE}}; |
| 318 | FCmp64Libcalls[CmpInst::FCMP_UGT] = {{.LibcallID: RTLIB::OLE_F64, .Predicate: CmpInst::ICMP_SGT}}; |
| 319 | FCmp64Libcalls[CmpInst::FCMP_ULE] = {{.LibcallID: RTLIB::OGT_F64, .Predicate: CmpInst::ICMP_SLE}}; |
| 320 | FCmp64Libcalls[CmpInst::FCMP_ULT] = {{.LibcallID: RTLIB::OGE_F64, .Predicate: CmpInst::ICMP_SLT}}; |
| 321 | FCmp64Libcalls[CmpInst::FCMP_UNE] = {{.LibcallID: RTLIB::UNE_F64, .Predicate: CmpInst::ICMP_NE}}; |
| 322 | FCmp64Libcalls[CmpInst::FCMP_UNO] = {{.LibcallID: RTLIB::UO_F64, .Predicate: CmpInst::ICMP_NE}}; |
| 323 | FCmp64Libcalls[CmpInst::FCMP_ONE] = {{.LibcallID: RTLIB::OGT_F64, .Predicate: CmpInst::ICMP_SGT}, |
| 324 | {.LibcallID: RTLIB::OLT_F64, .Predicate: CmpInst::ICMP_SLT}}; |
| 325 | FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{.LibcallID: RTLIB::OEQ_F64, .Predicate: CmpInst::ICMP_EQ}, |
| 326 | {.LibcallID: RTLIB::UO_F64, .Predicate: CmpInst::ICMP_NE}}; |
| 327 | } |
| 328 | |
| 329 | ARMLegalizerInfo::FCmpLibcallsList |
| 330 | ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, |
| 331 | unsigned Size) const { |
| 332 | assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate" ); |
| 333 | if (Size == 32) |
| 334 | return FCmp32Libcalls[Predicate]; |
| 335 | if (Size == 64) |
| 336 | return FCmp64Libcalls[Predicate]; |
| 337 | llvm_unreachable("Unsupported size for FCmp predicate" ); |
| 338 | } |
| 339 | |
/// Custom legalization for the opcodes marked custom in the constructor:
/// AEABI divmod-based remainder, soft-float G_FCMP via comparison libcalls,
/// expensive G_CONSTANT immediates, G_FCONSTANT bit-casting, and FPSCR mode
/// updates. Returns true if the instruction was legalized.
bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
                                      LostDebugLocObserver &LocObserver) const {
  using namespace TargetOpcode;

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    // Not an opcode we custom-legalize.
    return false;
  case G_SREM:
  case G_UREM: {
    Register OriginalResult = MI.getOperand(i: 0).getReg();
    auto Size = MRI.getType(Reg: OriginalResult).getSizeInBits();
    if (Size != 32)
      return false;

    auto Libcall =
        MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;

    // Our divmod libcalls return a struct containing the quotient and the
    // remainder. Create a new, unused register for the quotient and use the
    // destination of the original instruction for the remainder.
    Type *ArgTy = Type::getInt32Ty(C&: Ctx);
    StructType *RetTy = StructType::get(Context&: Ctx, Elements: {ArgTy, ArgTy}, /* Packed */ isPacked: true);
    Register RetRegs[] = {MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 32)),
                          OriginalResult};
    auto Status = createLibcall(MIRBuilder, Libcall, Result: {RetRegs, RetTy, 0},
                                Args: {{MI.getOperand(i: 1).getReg(), ArgTy, 0},
                                      {MI.getOperand(i: 2).getReg(), ArgTy, 0}},
                                LocObserver, MI: &MI);
    if (Status != LegalizerHelper::Legalized)
      return false;
    break;
  }
  case G_FCMP: {
    assert(MRI.getType(MI.getOperand(2).getReg()) ==
               MRI.getType(MI.getOperand(3).getReg()) &&
           "Mismatched operands for G_FCMP");
    auto OpSize = MRI.getType(Reg: MI.getOperand(i: 2).getReg()).getSizeInBits();

    auto OriginalResult = MI.getOperand(i: 0).getReg();
    auto Predicate =
        static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
    auto Libcalls = getFCmpLibcalls(Predicate, Size: OpSize);

    // FCMP_TRUE/FCMP_FALSE have no libcall entry: fold to a constant.
    if (Libcalls.empty()) {
      assert((Predicate == CmpInst::FCMP_TRUE ||
              Predicate == CmpInst::FCMP_FALSE) &&
             "Predicate needs libcalls, but none specified");
      MIRBuilder.buildConstant(Res: OriginalResult,
                               Val: Predicate == CmpInst::FCMP_TRUE ? 1 : 0);
      MI.eraseFromParent();
      return true;
    }

    assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size");
    auto *ArgTy = OpSize == 32 ? Type::getFloatTy(C&: Ctx) : Type::getDoubleTy(C&: Ctx);
    auto *RetTy = Type::getInt32Ty(C&: Ctx);

    // Some predicates (ONE, UEQ) need two libcalls whose boolean results are
    // OR'ed together; collect each processed result here.
    SmallVector<Register, 2> Results;
    for (auto Libcall : Libcalls) {
      auto LibcallResult = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 32));
      auto Status = createLibcall(MIRBuilder, Libcall: Libcall.LibcallID,
                                  Result: {LibcallResult, RetTy, 0},
                                  Args: {{MI.getOperand(i: 2).getReg(), ArgTy, 0},
                                        {MI.getOperand(i: 3).getReg(), ArgTy, 0}},
                                  LocObserver, MI: &MI);

      if (Status != LegalizerHelper::Legalized)
        return false;

      // With a single libcall we can write straight into the original
      // destination; otherwise use a temporary to combine later.
      auto ProcessedResult =
          Libcalls.size() == 1
              ? OriginalResult
              : MRI.createGenericVirtualRegister(Ty: MRI.getType(Reg: OriginalResult));

      // We have a result, but we need to transform it into a proper 1-bit 0 or
      // 1, taking into account the different peculiarities of the values
      // returned by the comparison functions.
      CmpInst::Predicate ResultPred = Libcall.Predicate;
      if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) {
        // We have a nice 0 or 1, and we just need to truncate it back to 1 bit
        // to keep the types consistent.
        MIRBuilder.buildTrunc(Res: ProcessedResult, Op: LibcallResult);
      } else {
        // We need to compare against 0.
        assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate");
        auto Zero = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: 0);
        MIRBuilder.buildICmp(Pred: ResultPred, Res: ProcessedResult, Op0: LibcallResult, Op1: Zero);
      }
      Results.push_back(Elt: ProcessedResult);
    }

    // Combine the partial results (e.g. ONE = OGT || OLT).
    if (Results.size() != 1) {
      assert(Results.size() == 2 && "Unexpected number of results");
      MIRBuilder.buildOr(Dst: OriginalResult, Src0: Results[0], Src1: Results[1]);
    }
    break;
  }
  case G_CONSTANT: {
    const ConstantInt *ConstVal = MI.getOperand(i: 1).getCImm();
    uint64_t ImmVal = ConstVal->getZExtValue();
    // Expensive immediates are turned into constant-pool loads, unless we're
    // in execute-only mode, where data access is not allowed.
    if (ConstantMaterializationCost(Val: ImmVal, Subtarget: &ST) > 2 && !ST.genExecuteOnly())
      return Helper.lowerConstant(MI) == LegalizerHelper::Legalized;
    // Cheap immediate: keep the G_CONSTANT as-is (note: no erase here).
    return true;
  }
  case G_FCONSTANT: {
    // Convert to integer constants, while preserving the binary representation.
    auto AsInteger =
        MI.getOperand(i: 1).getFPImm()->getValueAPF().bitcastToAPInt();
    MIRBuilder.buildConstant(Res: MI.getOperand(i: 0),
                             Val: *ConstantInt::get(Context&: Ctx, V: AsInteger));
    break;
  }
  case G_SET_FPMODE: {
    // New FPSCR = (FPSCR & FPStatusBits) | (Modes & ~FPStatusBits)
    // i.e. keep the status flags from the current FPSCR and take the control
    // (mode) bits from the operand.
    LLT FPEnvTy = LLT::scalar(SizeInBits: 32);
    auto FPEnv = MRI.createGenericVirtualRegister(Ty: FPEnvTy);
    Register Modes = MI.getOperand(i: 0).getReg();
    MIRBuilder.buildGetFPEnv(Dst: FPEnv);
    auto StatusBitMask = MIRBuilder.buildConstant(Res: FPEnvTy, Val: ARM::FPStatusBits);
    auto StatusBits = MIRBuilder.buildAnd(Dst: FPEnvTy, Src0: FPEnv, Src1: StatusBitMask);
    auto NotStatusBitMask =
        MIRBuilder.buildConstant(Res: FPEnvTy, Val: ~ARM::FPStatusBits);
    auto FPModeBits = MIRBuilder.buildAnd(Dst: FPEnvTy, Src0: Modes, Src1: NotStatusBitMask);
    auto NewFPSCR = MIRBuilder.buildOr(Dst: FPEnvTy, Src0: StatusBits, Src1: FPModeBits);
    MIRBuilder.buildSetFPEnv(Src: NewFPSCR);
    break;
  }
  case G_RESET_FPMODE: {
    // To get the default FP mode all control bits are cleared:
    // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
    LLT FPEnvTy = LLT::scalar(SizeInBits: 32);
    auto FPEnv = MIRBuilder.buildGetFPEnv(Dst: FPEnvTy);
    auto NotModeBitMask = MIRBuilder.buildConstant(
        Res: FPEnvTy, Val: ARM::FPStatusBits | ARM::FPReservedBits);
    auto NewFPSCR = MIRBuilder.buildAnd(Dst: FPEnvTy, Src0: FPEnv, Src1: NotModeBitMask);
    MIRBuilder.buildSetFPEnv(Src: NewFPSCR);
    break;
  }
  }

  // Cases that break out of the switch have fully replaced the instruction.
  MI.eraseFromParent();
  return true;
}
| 487 | |