//===- ARMLegalizerInfo.cpp --------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for ARM.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "ARMLegalizerInfo.h"
#include "ARMCallLowering.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;
using namespace LegalizeActions;

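// Targets that follow the ARM run-time ABI (AEABI) provide the __aeabi_*
// helper functions; for them we legalize integer remainder and soft-float
// comparisons differently below (combined divmod helper, AEABI-style compare
// results).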
static bool AEABI(const ARMSubtarget &ST) {
  return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI();
}

ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) : ST(ST) {
  using namespace TargetOpcode;

  const LLT p0 = LLT::pointer(0, 32);

  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  auto &LegacyInfo = getLegacyLegalizerInfo();
  if (ST.isThumb1Only()) {
    // Thumb1 is not supported yet.
    LegacyInfo.computeTables();
    verify(*ST.getInstrInfo());
    return;
  }

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
      .legalForCartesianProduct({s8, s16, s32}, {s1, s8, s16});

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32})
      .clampScalar(0, s32, s32);

  if (ST.hasNEON())
    getActionDefinitionsBuilder({G_ADD, G_SUB})
        .legalFor({s32, s64})
        .minScalar(0, s32);
  else
    getActionDefinitionsBuilder({G_ADD, G_SUB})
        .legalFor({s32})
        .minScalar(0, s32);

  getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL})
      .legalFor({{s32, s32}})
      .minScalar(0, s32)
      .clampScalar(1, s32, s32);

  bool HasHWDivide = (!ST.isThumb() && ST.hasDivideInARMMode()) ||
                     (ST.isThumb() && ST.hasDivideInThumbMode());
  if (HasHWDivide)
    getActionDefinitionsBuilder({G_SDIV, G_UDIV})
        .legalFor({s32})
        .clampScalar(0, s32, s32);
  else
    getActionDefinitionsBuilder({G_SDIV, G_UDIV})
        .libcallFor({s32})
        .clampScalar(0, s32, s32);

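  // Remainder: with a hardware divider, x % y is lowered to x - (x / y) * y;
  // AEABI targets instead get a custom legalization that calls the combined
  // divmod helper (see legalizeCustom); everything else uses a plain libcall.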
  auto &REMBuilder =
      getActionDefinitionsBuilder({G_SREM, G_UREM}).minScalar(0, s32);
  if (HasHWDivide)
    REMBuilder.lowerFor({s32});
  else if (AEABI(ST))
    REMBuilder.customFor({s32});
  else
    REMBuilder.libcallFor({s32});

  getActionDefinitionsBuilder(G_INTTOPTR)
      .legalFor({{p0, s32}})
      .minScalar(1, s32);
  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{s32, p0}})
      .minScalar(0, s32);

  getActionDefinitionsBuilder(G_CONSTANT)
      .customFor({s32, p0})
      .clampScalar(0, s32, s32);

  getActionDefinitionsBuilder(G_CONSTANT_POOL).legalFor({p0});

  getActionDefinitionsBuilder(G_ICMP)
      .legalForCartesianProduct({s1}, {s32, p0})
      .minScalar(1, s32);

  getActionDefinitionsBuilder(G_SELECT)
      .legalForCartesianProduct({s32, p0}, {s1})
      .minScalar(0, s32);

  // We're keeping these builders around because we'll want to add support for
  // floating point to them.
  auto &LoadStoreBuilder = getActionDefinitionsBuilder({G_LOAD, G_STORE})
                               .legalForTypesWithMemDesc({{s8, p0, s8, 8},
                                                          {s16, p0, s16, 8},
                                                          {s32, p0, s32, 8},
                                                          {p0, p0, p0, 8}})
                               .unsupportedIfMemSizeNotPow2();

  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  auto &PhiBuilder =
      getActionDefinitionsBuilder(G_PHI)
          .legalFor({s32, p0})
          .minScalar(0, s32);

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s32}})
      .minScalar(1, s32);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});

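  // Floating point: if we have a VFP2 base (and aren't forced to soft-float),
  // f32/f64 arithmetic is handled natively; otherwise it is legalized to
  // soft-float libcalls and custom lowerings.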
  if (!ST.useSoftFloat() && ST.hasVFP2Base()) {
    getActionDefinitionsBuilder(
        {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG})
        .legalFor({s32, s64});

    LoadStoreBuilder
        .legalForTypesWithMemDesc({{s64, p0, s64, 32}})
        .maxScalar(0, s32);
    PhiBuilder.legalFor({s64});

    getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({s1},
                                                                 {s32, s64});

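    // 64-bit values can be moved between a GPR pair and a VFP D register, so
    // building an s64 from two s32 halves (and splitting it back) is legal.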
    getActionDefinitionsBuilder(G_MERGE_VALUES).legalFor({{s64, s32}});
    getActionDefinitionsBuilder(G_UNMERGE_VALUES).legalFor({{s32, s64}});

    getActionDefinitionsBuilder(G_FPEXT).legalFor({{s64, s32}});
    getActionDefinitionsBuilder(G_FPTRUNC).legalFor({{s32, s64}});

    getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
        .legalForCartesianProduct({s32}, {s32, s64});
    getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
        .legalForCartesianProduct({s32, s64}, {s32});

    getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_GET_FPMODE})
        .legalFor({s32});
    getActionDefinitionsBuilder(G_RESET_FPENV).alwaysLegal();
    getActionDefinitionsBuilder(G_SET_FPMODE).customFor({s32});
    getActionDefinitionsBuilder(G_RESET_FPMODE).custom();
  } else {
    getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
        .libcallFor({s32, s64});

    LoadStoreBuilder.maxScalar(0, s32);

    getActionDefinitionsBuilder(G_FNEG).lowerFor({s32, s64});

    getActionDefinitionsBuilder(G_FCONSTANT).customFor({s32, s64});

    getActionDefinitionsBuilder(G_FCMP).customForCartesianProduct({s1},
                                                                  {s32, s64});

    if (AEABI(ST))
      setFCmpLibcallsAEABI();
    else
      setFCmpLibcallsGNU();

    getActionDefinitionsBuilder(G_FPEXT).libcallFor({{s64, s32}});
    getActionDefinitionsBuilder(G_FPTRUNC).libcallFor({{s32, s64}});

    getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
        .libcallForCartesianProduct({s32}, {s32, s64});
    getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
        .libcallForCartesianProduct({s32, s64}, {s32});

    getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV})
        .libcall();
    getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
        .libcall();
  }

  // Just expand whatever loads and stores are left.
  LoadStoreBuilder.lower();

  if (!ST.useSoftFloat() && ST.hasVFP4Base())
    getActionDefinitionsBuilder(G_FMA).legalFor({s32, s64});
  else
    getActionDefinitionsBuilder(G_FMA).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});

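  // ARMv5T and later have the CLZ instruction, so G_CTLZ is legal there and
  // the zero-undef variant is lowered to it; without CLZ it's the other way
  // around: G_CTLZ is lowered in terms of G_CTLZ_ZERO_UNDEF, which becomes a
  // libcall.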
  if (ST.hasV5TOps()) {
    getActionDefinitionsBuilder(G_CTLZ)
        .legalFor({s32, s32})
        .clampScalar(1, s32, s32)
        .clampScalar(0, s32, s32);
    getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
        .lowerFor({s32, s32})
        .clampScalar(1, s32, s32)
        .clampScalar(0, s32, s32);
  } else {
    getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
        .libcallFor({s32, s32})
        .clampScalar(1, s32, s32)
        .clampScalar(0, s32, s32);
    getActionDefinitionsBuilder(G_CTLZ)
        .lowerFor({s32, s32})
        .clampScalar(1, s32, s32)
        .clampScalar(0, s32, s32);
  }

  LegacyInfo.computeTables();
  verify(*ST.getInstrInfo());
}

void ARMLegalizerInfo::setFCmpLibcallsAEABI() {
  // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be
  // default-initialized.
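  // The AEABI comparison helpers return 0 or 1. Entries marked with
  // BAD_ICMP_PREDICATE use that result directly (it is just truncated to s1);
  // entries carrying ICMP_EQ compare the result against zero, which inverts
  // the helper's answer (e.g. "uge" is implemented as "olt == 0").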
  FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
  FCmp32Libcalls[CmpInst::FCMP_OEQ] = {
      {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp32Libcalls[CmpInst::FCMP_OGE] = {
      {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp32Libcalls[CmpInst::FCMP_OGT] = {
      {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp32Libcalls[CmpInst::FCMP_OLE] = {
      {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp32Libcalls[CmpInst::FCMP_OLT] = {
      {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F32, CmpInst::ICMP_EQ}};
  FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}};
  FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}};
  FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}};
  FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}};
  FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}};
  FCmp32Libcalls[CmpInst::FCMP_UNO] = {
      {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp32Libcalls[CmpInst::FCMP_ONE] = {
      {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE},
      {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp32Libcalls[CmpInst::FCMP_UEQ] = {
      {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE},
      {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}};

  FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
  FCmp64Libcalls[CmpInst::FCMP_OEQ] = {
      {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp64Libcalls[CmpInst::FCMP_OGE] = {
      {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp64Libcalls[CmpInst::FCMP_OGT] = {
      {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp64Libcalls[CmpInst::FCMP_OLE] = {
      {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp64Libcalls[CmpInst::FCMP_OLT] = {
      {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F64, CmpInst::ICMP_EQ}};
  FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}};
  FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}};
  FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}};
  FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}};
  FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}};
  FCmp64Libcalls[CmpInst::FCMP_UNO] = {
      {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp64Libcalls[CmpInst::FCMP_ONE] = {
      {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE},
      {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}};
  FCmp64Libcalls[CmpInst::FCMP_UEQ] = {
      {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE},
      {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}};
}

void ARMLegalizerInfo::setFCmpLibcallsGNU() {
  // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be
  // default-initialized.
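  // The GNU (libgcc-style) comparison helpers return an integer whose
  // relation to zero encodes the result, so each entry pairs the libcall with
  // the signed predicate used to compare its return value against zero (e.g.
  // "oge" checks that the helper returned a value >= 0).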
  FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
  FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}};
  FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}};
  FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}};
  FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}};
  FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}};
  FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F32, CmpInst::ICMP_EQ}};
  FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}};
  FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}};
  FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}};
  FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}};
  FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}};
  FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}};
  FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT},
                                       {RTLIB::OLT_F32, CmpInst::ICMP_SLT}};
  FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ},
                                       {RTLIB::UO_F32, CmpInst::ICMP_NE}};

  FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
  FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}};
  FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}};
  FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}};
  FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}};
  FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}};
  FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::UO_F64, CmpInst::ICMP_EQ}};
  FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}};
  FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}};
  FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}};
  FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}};
  FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}};
  FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}};
  FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT},
                                       {RTLIB::OLT_F64, CmpInst::ICMP_SLT}};
  FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ},
                                       {RTLIB::UO_F64, CmpInst::ICMP_NE}};
}

ARMLegalizerInfo::FCmpLibcallsList
ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate,
                                  unsigned Size) const {
  assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate");
  if (Size == 32)
    return FCmp32Libcalls[Predicate];
  if (Size == 64)
    return FCmp64Libcalls[Predicate];
  llvm_unreachable("Unsupported size for FCmp predicate");
}

bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
                                      LostDebugLocObserver &LocObserver) const {
  using namespace TargetOpcode;

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return false;
  case G_SREM:
  case G_UREM: {
    Register OriginalResult = MI.getOperand(0).getReg();
    auto Size = MRI.getType(OriginalResult).getSizeInBits();
    if (Size != 32)
      return false;

    auto Libcall =
        MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;

    // Our divmod libcalls return a struct containing the quotient and the
    // remainder. Create a new, unused register for the quotient and use the
    // destination of the original instruction for the remainder.
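    // On AEABI targets (the only ones that custom-legalize G_SREM/G_UREM),
    // these RTLIB entries resolve to the __aeabi_idivmod / __aeabi_uidivmod
    // run-time helpers.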
    Type *ArgTy = Type::getInt32Ty(Ctx);
    StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
    Register RetRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
                          OriginalResult};
    auto Status = createLibcall(MIRBuilder, Libcall, {RetRegs, RetTy, 0},
                                {{MI.getOperand(1).getReg(), ArgTy, 0},
                                 {MI.getOperand(2).getReg(), ArgTy, 0}},
                                LocObserver, &MI);
    if (Status != LegalizerHelper::Legalized)
      return false;
    break;
  }
  case G_FCMP: {
    assert(MRI.getType(MI.getOperand(2).getReg()) ==
               MRI.getType(MI.getOperand(3).getReg()) &&
           "Mismatched operands for G_FCMP");
    auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

    auto OriginalResult = MI.getOperand(0).getReg();
    auto Predicate =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    auto Libcalls = getFCmpLibcalls(Predicate, OpSize);

    if (Libcalls.empty()) {
      assert((Predicate == CmpInst::FCMP_TRUE ||
              Predicate == CmpInst::FCMP_FALSE) &&
             "Predicate needs libcalls, but none specified");
      MIRBuilder.buildConstant(OriginalResult,
                               Predicate == CmpInst::FCMP_TRUE ? 1 : 0);
      MI.eraseFromParent();
      return true;
    }

    assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size");
    auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
    auto *RetTy = Type::getInt32Ty(Ctx);

    SmallVector<Register, 2> Results;
    for (auto Libcall : Libcalls) {
      auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32));
      auto Status = createLibcall(MIRBuilder, Libcall.LibcallID,
                                  {LibcallResult, RetTy, 0},
                                  {{MI.getOperand(2).getReg(), ArgTy, 0},
                                   {MI.getOperand(3).getReg(), ArgTy, 0}},
                                  LocObserver, &MI);

      if (Status != LegalizerHelper::Legalized)
        return false;

      auto ProcessedResult =
          Libcalls.size() == 1
              ? OriginalResult
              : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult));

      // We have a result, but we need to transform it into a proper 1-bit 0 or
      // 1, taking into account the different peculiarities of the values
      // returned by the comparison functions.
      CmpInst::Predicate ResultPred = Libcall.Predicate;
      if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) {
        // We have a nice 0 or 1, and we just need to truncate it back to 1 bit
        // to keep the types consistent.
        MIRBuilder.buildTrunc(ProcessedResult, LibcallResult);
      } else {
        // We need to compare against 0.
        assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate");
        auto Zero = MIRBuilder.buildConstant(LLT::scalar(32), 0);
        MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero);
      }
      Results.push_back(ProcessedResult);
    }

    if (Results.size() != 1) {
      assert(Results.size() == 2 && "Unexpected number of results");
      MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]);
    }
    break;
  }
  case G_CONSTANT: {
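    // Constants that would cost more than two instructions to materialize in
    // registers are lowered instead (typically to a constant-pool load);
    // execute-only code keeps them inline since it cannot put data in the
    // text section.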
    const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
    uint64_t ImmVal = ConstVal->getZExtValue();
    if (ConstantMaterializationCost(ImmVal, &ST) > 2 && !ST.genExecuteOnly())
      return Helper.lowerConstant(MI) == LegalizerHelper::Legalized;
    return true;
  }
  case G_FCONSTANT: {
    // Convert to integer constants, while preserving the binary representation.
    auto AsInteger =
        MI.getOperand(1).getFPImm()->getValueAPF().bitcastToAPInt();
    MIRBuilder.buildConstant(MI.getOperand(0),
                             *ConstantInt::get(Ctx, AsInteger));
    break;
  }
  case G_SET_FPMODE: {
    // New FPSCR = (FPSCR & FPStatusBits) | (Modes & ~FPStatusBits)
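    // FPSCR holds the VFP status flags together with the mode (control) bits;
    // keep the current status flags and splice in the caller's mode bits.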
    LLT FPEnvTy = LLT::scalar(32);
    auto FPEnv = MRI.createGenericVirtualRegister(FPEnvTy);
    Register Modes = MI.getOperand(0).getReg();
    MIRBuilder.buildGetFPEnv(FPEnv);
    auto StatusBitMask = MIRBuilder.buildConstant(FPEnvTy, ARM::FPStatusBits);
    auto StatusBits = MIRBuilder.buildAnd(FPEnvTy, FPEnv, StatusBitMask);
    auto NotStatusBitMask =
        MIRBuilder.buildConstant(FPEnvTy, ~ARM::FPStatusBits);
    auto FPModeBits = MIRBuilder.buildAnd(FPEnvTy, Modes, NotStatusBitMask);
    auto NewFPSCR = MIRBuilder.buildOr(FPEnvTy, StatusBits, FPModeBits);
    MIRBuilder.buildSetFPEnv(NewFPSCR);
    break;
  }
  case G_RESET_FPMODE: {
    // To get the default FP mode all control bits are cleared:
    // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
    LLT FPEnvTy = LLT::scalar(32);
    auto FPEnv = MIRBuilder.buildGetFPEnv(FPEnvTy);
    auto NotModeBitMask = MIRBuilder.buildConstant(
        FPEnvTy, ARM::FPStatusBits | ARM::FPReservedBits);
    auto NewFPSCR = MIRBuilder.buildAnd(FPEnvTy, FPEnv, NotModeBitMask);
    MIRBuilder.buildSetFPEnv(NewFPSCR);
    break;
  }
  }

  MI.eraseFromParent();
  return true;
}