1 | //===- MipsLegalizerInfo.cpp ------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
/// This file implements the targeting of the MachineLegalizer class for Mips.
10 | /// \todo This should be generated by TableGen. |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "MipsLegalizerInfo.h" |
14 | #include "MipsTargetMachine.h" |
15 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
16 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" |
17 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
18 | #include "llvm/IR/IntrinsicsMips.h" |
19 | |
20 | using namespace llvm; |
21 | |
/// One legal (value type, pointer type, access size) combination for
/// G_LOAD/G_STORE, plus whether the subtarget tolerates unaligned accesses
/// of that size. Matched against a LegalityQuery in CheckTy0Ty1MemSizeAlign.
struct TypesAndMemOps {
  LLT ValTy;                          // Type of the loaded/stored value (type index 0).
  LLT PtrTy;                          // Pointer type of the address operand (type index 1).
  unsigned MemSize;                   // Memory access size in bits.
  bool SystemSupportsUnalignedAccess; // True if no natural-alignment requirement applies.
};
28 | |
29 | // Assumes power of 2 memory size. Subtargets that have only naturally-aligned |
30 | // memory access need to perform additional legalization here. |
31 | static bool isUnalignedMemmoryAccess(uint64_t MemSize, uint64_t AlignInBits) { |
32 | assert(isPowerOf2_64(MemSize) && "Expected power of 2 memory size" ); |
33 | assert(isPowerOf2_64(AlignInBits) && "Expected power of 2 align" ); |
34 | if (MemSize > AlignInBits) |
35 | return true; |
36 | return false; |
37 | } |
38 | |
39 | static bool |
40 | CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query, |
41 | std::initializer_list<TypesAndMemOps> SupportedValues) { |
42 | unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits(); |
43 | |
44 | // Non power of two memory access is never legal. |
45 | if (!isPowerOf2_64(Value: QueryMemSize)) |
46 | return false; |
47 | |
48 | for (auto &Val : SupportedValues) { |
49 | if (Val.ValTy != Query.Types[0]) |
50 | continue; |
51 | if (Val.PtrTy != Query.Types[1]) |
52 | continue; |
53 | if (Val.MemSize != QueryMemSize) |
54 | continue; |
55 | if (!Val.SystemSupportsUnalignedAccess && |
56 | isUnalignedMemmoryAccess(MemSize: QueryMemSize, AlignInBits: Query.MMODescrs[0].AlignInBits)) |
57 | return false; |
58 | return true; |
59 | } |
60 | return false; |
61 | } |
62 | |
63 | static bool CheckTyN(unsigned N, const LegalityQuery &Query, |
64 | std::initializer_list<LLT> SupportedValues) { |
65 | return llvm::is_contained(Set: SupportedValues, Element: Query.Types[N]); |
66 | } |
67 | |
/// Build the legalization rule set for Mips (32-bit pointers). Plain scalar
/// ops are clamped to s32; 128-bit MSA vector types are accepted only when
/// the subtarget has MSA.
MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
  using namespace TargetOpcode;

  // Shorthand LLTs used throughout the rule definitions below.
  const LLT s1 = LLT::scalar(SizeInBits: 1);
  const LLT s8 = LLT::scalar(SizeInBits: 8);
  const LLT s16 = LLT::scalar(SizeInBits: 16);
  const LLT s32 = LLT::scalar(SizeInBits: 32);
  const LLT s64 = LLT::scalar(SizeInBits: 64);
  const LLT v16s8 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8);
  const LLT v8s16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16);
  const LLT v4s32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
  const LLT v2s64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
  const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 32);

  // Integer arithmetic: legal at s32, and for MSA vector types when available.
  getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB, G_MUL})
      .legalIf(Predicate: [=, &ST](const LegalityQuery &Query) {
        if (CheckTyN(N: 0, Query, SupportedValues: {s32}))
          return true;
        if (ST.hasMSA() && CheckTyN(N: 0, Query, SupportedValues: {v16s8, v8s16, v4s32, v2s64}))
          return true;
        return false;
      })
      .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);

  // Carry/overflow variants have no native form: lower the {s32, s1} case.
  getActionDefinitionsBuilder(Opcodes: {G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO})
      .lowerFor(Types: {{s32, s1}});

  getActionDefinitionsBuilder(Opcode: G_UMULH)
      .legalFor(Types: {s32})
      .maxScalar(TypeIdx: 0, Ty: s32);

  // MIPS32r6 does not have alignment restrictions for memory access.
  // For MIPS32r5 and older memory access must be naturally-aligned i.e. aligned
  // to at least a multiple of its own size. There is however a two instruction
  // combination that performs 4 byte unaligned access (lwr/lwl and swl/swr)
  // therefore 4 byte load and store are legal and will use NoAlignRequirements.
  bool NoAlignRequirements = true;

  getActionDefinitionsBuilder(Opcodes: {G_LOAD, G_STORE})
      .legalIf(Predicate: [=, &ST](const LegalityQuery &Query) {
        if (CheckTy0Ty1MemSizeAlign(
                Query, SupportedValues: {{.ValTy: s32, .PtrTy: p0, .MemSize: 8, .SystemSupportsUnalignedAccess: NoAlignRequirements},
                        {.ValTy: s32, .PtrTy: p0, .MemSize: 16, .SystemSupportsUnalignedAccess: ST.systemSupportsUnalignedAccess()},
                        {.ValTy: s32, .PtrTy: p0, .MemSize: 32, .SystemSupportsUnalignedAccess: NoAlignRequirements},
                        {.ValTy: p0, .PtrTy: p0, .MemSize: 32, .SystemSupportsUnalignedAccess: NoAlignRequirements},
                        {.ValTy: s64, .PtrTy: p0, .MemSize: 64, .SystemSupportsUnalignedAccess: ST.systemSupportsUnalignedAccess()}}))
          return true;
        // MSA vector loads/stores (128-bit) have no alignment restriction.
        if (ST.hasMSA() && CheckTy0Ty1MemSizeAlign(
                               Query, SupportedValues: {{.ValTy: v16s8, .PtrTy: p0, .MemSize: 128, .SystemSupportsUnalignedAccess: NoAlignRequirements},
                                       {.ValTy: v8s16, .PtrTy: p0, .MemSize: 128, .SystemSupportsUnalignedAccess: NoAlignRequirements},
                                       {.ValTy: v4s32, .PtrTy: p0, .MemSize: 128, .SystemSupportsUnalignedAccess: NoAlignRequirements},
                                       {.ValTy: v2s64, .PtrTy: p0, .MemSize: 128, .SystemSupportsUnalignedAccess: NoAlignRequirements}}))
          return true;
        return false;
      })
      // Custom lower scalar memory access, up to 8 bytes, for:
      // - non-power-of-2 MemSizes
      // - unaligned 2 or 8 byte MemSizes for MIPS32r5 and older
      .customIf(Predicate: [=, &ST](const LegalityQuery &Query) {
        if (!Query.Types[0].isScalar() || Query.Types[1] != p0 ||
            Query.Types[0] == s1)
          return false;

        unsigned Size = Query.Types[0].getSizeInBits();
        unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
        assert(QueryMemSize <= Size && "Scalar can't hold MemSize" );

        if (Size > 64 || QueryMemSize > 64)
          return false;

        if (!isPowerOf2_64(Value: Query.MMODescrs[0].MemoryTy.getSizeInBits()))
          return true;

        if (!ST.systemSupportsUnalignedAccess() &&
            isUnalignedMemmoryAccess(MemSize: QueryMemSize,
                                     AlignInBits: Query.MMODescrs[0].AlignInBits)) {
          // 4-byte unaligned accesses are covered by legalIf (lwr/lwl, swl/swr).
          assert(QueryMemSize != 32 && "4 byte load and store are legal" );
          return true;
        }

        return false;
      })
      .minScalar(TypeIdx: 0, Ty: s32)
      .lower();

  getActionDefinitionsBuilder(Opcode: G_IMPLICIT_DEF)
      .legalFor(Types: {s32, s64});

  getActionDefinitionsBuilder(Opcode: G_UNMERGE_VALUES)
      .legalFor(Types: {{s32, s64}});

  getActionDefinitionsBuilder(Opcode: G_MERGE_VALUES)
      .legalFor(Types: {{s64, s32}});

  getActionDefinitionsBuilder(Opcodes: {G_ZEXTLOAD, G_SEXTLOAD})
      .legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 8},
                                 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 8}})
      .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);

  // Extensions/truncations are never legal directly; narrow to s32 so they
  // disappear or become register-bank concerns.
  getActionDefinitionsBuilder(Opcodes: {G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(Predicate: [](const LegalityQuery &Query) { return false; })
      .maxScalar(TypeIdx: 0, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_TRUNC)
      .legalIf(Predicate: [](const LegalityQuery &Query) { return false; })
      .maxScalar(TypeIdx: 1, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_SELECT)
      .legalForCartesianProduct(Types0: {p0, s32, s64}, Types1: {s32})
      .minScalar(TypeIdx: 0, Ty: s32)
      .minScalar(TypeIdx: 1, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_BRCOND)
      .legalFor(Types: {s32})
      .minScalar(TypeIdx: 0, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_BRJT)
      .legalFor(Types: {{p0, s32}});

  getActionDefinitionsBuilder(Opcode: G_BRINDIRECT)
      .legalFor(Types: {p0});

  getActionDefinitionsBuilder(Opcode: G_PHI)
      .legalFor(Types: {p0, s32, s64})
      .minScalar(TypeIdx: 0, Ty: s32);

  getActionDefinitionsBuilder(Opcodes: {G_AND, G_OR, G_XOR})
      .legalFor(Types: {s32})
      .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);

  // Division/remainder: s32 (and MSA vectors) are legal; s64 goes to libcalls.
  getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_SREM, G_UDIV, G_UREM})
      .legalIf(Predicate: [=, &ST](const LegalityQuery &Query) {
        if (CheckTyN(N: 0, Query, SupportedValues: {s32}))
          return true;
        if (ST.hasMSA() && CheckTyN(N: 0, Query, SupportedValues: {v16s8, v8s16, v4s32, v2s64}))
          return true;
        return false;
      })
      .minScalar(TypeIdx: 0, Ty: s32)
      .libcallFor(Types: {s64});

  getActionDefinitionsBuilder(Opcodes: {G_SHL, G_ASHR, G_LSHR})
      .legalFor(Types: {{s32, s32}})
      .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
      .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);

  getActionDefinitionsBuilder(Opcode: G_ICMP)
      .legalForCartesianProduct(Types0: {s32}, Types1: {s32, p0})
      .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
      .minScalar(TypeIdx: 0, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_CONSTANT)
      .legalFor(Types: {s32})
      .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);

  getActionDefinitionsBuilder(Opcodes: {G_PTR_ADD, G_INTTOPTR})
      .legalFor(Types: {{p0, s32}});

  getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
      .legalFor(Types: {{s32, p0}});

  getActionDefinitionsBuilder(Opcode: G_FRAME_INDEX)
      .legalFor(Types: {p0});

  getActionDefinitionsBuilder(Opcodes: {G_GLOBAL_VALUE, G_JUMP_TABLE})
      .legalFor(Types: {p0});

  getActionDefinitionsBuilder(Opcode: G_DYN_STACKALLOC)
      .lowerFor(Types: {{p0, s32}});

  getActionDefinitionsBuilder(Opcode: G_VASTART)
      .legalFor(Types: {p0});

  // G_BSWAP is a single instruction only from MIPS32r2 on; otherwise lower.
  getActionDefinitionsBuilder(Opcode: G_BSWAP)
      .legalIf(Predicate: [=, &ST](const LegalityQuery &Query) {
        if (ST.hasMips32r2() && CheckTyN(N: 0, Query, SupportedValues: {s32}))
          return true;
        return false;
      })
      .lowerIf(Predicate: [=, &ST](const LegalityQuery &Query) {
        if (!ST.hasMips32r2() && CheckTyN(N: 0, Query, SupportedValues: {s32}))
          return true;
        return false;
      })
      .maxScalar(TypeIdx: 0, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_BITREVERSE)
      .lowerFor(Types: {s32})
      .maxScalar(TypeIdx: 0, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_CTLZ)
      .legalFor(Types: {{s32, s32}})
      .maxScalar(TypeIdx: 0, Ty: s32)
      .maxScalar(TypeIdx: 1, Ty: s32);
  getActionDefinitionsBuilder(Opcode: G_CTLZ_ZERO_UNDEF)
      .lowerFor(Types: {{s32, s32}});

  getActionDefinitionsBuilder(Opcode: G_CTTZ)
      .lowerFor(Types: {{s32, s32}})
      .maxScalar(TypeIdx: 0, Ty: s32)
      .maxScalar(TypeIdx: 1, Ty: s32);
  getActionDefinitionsBuilder(Opcode: G_CTTZ_ZERO_UNDEF)
      .lowerFor(Types: {{s32, s32}, {s64, s64}});

  getActionDefinitionsBuilder(Opcode: G_CTPOP)
      .lowerFor(Types: {{s32, s32}})
      .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32)
      .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32);

  // FP instructions
  getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
      .legalFor(Types: {s32, s64});

  getActionDefinitionsBuilder(Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT})
      .legalIf(Predicate: [=, &ST](const LegalityQuery &Query) {
        if (CheckTyN(N: 0, Query, SupportedValues: {s32, s64}))
          return true;
        if (ST.hasMSA() && CheckTyN(N: 0, Query, SupportedValues: {v16s8, v8s16, v4s32, v2s64}))
          return true;
        return false;
      });

  getActionDefinitionsBuilder(Opcode: G_FCMP)
      .legalFor(Types: {{s32, s32}, {s32, s64}})
      .minScalar(TypeIdx: 0, Ty: s32);

  getActionDefinitionsBuilder(Opcodes: {G_FCEIL, G_FFLOOR})
      .libcallFor(Types: {s32, s64});

  getActionDefinitionsBuilder(Opcode: G_FPEXT)
      .legalFor(Types: {{s64, s32}});

  getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
      .legalFor(Types: {{s32, s64}});

  // FP to int conversion instructions
  getActionDefinitionsBuilder(Opcode: G_FPTOSI)
      .legalForCartesianProduct(Types0: {s32}, Types1: {s64, s32})
      .libcallForCartesianProduct(Types0: {s64}, Types1: {s64, s32})
      .minScalar(TypeIdx: 0, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_FPTOUI)
      .libcallForCartesianProduct(Types0: {s64}, Types1: {s64, s32})
      .lowerForCartesianProduct(Types0: {s32}, Types1: {s64, s32})
      .minScalar(TypeIdx: 0, Ty: s32);

  // Int to FP conversion instructions
  getActionDefinitionsBuilder(Opcode: G_SITOFP)
      .legalForCartesianProduct(Types0: {s64, s32}, Types1: {s32})
      .libcallForCartesianProduct(Types0: {s64, s32}, Types1: {s64})
      .minScalar(TypeIdx: 1, Ty: s32);

  // Unsigned s32 sources are custom-lowered via legalizeCustom (G_UITOFP).
  getActionDefinitionsBuilder(Opcode: G_UITOFP)
      .libcallForCartesianProduct(Types0: {s64, s32}, Types1: {s64})
      .customForCartesianProduct(Types0: {s64, s32}, Types1: {s32})
      .minScalar(TypeIdx: 1, Ty: s32);

  getActionDefinitionsBuilder(Opcode: G_SEXT_INREG).lower();

  getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getLegacyLegalizerInfo().computeTables();
  verify(MII: *ST.getInstrInfo());
}
332 | |
333 | bool MipsLegalizerInfo::legalizeCustom( |
334 | LegalizerHelper &Helper, MachineInstr &MI, |
335 | LostDebugLocObserver &LocObserver) const { |
336 | using namespace TargetOpcode; |
337 | |
338 | MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
339 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
340 | |
341 | const LLT s32 = LLT::scalar(SizeInBits: 32); |
342 | const LLT s64 = LLT::scalar(SizeInBits: 64); |
343 | |
344 | switch (MI.getOpcode()) { |
345 | case G_LOAD: |
346 | case G_STORE: { |
347 | unsigned MemSize = (**MI.memoperands_begin()).getSize().getValue(); |
348 | Register Val = MI.getOperand(i: 0).getReg(); |
349 | unsigned Size = MRI.getType(Reg: Val).getSizeInBits(); |
350 | |
351 | MachineMemOperand *MMOBase = *MI.memoperands_begin(); |
352 | |
353 | assert(MemSize <= 8 && "MemSize is too large" ); |
354 | assert(Size <= 64 && "Scalar size is too large" ); |
355 | |
356 | // Split MemSize into two, P2HalfMemSize is largest power of two smaller |
357 | // then MemSize. e.g. 8 = 4 + 4 , 6 = 4 + 2, 3 = 2 + 1. |
358 | unsigned P2HalfMemSize, RemMemSize; |
359 | if (isPowerOf2_64(Value: MemSize)) { |
360 | P2HalfMemSize = RemMemSize = MemSize / 2; |
361 | } else { |
362 | P2HalfMemSize = 1 << Log2_32(Value: MemSize); |
363 | RemMemSize = MemSize - P2HalfMemSize; |
364 | } |
365 | |
366 | Register BaseAddr = MI.getOperand(i: 1).getReg(); |
367 | LLT PtrTy = MRI.getType(Reg: BaseAddr); |
368 | MachineFunction &MF = MIRBuilder.getMF(); |
369 | |
370 | auto P2HalfMemOp = MF.getMachineMemOperand(MMO: MMOBase, Offset: 0, Size: P2HalfMemSize); |
371 | auto RemMemOp = MF.getMachineMemOperand(MMO: MMOBase, Offset: P2HalfMemSize, Size: RemMemSize); |
372 | |
373 | if (MI.getOpcode() == G_STORE) { |
374 | // Widen Val to s32 or s64 in order to create legal G_LSHR or G_UNMERGE. |
375 | if (Size < 32) |
376 | Val = MIRBuilder.buildAnyExt(Res: s32, Op: Val).getReg(Idx: 0); |
377 | if (Size > 32 && Size < 64) |
378 | Val = MIRBuilder.buildAnyExt(Res: s64, Op: Val).getReg(Idx: 0); |
379 | |
380 | auto C_P2HalfMemSize = MIRBuilder.buildConstant(Res: s32, Val: P2HalfMemSize); |
381 | auto Addr = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: BaseAddr, Op1: C_P2HalfMemSize); |
382 | |
383 | if (MI.getOpcode() == G_STORE && MemSize <= 4) { |
384 | MIRBuilder.buildStore(Val, Addr: BaseAddr, MMO&: *P2HalfMemOp); |
385 | auto C_P2Half_InBits = MIRBuilder.buildConstant(Res: s32, Val: P2HalfMemSize * 8); |
386 | auto Shift = MIRBuilder.buildLShr(Dst: s32, Src0: Val, Src1: C_P2Half_InBits); |
387 | MIRBuilder.buildStore(Val: Shift, Addr, MMO&: *RemMemOp); |
388 | } else { |
389 | auto Unmerge = MIRBuilder.buildUnmerge(Res: s32, Op: Val); |
390 | MIRBuilder.buildStore(Val: Unmerge.getReg(Idx: 0), Addr: BaseAddr, MMO&: *P2HalfMemOp); |
391 | MIRBuilder.buildStore(Val: Unmerge.getReg(Idx: 1), Addr, MMO&: *RemMemOp); |
392 | } |
393 | } |
394 | |
395 | if (MI.getOpcode() == G_LOAD) { |
396 | |
397 | if (MemSize <= 4) { |
398 | // This is anyextending load, use 4 byte lwr/lwl. |
399 | auto *Load4MMO = MF.getMachineMemOperand(MMO: MMOBase, Offset: 0, Size: 4); |
400 | |
401 | if (Size == 32) |
402 | MIRBuilder.buildLoad(Res: Val, Addr: BaseAddr, MMO&: *Load4MMO); |
403 | else { |
404 | auto Load = MIRBuilder.buildLoad(Res: s32, Addr: BaseAddr, MMO&: *Load4MMO); |
405 | MIRBuilder.buildTrunc(Res: Val, Op: Load.getReg(Idx: 0)); |
406 | } |
407 | |
408 | } else { |
409 | auto C_P2HalfMemSize = MIRBuilder.buildConstant(Res: s32, Val: P2HalfMemSize); |
410 | auto Addr = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: BaseAddr, Op1: C_P2HalfMemSize); |
411 | |
412 | auto Load_P2Half = MIRBuilder.buildLoad(Res: s32, Addr: BaseAddr, MMO&: *P2HalfMemOp); |
413 | auto Load_Rem = MIRBuilder.buildLoad(Res: s32, Addr, MMO&: *RemMemOp); |
414 | |
415 | if (Size == 64) |
416 | MIRBuilder.buildMergeLikeInstr(Res: Val, Ops: {Load_P2Half, Load_Rem}); |
417 | else { |
418 | auto Merge = |
419 | MIRBuilder.buildMergeLikeInstr(Res: s64, Ops: {Load_P2Half, Load_Rem}); |
420 | MIRBuilder.buildTrunc(Res: Val, Op: Merge); |
421 | } |
422 | } |
423 | } |
424 | MI.eraseFromParent(); |
425 | break; |
426 | } |
427 | case G_UITOFP: { |
428 | Register Dst = MI.getOperand(i: 0).getReg(); |
429 | Register Src = MI.getOperand(i: 1).getReg(); |
430 | LLT DstTy = MRI.getType(Reg: Dst); |
431 | LLT SrcTy = MRI.getType(Reg: Src); |
432 | |
433 | if (SrcTy != s32) |
434 | return false; |
435 | if (DstTy != s32 && DstTy != s64) |
436 | return false; |
437 | |
438 | // Let 0xABCDEFGH be given unsigned in MI.getOperand(1). First let's convert |
439 | // unsigned to double. Mantissa has 52 bits so we use following trick: |
440 | // First make floating point bit mask 0x43300000ABCDEFGH. |
441 | // Mask represents 2^52 * 0x1.00000ABCDEFGH i.e. 0x100000ABCDEFGH.0 . |
442 | // Next, subtract 2^52 * 0x1.0000000000000 i.e. 0x10000000000000.0 from it. |
443 | // Done. Trunc double to float if needed. |
444 | |
445 | auto C_HiMask = MIRBuilder.buildConstant(Res: s32, UINT32_C(0x43300000)); |
446 | auto Bitcast = |
447 | MIRBuilder.buildMergeLikeInstr(Res: s64, Ops: {Src, C_HiMask.getReg(Idx: 0)}); |
448 | |
449 | MachineInstrBuilder TwoP52FP = MIRBuilder.buildFConstant( |
450 | Res: s64, Val: llvm::bit_cast<double>(UINT64_C(0x4330000000000000))); |
451 | |
452 | if (DstTy == s64) |
453 | MIRBuilder.buildFSub(Dst, Src0: Bitcast, Src1: TwoP52FP); |
454 | else { |
455 | MachineInstrBuilder ResF64 = MIRBuilder.buildFSub(Dst: s64, Src0: Bitcast, Src1: TwoP52FP); |
456 | MIRBuilder.buildFPTrunc(Res: Dst, Op: ResF64); |
457 | } |
458 | |
459 | MI.eraseFromParent(); |
460 | break; |
461 | } |
462 | default: |
463 | return false; |
464 | } |
465 | |
466 | return true; |
467 | } |
468 | |
469 | static bool SelectMSA3OpIntrinsic(MachineInstr &MI, unsigned Opcode, |
470 | MachineIRBuilder &MIRBuilder, |
471 | const MipsSubtarget &ST) { |
472 | assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA." ); |
473 | if (!MIRBuilder.buildInstr(Opcode) |
474 | .add(MO: MI.getOperand(i: 0)) |
475 | .add(MO: MI.getOperand(i: 2)) |
476 | .add(MO: MI.getOperand(i: 3)) |
477 | .constrainAllUses(TII: MIRBuilder.getTII(), TRI: *ST.getRegisterInfo(), |
478 | RBI: *ST.getRegBankInfo())) |
479 | return false; |
480 | MI.eraseFromParent(); |
481 | return true; |
482 | } |
483 | |
484 | static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, |
485 | MachineIRBuilder &MIRBuilder, |
486 | const MipsSubtarget &ST) { |
487 | assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA." ); |
488 | MIRBuilder.buildInstr(Opcode) |
489 | .add(MO: MI.getOperand(i: 0)) |
490 | .add(MO: MI.getOperand(i: 2)) |
491 | .add(MO: MI.getOperand(i: 3)); |
492 | MI.eraseFromParent(); |
493 | return true; |
494 | } |
495 | |
496 | static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, |
497 | MachineIRBuilder &MIRBuilder, |
498 | const MipsSubtarget &ST) { |
499 | assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA." ); |
500 | MIRBuilder.buildInstr(Opcode) |
501 | .add(MO: MI.getOperand(i: 0)) |
502 | .add(MO: MI.getOperand(i: 2)); |
503 | MI.eraseFromParent(); |
504 | return true; |
505 | } |
506 | |
507 | bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, |
508 | MachineInstr &MI) const { |
509 | MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
510 | const MipsSubtarget &ST = MI.getMF()->getSubtarget<MipsSubtarget>(); |
511 | |
512 | switch (cast<GIntrinsic>(Val&: MI).getIntrinsicID()) { |
513 | case Intrinsic::vacopy: { |
514 | MachinePointerInfo MPO; |
515 | LLT PtrTy = LLT::pointer(AddressSpace: 0, SizeInBits: 32); |
516 | auto Tmp = |
517 | MIRBuilder.buildLoad(Res: PtrTy, Addr: MI.getOperand(i: 2), |
518 | MMO&: *MI.getMF()->getMachineMemOperand( |
519 | PtrInfo: MPO, f: MachineMemOperand::MOLoad, MemTy: PtrTy, base_alignment: Align(4))); |
520 | MIRBuilder.buildStore(Val: Tmp, Addr: MI.getOperand(i: 1), |
521 | MMO&: *MI.getMF()->getMachineMemOperand( |
522 | PtrInfo: MPO, f: MachineMemOperand::MOStore, MemTy: PtrTy, base_alignment: Align(4))); |
523 | MI.eraseFromParent(); |
524 | return true; |
525 | } |
526 | case Intrinsic::mips_addv_b: |
527 | case Intrinsic::mips_addv_h: |
528 | case Intrinsic::mips_addv_w: |
529 | case Intrinsic::mips_addv_d: |
530 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_ADD, MIRBuilder, ST); |
531 | case Intrinsic::mips_addvi_b: |
532 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::ADDVI_B, MIRBuilder, ST); |
533 | case Intrinsic::mips_addvi_h: |
534 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::ADDVI_H, MIRBuilder, ST); |
535 | case Intrinsic::mips_addvi_w: |
536 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::ADDVI_W, MIRBuilder, ST); |
537 | case Intrinsic::mips_addvi_d: |
538 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::ADDVI_D, MIRBuilder, ST); |
539 | case Intrinsic::mips_subv_b: |
540 | case Intrinsic::mips_subv_h: |
541 | case Intrinsic::mips_subv_w: |
542 | case Intrinsic::mips_subv_d: |
543 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_SUB, MIRBuilder, ST); |
544 | case Intrinsic::mips_subvi_b: |
545 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::SUBVI_B, MIRBuilder, ST); |
546 | case Intrinsic::mips_subvi_h: |
547 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::SUBVI_H, MIRBuilder, ST); |
548 | case Intrinsic::mips_subvi_w: |
549 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::SUBVI_W, MIRBuilder, ST); |
550 | case Intrinsic::mips_subvi_d: |
551 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::SUBVI_D, MIRBuilder, ST); |
552 | case Intrinsic::mips_mulv_b: |
553 | case Intrinsic::mips_mulv_h: |
554 | case Intrinsic::mips_mulv_w: |
555 | case Intrinsic::mips_mulv_d: |
556 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_MUL, MIRBuilder, ST); |
557 | case Intrinsic::mips_div_s_b: |
558 | case Intrinsic::mips_div_s_h: |
559 | case Intrinsic::mips_div_s_w: |
560 | case Intrinsic::mips_div_s_d: |
561 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_SDIV, MIRBuilder, ST); |
562 | case Intrinsic::mips_mod_s_b: |
563 | case Intrinsic::mips_mod_s_h: |
564 | case Intrinsic::mips_mod_s_w: |
565 | case Intrinsic::mips_mod_s_d: |
566 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_SREM, MIRBuilder, ST); |
567 | case Intrinsic::mips_div_u_b: |
568 | case Intrinsic::mips_div_u_h: |
569 | case Intrinsic::mips_div_u_w: |
570 | case Intrinsic::mips_div_u_d: |
571 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_UDIV, MIRBuilder, ST); |
572 | case Intrinsic::mips_mod_u_b: |
573 | case Intrinsic::mips_mod_u_h: |
574 | case Intrinsic::mips_mod_u_w: |
575 | case Intrinsic::mips_mod_u_d: |
576 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_UREM, MIRBuilder, ST); |
577 | case Intrinsic::mips_fadd_w: |
578 | case Intrinsic::mips_fadd_d: |
579 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_FADD, MIRBuilder, ST); |
580 | case Intrinsic::mips_fsub_w: |
581 | case Intrinsic::mips_fsub_d: |
582 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_FSUB, MIRBuilder, ST); |
583 | case Intrinsic::mips_fmul_w: |
584 | case Intrinsic::mips_fmul_d: |
585 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_FMUL, MIRBuilder, ST); |
586 | case Intrinsic::mips_fdiv_w: |
587 | case Intrinsic::mips_fdiv_d: |
588 | return MSA3OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_FDIV, MIRBuilder, ST); |
589 | case Intrinsic::mips_fmax_a_w: |
590 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::FMAX_A_W, MIRBuilder, ST); |
591 | case Intrinsic::mips_fmax_a_d: |
592 | return SelectMSA3OpIntrinsic(MI, Opcode: Mips::FMAX_A_D, MIRBuilder, ST); |
593 | case Intrinsic::mips_fsqrt_w: |
594 | return MSA2OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_FSQRT, MIRBuilder, ST); |
595 | case Intrinsic::mips_fsqrt_d: |
596 | return MSA2OpIntrinsicToGeneric(MI, Opcode: TargetOpcode::G_FSQRT, MIRBuilder, ST); |
597 | default: |
598 | break; |
599 | } |
600 | return true; |
601 | } |
602 | |