//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstring>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

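// Renames e.g. "llvm.x86.sse41.ptestc" to "llvm.x86.sse41.ptestc.old",
// freeing the original name for a fresh declaration with the current
// signature; calls to the ".old" function are rewritten by
// UpgradeIntrinsicCall.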
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
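// IR sketch of the declaration change handled above:
//   old: declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
//   new: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)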

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
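// For example, the trailing immediate of llvm.x86.sse41.insertps changed from
// i32 to i8; the actual truncation of the call operand happens when the call
// site itself is upgraded.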

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which LLVM
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||        // Added in 3.7
            Name == "cvt.ps2.pd.256" ||           // Added in 3.9
            Name == "cvtdq2.pd.256" ||            // Added in 3.9
            Name == "cvtdq2.ps.256" ||            // Added in 7.0
            Name.starts_with("movnt.") ||         // Added in 3.2
            Name.starts_with("sqrt.p") ||         // Added in 7.0
            Name.starts_with("storeu.") ||        // Added in 3.9
            Name.starts_with("vbroadcast.s") ||   // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") ||  // Added in 3.7
            Name.starts_with("vinsertf128.") ||   // Added in 3.7
            Name.starts_with("vperm2f128.") ||    // Added in 6.0
            Name.starts_with("vpermil."));        // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||             // Added in 5.0
            Name.starts_with("pabs.") ||      // Added in 6.0
            Name.starts_with("padds.") ||     // Added in 8.0
            Name.starts_with("paddus.") ||    // Added in 8.0
            Name.starts_with("pblendd.") ||   // Added in 3.7
            Name == "pblendw" ||              // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") ||    // Added in 3.1
            Name.starts_with("pcmpgt.") ||    // Added in 3.1
            Name.starts_with("pmax") ||       // Added in 3.9
            Name.starts_with("pmin") ||       // Added in 3.9
            Name.starts_with("pmovsx") ||     // Added in 3.9
            Name.starts_with("pmovzx") ||     // Added in 3.9
            Name == "pmul.dq" ||              // Added in 7.0
            Name == "pmulu.dq" ||             // Added in 7.0
            Name.starts_with("psll.dq") ||    // Added in 3.7
            Name.starts_with("psrl.dq") ||    // Added in 3.7
            Name.starts_with("psubs.") ||     // Added in 8.0
            Name.starts_with("psubus.") ||    // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" ||       // Added in 3.7
            Name == "vextracti128" ||         // Added in 3.7
            Name == "vinserti128" ||          // Added in 3.7
            Name == "vperm2i128");            // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") ||           // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") ||            // Added in 3.9
              Name.starts_with("andn.") ||           // Added in 3.9
              Name.starts_with("broadcast.s") ||     // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") ||           // Added in 5.0
              Name.starts_with("cmp.d") ||           // Added in 5.0
              Name.starts_with("cmp.q") ||           // Added in 5.0
              Name.starts_with("cmp.w") ||           // Added in 5.0
              Name.starts_with("compress.b") ||      // Added in 9.0
              Name.starts_with("compress.d") ||      // Added in 9.0
              Name.starts_with("compress.p") ||      // Added in 9.0
              Name.starts_with("compress.q") ||      // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") ||      // Added in 9.0
              Name.starts_with("conflict.") ||       // Added in 9.0
              Name.starts_with("cvtdq2pd.") ||       // Added in 4.0
              Name.starts_with("cvtdq2ps.") ||       // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" ||              // Added in 7.0
              Name == "cvtpd2ps.256" ||              // Added in 7.0
              Name == "cvtps2pd.128" ||              // Added in 7.0
              Name == "cvtps2pd.256" ||              // Added in 7.0
              Name.starts_with("cvtqq2pd.") ||       // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" ||              // Added in 9.0
              Name == "cvtqq2ps.512" ||              // Added in 9.0
              Name == "cvttpd2dq.256" ||             // Added in 7.0
              Name == "cvttps2dq.128" ||             // Added in 7.0
              Name == "cvttps2dq.256" ||             // Added in 7.0
              Name.starts_with("cvtudq2pd.") ||      // Added in 4.0
              Name.starts_with("cvtudq2ps.") ||      // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") ||      // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" ||             // Added in 9.0
              Name == "cvtuqq2ps.512" ||             // Added in 9.0
              Name.starts_with("dbpsadbw.") ||       // Added in 7.0
              Name.starts_with("div.p") ||           // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") ||        // Added in 9.0
              Name.starts_with("expand.d") ||        // Added in 9.0
              Name.starts_with("expand.load.") ||    // Added in 7.0
              Name.starts_with("expand.p") ||        // Added in 9.0
              Name.starts_with("expand.q") ||        // Added in 9.0
              Name.starts_with("expand.w") ||        // Added in 9.0
              Name.starts_with("fpclass.p") ||       // Added in 7.0
              Name.starts_with("insert") ||          // Added in 4.0
              Name.starts_with("load.") ||           // Added in 3.9
              Name.starts_with("loadu.") ||          // Added in 3.9
              Name.starts_with("lzcnt.") ||          // Added in 5.0
              Name.starts_with("max.p") ||           // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") ||           // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") ||         // Added in 3.9
              Name.starts_with("move.s") ||          // Added in 4.0
              Name.starts_with("movshdup") ||        // Added in 3.9
              Name.starts_with("movsldup") ||        // Added in 3.9
              Name.starts_with("mul.p") ||           // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") ||             // Added in 3.9
              Name.starts_with("pabs.") ||           // Added in 6.0
              Name.starts_with("packssdw.") ||       // Added in 5.0
              Name.starts_with("packsswb.") ||       // Added in 5.0
              Name.starts_with("packusdw.") ||       // Added in 5.0
              Name.starts_with("packuswb.") ||       // Added in 5.0
              Name.starts_with("padd.") ||           // Added in 4.0
              Name.starts_with("padds.") ||          // Added in 8.0
              Name.starts_with("paddus.") ||         // Added in 8.0
              Name.starts_with("palignr.") ||        // Added in 3.9
              Name.starts_with("pand.") ||           // Added in 3.9
              Name.starts_with("pandn.") ||          // Added in 3.9
              Name.starts_with("pavg") ||            // Added in 6.0
              Name.starts_with("pbroadcast") ||      // Added in 6.0
              Name.starts_with("pcmpeq.") ||         // Added in 3.9
              Name.starts_with("pcmpgt.") ||         // Added in 3.9
              Name.starts_with("perm.df.") ||        // Added in 3.9
              Name.starts_with("perm.di.") ||        // Added in 3.9
              Name.starts_with("permvar.") ||        // Added in 7.0
              Name.starts_with("pmaddubs.w.") ||     // Added in 7.0
              Name.starts_with("pmaddw.d.") ||       // Added in 7.0
              Name.starts_with("pmax") ||            // Added in 4.0
              Name.starts_with("pmin") ||            // Added in 4.0
              Name == "pmov.qd.256" ||               // Added in 9.0
              Name == "pmov.qd.512" ||               // Added in 9.0
              Name == "pmov.wb.256" ||               // Added in 9.0
              Name == "pmov.wb.512" ||               // Added in 9.0
              Name.starts_with("pmovsx") ||          // Added in 4.0
              Name.starts_with("pmovzx") ||          // Added in 4.0
              Name.starts_with("pmul.dq.") ||        // Added in 4.0
              Name.starts_with("pmul.hr.sw.") ||     // Added in 7.0
              Name.starts_with("pmulh.w.") ||        // Added in 7.0
              Name.starts_with("pmulhu.w.") ||       // Added in 7.0
              Name.starts_with("pmull.") ||          // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") ||       // Added in 4.0
              Name.starts_with("por.") ||            // Added in 3.9
              Name.starts_with("prol.") ||           // Added in 8.0
              Name.starts_with("prolv.") ||          // Added in 8.0
              Name.starts_with("pror.") ||           // Added in 8.0
              Name.starts_with("prorv.") ||          // Added in 8.0
              Name.starts_with("pshuf.b.") ||        // Added in 4.0
              Name.starts_with("pshuf.d.") ||        // Added in 3.9
              Name.starts_with("pshufh.w.") ||       // Added in 3.9
              Name.starts_with("pshufl.w.") ||       // Added in 3.9
              Name.starts_with("psll.d") ||          // Added in 4.0
              Name.starts_with("psll.q") ||          // Added in 4.0
              Name.starts_with("psll.w") ||          // Added in 4.0
              Name.starts_with("pslli") ||           // Added in 4.0
              Name.starts_with("psllv") ||           // Added in 4.0
              Name.starts_with("psra.d") ||          // Added in 4.0
              Name.starts_with("psra.q") ||          // Added in 4.0
              Name.starts_with("psra.w") ||          // Added in 4.0
              Name.starts_with("psrai") ||           // Added in 4.0
              Name.starts_with("psrav") ||           // Added in 4.0
              Name.starts_with("psrl.d") ||          // Added in 4.0
              Name.starts_with("psrl.q") ||          // Added in 4.0
              Name.starts_with("psrl.w") ||          // Added in 4.0
              Name.starts_with("psrli") ||           // Added in 4.0
              Name.starts_with("psrlv") ||           // Added in 4.0
              Name.starts_with("psub.") ||           // Added in 4.0
              Name.starts_with("psubs.") ||          // Added in 8.0
              Name.starts_with("psubus.") ||         // Added in 8.0
              Name.starts_with("pternlog.") ||       // Added in 7.0
              Name.starts_with("punpckh") ||         // Added in 3.9
              Name.starts_with("punpckl") ||         // Added in 3.9
              Name.starts_with("pxor.") ||           // Added in 3.9
              Name.starts_with("shuf.f") ||          // Added in 6.0
              Name.starts_with("shuf.i") ||          // Added in 6.0
              Name.starts_with("shuf.p") ||          // Added in 4.0
              Name.starts_with("sqrt.p") ||          // Added in 7.0
              Name.starts_with("store.b.") ||        // Added in 3.9
              Name.starts_with("store.d.") ||        // Added in 3.9
              Name.starts_with("store.p") ||         // Added in 3.9
              Name.starts_with("store.q.") ||        // Added in 3.9
              Name.starts_with("store.w.") ||        // Added in 3.9
              Name == "store.ss" ||                  // Added in 7.0
              Name.starts_with("storeu.") ||         // Added in 3.9
              Name.starts_with("sub.p") ||           // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") ||           // Added in 5.0
              Name.starts_with("unpckh.") ||         // Added in 3.9
              Name.starts_with("unpckl.") ||         // Added in 3.9
              Name.starts_with("valign.") ||         // Added in 4.0
              Name == "vcvtph2ps.128" ||             // Added in 11.0
              Name == "vcvtph2ps.256" ||             // Added in 11.0
              Name.starts_with("vextract") ||        // Added in 4.0
              Name.starts_with("vfmadd.") ||         // Added in 7.0
              Name.starts_with("vfmaddsub.") ||      // Added in 7.0
              Name.starts_with("vfnmadd.") ||        // Added in 7.0
              Name.starts_with("vfnmsub.") ||        // Added in 7.0
              Name.starts_with("vpdpbusd.") ||       // Added in 7.0
              Name.starts_with("vpdpbusds.") ||      // Added in 7.0
              Name.starts_with("vpdpwssd.") ||       // Added in 7.0
              Name.starts_with("vpdpwssds.") ||      // Added in 7.0
              Name.starts_with("vpermi2var.") ||     // Added in 7.0
              Name.starts_with("vpermil.p") ||       // Added in 3.9
              Name.starts_with("vpermilvar.") ||     // Added in 4.0
              Name.starts_with("vpermt2var.") ||     // Added in 7.0
              Name.starts_with("vpmadd52") ||        // Added in 7.0
              Name.starts_with("vpshld.") ||         // Added in 7.0
              Name.starts_with("vpshldv.") ||        // Added in 8.0
              Name.starts_with("vpshrd.") ||         // Added in 7.0
              Name.starts_with("vpshrdv.") ||        // Added in 8.0
              Name.starts_with("vpshufbitqmb.") ||   // Added in 8.0
              Name.starts_with("xor."));             // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") ||    // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") ||    // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub."));    // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") ||   // Added in 7.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrdv."));     // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" ||               // Added in 5.0
            Name == "pmul.dq.512" ||            // Added in 7.0
            Name == "pmulu.dq.512" ||           // Added in 7.0
            Name.starts_with("broadcastm") ||   // Added in 6.0
            Name.starts_with("cmp.p") ||        // Added in 12.0
            Name.starts_with("cvtb2mask.") ||   // Added in 7.0
            Name.starts_with("cvtd2mask.") ||   // Added in 7.0
            Name.starts_with("cvtmask2") ||     // Added in 5.0
            Name.starts_with("cvtq2mask.") ||   // Added in 7.0
            Name == "cvtusi2sd" ||              // Added in 7.0
            Name.starts_with("cvtw2mask.") ||   // Added in 7.0
            Name == "kand.w" ||                 // Added in 7.0
            Name == "kandn.w" ||                // Added in 7.0
            Name == "knot.w" ||                 // Added in 7.0
            Name == "kor.w" ||                  // Added in 7.0
            Name == "kortestc.w" ||             // Added in 7.0
            Name == "kortestz.w" ||             // Added in 7.0
            Name.starts_with("kunpck") ||       // added in 6.0
            Name == "kxnor.w" ||                // Added in 7.0
            Name == "kxor.w" ||                 // Added in 7.0
            Name.starts_with("padds.") ||       // Added in 8.0
            Name.starts_with("pbroadcast") ||   // Added in 3.9
            Name.starts_with("prol") ||         // Added in 8.0
            Name.starts_with("pror") ||         // Added in 8.0
            Name.starts_with("psll.dq") ||      // Added in 3.9
            Name.starts_with("psrl.dq") ||      // Added in 3.9
            Name.starts_with("psubs.") ||       // Added in 8.0
            Name.starts_with("ptestm") ||       // Added in 6.0
            Name.starts_with("ptestnm") ||      // Added in 6.0
            Name.starts_with("storent.") ||     // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") ||      // Added in 8.0
            Name.starts_with("vpshrd."));       // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||    // Added in 7.0
            Name.starts_with("vfmsub.") ||    // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") ||   // Added in 7.0
            Name.starts_with("vfnmsub."));    // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||            // Added in 4.0
            Name == "cvtsi2ss" ||          // Added in 7.0
            Name == "cvtsi642ss" ||        // Added in 7.0
            Name == "div.ss" ||            // Added in 4.0
            Name == "mul.ss" ||            // Added in 4.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.ss" ||           // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss");             // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||            // Added in 4.0
            Name == "cvtdq2pd" ||          // Added in 3.9
            Name == "cvtdq2ps" ||          // Added in 7.0
            Name == "cvtps2pd" ||          // Added in 3.9
            Name == "cvtsi2sd" ||          // Added in 7.0
            Name == "cvtsi642sd" ||        // Added in 7.0
            Name == "cvtss2sd" ||          // Added in 7.0
            Name == "div.sd" ||            // Added in 4.0
            Name == "mul.sd" ||            // Added in 4.0
            Name.starts_with("padds.") ||  // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" ||           // Added in 3.9
            Name == "pmaxu.b" ||           // Added in 3.9
            Name == "pmins.w" ||           // Added in 3.9
            Name == "pminu.b" ||           // Added in 3.9
            Name == "pmulu.dq" ||          // Added in 7.0
            Name.starts_with("pshuf") ||   // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") ||  // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.sd" ||           // Added in 7.0
            Name == "storel.dq" ||         // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd");             // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" ||         // Added in 5.0
            Name == "pblendw" ||          // Added in 3.7
            Name == "pmaxsb" ||           // Added in 3.9
            Name == "pmaxsd" ||           // Added in 3.9
            Name == "pmaxud" ||           // Added in 3.9
            Name == "pmaxuw" ||           // Added in 3.9
            Name == "pminsb" ||           // Added in 3.9
            Name == "pminsd" ||           // Added in 3.9
            Name == "pminud" ||           // Added in 3.9
            Name == "pminuw" ||           // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq");            // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128");  // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||         // Added in 3.8
            Name == "vpcmov.256" ||     // Added in 5.0
            Name.starts_with("vpcom") ||// Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" ||          // Added in 8.0
          Name == "addcarry.u64" ||          // Added in 8.0
          Name == "addcarryx.u32" ||         // Added in 8.0
          Name == "addcarryx.u64" ||         // Added in 8.0
          Name == "subborrow.u32" ||         // Added in 8.0
          Name == "subborrow.u64" ||         // Added in 8.0
          Name.starts_with("vcvtph2ps."));   // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }
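  // NewFn is deliberately left null here: these intrinsics have no
  // replacement declaration, and their call sites are rewritten later in
  // UpgradeIntrinsicCall.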

  if (Name == "rdtscp") { // Added in 8.0
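    // (The old form returned the TSC_AUX value through a pointer operand; the
    // current intrinsic takes no operands and returns it as part of its
    // aggregate result instead, hence the zero-operand check below.)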
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                      F->arg_begin()->getType());
    return true;
  }
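  // E.g. 'llvm.arm.rbit.i32' becomes the generic 'llvm.bitreverse.i32',
  // overloaded on the unchanged argument type.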

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn =
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

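          // A plain vstN has N + 2 operands (pointer, N vectors, alignment),
          // so arg-count minus 3 indexes StoreInts; the lane variants carry an
          // extra lane operand, hence arg-count minus 5 for StoreLaneInts.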
          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getDeclaration(F->getParent(),
                                              StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }
      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
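          // After consuming "ld", Name starts with the digit 2, 3 or 4, so
          // subtracting '2' picks the matching sret load intrinsic.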
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
                                          {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
          Name.starts_with("ds.fmax")) {
        // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
        // declaration.
        NewFn = nullptr;
        return true;
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords &&
          F->getParent()->IsNewDbgInfoFormat) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
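        // E.g. a remaining name like "add.v4i32" matches, with Groups[1]
        // capturing the reduction kind ("add").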
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn =
              Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    break; // No other 'e*'.
  case 'f':
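    // 'llvm.flt.rounds' is simply renamed to 'llvm.get.rounding'; the
    // signature is unchanged.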
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn =
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
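    // IR sketch of the change:
    //   old: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src,
    //                                             i64 %n, i32 4, i1 false)
    //   new: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %dst,
    //                                             i8* align 4 %src,
    //                                             i64 %n, i1 false)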
1192 | if (unsigned ID = StringSwitch<unsigned>(Name) |
1193 | .StartsWith(S: "memcpy." , Value: Intrinsic::memcpy) |
1194 | .StartsWith(S: "memmove." , Value: Intrinsic::memmove) |
1195 | .Default(Value: 0)) { |
1196 | if (F->arg_size() == 5) { |
1197 | rename(GV: F); |
1198 | // Get the types of dest, src, and len |
1199 | ArrayRef<Type *> ParamTypes = |
1200 | F->getFunctionType()->params().slice(N: 0, M: 3); |
1201 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys: ParamTypes); |
1202 | return true; |
1203 | } |
1204 | } |
1205 | if (Name.starts_with(Prefix: "memset." ) && F->arg_size() == 5) { |
1206 | rename(GV: F); |
      // Get the types of dest and len.
1208 | const auto *FT = F->getFunctionType(); |
1209 | Type *ParamTypes[2] = { |
1210 | FT->getParamType(i: 0), // Dest |
1211 | FT->getParamType(i: 2) // len |
1212 | }; |
1213 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: Intrinsic::memset, |
1214 | Tys: ParamTypes); |
1215 | return true; |
1216 | } |
1217 | break; |
1218 | } |
1219 | case 'n': { |
1220 | if (Name.consume_front(Prefix: "nvvm." )) { |
1221 | // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic. |
1222 | if (F->arg_size() == 1) { |
1223 | Intrinsic::ID IID = |
1224 | StringSwitch<Intrinsic::ID>(Name) |
1225 | .Cases(S0: "brev32" , S1: "brev64" , Value: Intrinsic::bitreverse) |
1226 | .Case(S: "clz.i" , Value: Intrinsic::ctlz) |
1227 | .Case(S: "popc.i" , Value: Intrinsic::ctpop) |
1228 | .Default(Value: Intrinsic::not_intrinsic); |
1229 | if (IID != Intrinsic::not_intrinsic) { |
1230 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: IID, |
1231 | Tys: {F->getReturnType()}); |
1232 | return true; |
1233 | } |
1234 | } |
1235 | |
1236 | // Check for nvvm intrinsics that need a return type adjustment. |
1237 | if (!F->getReturnType()->getScalarType()->isBFloatTy()) { |
1238 | Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); |
1239 | if (IID != Intrinsic::not_intrinsic) { |
1240 | NewFn = nullptr; |
1241 | return true; |
1242 | } |
1243 | } |
1244 | |
1245 | // The following nvvm intrinsics correspond exactly to an LLVM idiom, but |
1246 | // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall. |
1247 | // |
1248 | // TODO: We could add lohi.i2d. |
1249 | bool Expand = false; |
1250 | if (Name.consume_front(Prefix: "abs." )) |
        // nvvm.abs.{i,ll}
1252 | Expand = Name == "i" || Name == "ll" ; |
1253 | else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" ) |
1254 | Expand = true; |
1255 | else if (Name.consume_front(Prefix: "max." ) || Name.consume_front(Prefix: "min." )) |
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
1257 | Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" || |
1258 | Name == "ui" || Name == "ull" ; |
1259 | else if (Name.consume_front(Prefix: "atomic.load.add." )) |
1260 | // nvvm.atomic.load.add.{f32.p,f64.p} |
1261 | Expand = Name.starts_with(Prefix: "f32.p" ) || Name.starts_with(Prefix: "f64.p" ); |
1262 | else |
1263 | Expand = false; |
1264 | |
1265 | if (Expand) { |
1266 | NewFn = nullptr; |
1267 | return true; |
1268 | } |
1269 | break; // No other 'nvvm.*'. |
1270 | } |
1271 | break; |
1272 | } |
1273 | case 'o': |
1274 | // We only need to change the name to match the mangling including the |
1275 | // address space. |
1276 | if (Name.starts_with(Prefix: "objectsize." )) { |
1277 | Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; |
1278 | if (F->arg_size() == 2 || F->arg_size() == 3 || |
1279 | F->getName() != |
1280 | Intrinsic::getName(Id: Intrinsic::objectsize, Tys, M: F->getParent())) { |
1281 | rename(GV: F); |
1282 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: Intrinsic::objectsize, |
1283 | Tys); |
1284 | return true; |
1285 | } |
1286 | } |
1287 | break; |
1288 | |
1289 | case 'p': |
1290 | if (Name.starts_with(Prefix: "ptr.annotation." ) && F->arg_size() == 4) { |
1291 | rename(GV: F); |
1292 | NewFn = Intrinsic::getDeclaration( |
1293 | M: F->getParent(), id: Intrinsic::ptr_annotation, |
1294 | Tys: {F->arg_begin()->getType(), F->getArg(i: 1)->getType()}); |
1295 | return true; |
1296 | } |
1297 | break; |
1298 | |
1299 | case 'r': { |
1300 | if (Name.consume_front(Prefix: "riscv." )) { |
1301 | Intrinsic::ID ID; |
1302 | ID = StringSwitch<Intrinsic::ID>(Name) |
1303 | .Case(S: "aes32dsi" , Value: Intrinsic::riscv_aes32dsi) |
1304 | .Case(S: "aes32dsmi" , Value: Intrinsic::riscv_aes32dsmi) |
1305 | .Case(S: "aes32esi" , Value: Intrinsic::riscv_aes32esi) |
1306 | .Case(S: "aes32esmi" , Value: Intrinsic::riscv_aes32esmi) |
1307 | .Default(Value: Intrinsic::not_intrinsic); |
1308 | if (ID != Intrinsic::not_intrinsic) { |
1309 | if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32)) { |
1310 | rename(GV: F); |
1311 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
1312 | return true; |
1313 | } |
1314 | break; // No other applicable upgrades. |
1315 | } |
1316 | |
1317 | ID = StringSwitch<Intrinsic::ID>(Name) |
1318 | .StartsWith(S: "sm4ks" , Value: Intrinsic::riscv_sm4ks) |
1319 | .StartsWith(S: "sm4ed" , Value: Intrinsic::riscv_sm4ed) |
1320 | .Default(Value: Intrinsic::not_intrinsic); |
1321 | if (ID != Intrinsic::not_intrinsic) { |
1322 | if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32) || |
1323 | F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) { |
1324 | rename(GV: F); |
1325 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
1326 | return true; |
1327 | } |
1328 | break; // No other applicable upgrades. |
1329 | } |
1330 | |
1331 | ID = StringSwitch<Intrinsic::ID>(Name) |
1332 | .StartsWith(S: "sha256sig0" , Value: Intrinsic::riscv_sha256sig0) |
1333 | .StartsWith(S: "sha256sig1" , Value: Intrinsic::riscv_sha256sig1) |
1334 | .StartsWith(S: "sha256sum0" , Value: Intrinsic::riscv_sha256sum0) |
1335 | .StartsWith(S: "sha256sum1" , Value: Intrinsic::riscv_sha256sum1) |
1336 | .StartsWith(S: "sm3p0" , Value: Intrinsic::riscv_sm3p0) |
1337 | .StartsWith(S: "sm3p1" , Value: Intrinsic::riscv_sm3p1) |
1338 | .Default(Value: Intrinsic::not_intrinsic); |
1339 | if (ID != Intrinsic::not_intrinsic) { |
1340 | if (F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) { |
1341 | rename(GV: F); |
1342 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
1343 | return true; |
1344 | } |
1345 | break; // No other applicable upgrades. |
1346 | } |
1347 | break; // No other 'riscv.*' intrinsics |
1348 | } |
1349 | } break; |
1350 | |
1351 | case 's': |
1352 | if (Name == "stackprotectorcheck" ) { |
1353 | NewFn = nullptr; |
1354 | return true; |
1355 | } |
1356 | break; |
1357 | |
1358 | case 'v': { |
1359 | if (Name == "var.annotation" && F->arg_size() == 4) { |
1360 | rename(GV: F); |
1361 | NewFn = Intrinsic::getDeclaration( |
1362 | M: F->getParent(), id: Intrinsic::var_annotation, |
1363 | Tys: {{F->arg_begin()->getType(), F->getArg(i: 1)->getType()}}); |
1364 | return true; |
1365 | } |
1366 | break; |
1367 | } |
1368 | |
1369 | case 'w': |
1370 | if (Name.consume_front(Prefix: "wasm." )) { |
1371 | Intrinsic::ID ID = |
1372 | StringSwitch<Intrinsic::ID>(Name) |
1373 | .StartsWith(S: "fma." , Value: Intrinsic::wasm_relaxed_madd) |
1374 | .StartsWith(S: "fms." , Value: Intrinsic::wasm_relaxed_nmadd) |
1375 | .StartsWith(S: "laneselect." , Value: Intrinsic::wasm_relaxed_laneselect) |
1376 | .Default(Value: Intrinsic::not_intrinsic); |
1377 | if (ID != Intrinsic::not_intrinsic) { |
1378 | rename(GV: F); |
1379 | NewFn = |
1380 | Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys: F->getReturnType()); |
1381 | return true; |
1382 | } |
1383 | |
1384 | if (Name.consume_front(Prefix: "dot.i8x16.i7x16." )) { |
1385 | ID = StringSwitch<Intrinsic::ID>(Name) |
1386 | .Case(S: "signed" , Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed) |
1387 | .Case(S: "add.signed" , |
1388 | Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed) |
1389 | .Default(Value: Intrinsic::not_intrinsic); |
1390 | if (ID != Intrinsic::not_intrinsic) { |
1391 | rename(GV: F); |
1392 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
1393 | return true; |
1394 | } |
1395 | break; // No other 'wasm.dot.i8x16.i7x16.*'. |
1396 | } |
1397 | break; // No other 'wasm.*'. |
1398 | } |
1399 | break; |
1400 | |
1401 | case 'x': |
1402 | if (upgradeX86IntrinsicFunction(F, Name, NewFn)) |
1403 | return true; |
1404 | } |
1405 | |
1406 | auto *ST = dyn_cast<StructType>(Val: F->getReturnType()); |
1407 | if (ST && (!ST->isLiteral() || ST->isPacked()) && |
1408 | F->getIntrinsicID() != Intrinsic::not_intrinsic) { |
1409 | // Replace return type with literal non-packed struct. Only do this for |
1410 | // intrinsics declared to return a struct, not for intrinsics with |
1411 | // overloaded return type, in which case the exact struct type will be |
1412 | // mangled into the name. |
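    // E.g. a declaration returning a named struct %pair = type { i32, i32 }
    // is recreated to return the equivalent literal struct { i32, i32 }.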
1413 | SmallVector<Intrinsic::IITDescriptor> Desc; |
1414 | Intrinsic::getIntrinsicInfoTableEntries(id: F->getIntrinsicID(), T&: Desc); |
1415 | if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) { |
1416 | auto *FT = F->getFunctionType(); |
1417 | auto *NewST = StructType::get(Context&: ST->getContext(), Elements: ST->elements()); |
1418 | auto *NewFT = FunctionType::get(Result: NewST, Params: FT->params(), isVarArg: FT->isVarArg()); |
1419 | std::string Name = F->getName().str(); |
1420 | rename(GV: F); |
1421 | NewFn = Function::Create(Ty: NewFT, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(), |
1422 | N: Name, M: F->getParent()); |
1423 | |
1424 | // The new function may also need remangling. |
1425 | if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F: NewFn)) |
1426 | NewFn = *Result; |
1427 | return true; |
1428 | } |
1429 | } |
1430 | |
  // Remangle our intrinsic since the mangling may have been upgraded.
1432 | auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); |
  if (Result) {
1434 | NewFn = *Result; |
1435 | return true; |
1436 | } |
1437 | |
1438 | // This may not belong here. This function is effectively being overloaded |
1439 | // to both detect an intrinsic which needs upgrading, and to provide the |
1440 | // upgraded form of the intrinsic. We should perhaps have two separate |
1441 | // functions for this. |
1442 | return false; |
1443 | } |
1444 | |
1445 | bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn, |
1446 | bool CanUpgradeDebugIntrinsicsToRecords) { |
1447 | NewFn = nullptr; |
1448 | bool Upgraded = |
1449 | upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords); |
1450 | assert(F != NewFn && "Intrinsic function upgraded to the same function" ); |
1451 | |
1452 | // Upgrade intrinsic attributes. This does not change the function. |
1453 | if (NewFn) |
1454 | F = NewFn; |
1455 | if (Intrinsic::ID id = F->getIntrinsicID()) |
1456 | F->setAttributes(Intrinsic::getAttributes(C&: F->getContext(), id)); |
1457 | return Upgraded; |
1458 | } |
1459 | |
1460 | GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) { |
1461 | if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" || |
1462 | GV->getName() == "llvm.global_dtors" )) || |
1463 | !GV->hasInitializer()) |
1464 | return nullptr; |
1465 | ArrayType *ATy = dyn_cast<ArrayType>(Val: GV->getValueType()); |
1466 | if (!ATy) |
1467 | return nullptr; |
1468 | StructType *STy = dyn_cast<StructType>(Val: ATy->getElementType()); |
1469 | if (!STy || STy->getNumElements() != 2) |
1470 | return nullptr; |
1471 | |
1472 | LLVMContext &C = GV->getContext(); |
1473 | IRBuilder<> IRB(C); |
1474 | auto EltTy = StructType::get(elt1: STy->getElementType(N: 0), elts: STy->getElementType(N: 1), |
1475 | elts: IRB.getPtrTy()); |
1476 | Constant *Init = GV->getInitializer(); |
1477 | unsigned N = Init->getNumOperands(); |
1478 | std::vector<Constant *> NewCtors(N); |
1479 | for (unsigned i = 0; i != N; ++i) { |
1480 | auto Ctor = cast<Constant>(Val: Init->getOperand(i)); |
1481 | NewCtors[i] = ConstantStruct::get(T: EltTy, Vs: Ctor->getAggregateElement(Elt: 0u), |
1482 | Vs: Ctor->getAggregateElement(Elt: 1), |
1483 | Vs: Constant::getNullValue(Ty: IRB.getPtrTy())); |
1484 | } |
1485 | Constant *NewInit = ConstantArray::get(T: ArrayType::get(ElementType: EltTy, NumElements: N), V: NewCtors); |
1486 | |
1487 | return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(), |
1488 | NewInit, GV->getName()); |
1489 | } |
1490 | |
1491 | // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them |
1492 | // to byte shuffles. |
1493 | static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, |
1494 | unsigned Shift) { |
1495 | auto *ResultTy = cast<FixedVectorType>(Val: Op->getType()); |
1496 | unsigned NumElts = ResultTy->getNumElements() * 8; |
1497 | |
1498 | // Bitcast from a 64-bit element type to a byte element type. |
1499 | Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts); |
1500 | Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast" ); |
1501 | |
1502 | // We'll be shuffling in zeroes. |
1503 | Value *Res = Constant::getNullValue(Ty: VecTy); |
1504 | |
1505 | // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, |
1506 | // we'll just return the zero vector. |
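  // Worked example for one 128-bit lane with Shift == 4: the mask becomes
  // <12,13,14,15, 16,17,...,27>, i.e. four zero bytes from Res followed by
  // bytes 0..11 of Op -- a left shift by four bytes.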
1507 | if (Shift < 16) { |
1508 | int Idxs[64]; |
1509 | // 256/512-bit version is split into 2/4 16-byte lanes. |
1510 | for (unsigned l = 0; l != NumElts; l += 16) |
1511 | for (unsigned i = 0; i != 16; ++i) { |
1512 | unsigned Idx = NumElts + i - Shift; |
1513 | if (Idx < NumElts) |
1514 | Idx -= NumElts - 16; // end of lane, switch operand. |
1515 | Idxs[l + i] = Idx + l; |
1516 | } |
1517 | |
1518 | Res = Builder.CreateShuffleVector(V1: Res, V2: Op, Mask: ArrayRef(Idxs, NumElts)); |
1519 | } |
1520 | |
1521 | // Bitcast back to a 64-bit element type. |
1522 | return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast" ); |
1523 | } |
1524 | |
1525 | // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them |
1526 | // to byte shuffles. |
1527 | static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, |
1528 | unsigned Shift) { |
1529 | auto *ResultTy = cast<FixedVectorType>(Val: Op->getType()); |
1530 | unsigned NumElts = ResultTy->getNumElements() * 8; |
1531 | |
1532 | // Bitcast from a 64-bit element type to a byte element type. |
1533 | Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts); |
1534 | Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast" ); |
1535 | |
1536 | // We'll be shuffling in zeroes. |
1537 | Value *Res = Constant::getNullValue(Ty: VecTy); |
1538 | |
1539 | // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, |
1540 | // we'll just return the zero vector. |
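  // Worked example for one 128-bit lane with Shift == 4: the mask becomes
  // <4,5,...,15, 16,17,18,19>, i.e. bytes 4..15 of Op followed by four zero
  // bytes from Res -- a right shift by four bytes.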
1541 | if (Shift < 16) { |
1542 | int Idxs[64]; |
1543 | // 256/512-bit version is split into 2/4 16-byte lanes. |
1544 | for (unsigned l = 0; l != NumElts; l += 16) |
1545 | for (unsigned i = 0; i != 16; ++i) { |
1546 | unsigned Idx = i + Shift; |
1547 | if (Idx >= 16) |
1548 | Idx += NumElts - 16; // end of lane, switch operand. |
1549 | Idxs[l + i] = Idx + l; |
1550 | } |
1551 | |
1552 | Res = Builder.CreateShuffleVector(V1: Op, V2: Res, Mask: ArrayRef(Idxs, NumElts)); |
1553 | } |
1554 | |
1555 | // Bitcast back to a 64-bit element type. |
1556 | return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast" ); |
1557 | } |
1558 | |
1559 | static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, |
1560 | unsigned NumElts) { |
1561 | assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements" ); |
1562 | llvm::VectorType *MaskTy = FixedVectorType::get( |
1563 | ElementType: Builder.getInt1Ty(), NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth()); |
1564 | Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy); |
1565 | |
  // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
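  // E.g. for NumElts == 4 the i8 mask was bitcast to <8 x i1> above, and the
  // shuffle below keeps only elements 0..3.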
1568 | if (NumElts <= 4) { |
1569 | int Indices[4]; |
1570 | for (unsigned i = 0; i != NumElts; ++i) |
1571 | Indices[i] = i; |
1572 | Mask = Builder.CreateShuffleVector(V1: Mask, V2: Mask, Mask: ArrayRef(Indices, NumElts), |
1573 | Name: "extract" ); |
1574 | } |
1575 | |
1576 | return Mask; |
1577 | } |
1578 | |
1579 | static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, |
1580 | Value *Op1) { |
1581 | // If the mask is all ones just emit the first operation. |
1582 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1583 | if (C->isAllOnesValue()) |
1584 | return Op0; |
1585 | |
1586 | Mask = getX86MaskVec(Builder, Mask, |
1587 | NumElts: cast<FixedVectorType>(Val: Op0->getType())->getNumElements()); |
1588 | return Builder.CreateSelect(C: Mask, True: Op0, False: Op1); |
1589 | } |
1590 | |
1591 | static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, |
1592 | Value *Op1) { |
1593 | // If the mask is all ones just emit the first operation. |
1594 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1595 | if (C->isAllOnesValue()) |
1596 | return Op0; |
1597 | |
1598 | auto *MaskTy = FixedVectorType::get(ElementType: Builder.getInt1Ty(), |
1599 | NumElts: Mask->getType()->getIntegerBitWidth()); |
1600 | Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy); |
1601 | Mask = Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0); |
1602 | return Builder.CreateSelect(C: Mask, True: Op0, False: Op1); |
1603 | } |
1604 | |
1605 | // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics. |
// PALIGNR handles large immediates by shifting, while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
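// Worked PALIGNR example for one 128-bit lane with ShiftVal == 4: the shuffle
// selects bytes 4..19 of the concatenation <Op1, Op0>, i.e. bytes 4..15 of
// Op1 followed by bytes 0..3 of Op0.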
1608 | static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, |
1609 | Value *Op1, Value *Shift, |
1610 | Value *Passthru, Value *Mask, |
1611 | bool IsVALIGN) { |
1612 | unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Shift)->getZExtValue(); |
1613 | |
1614 | unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements(); |
1615 | assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!" ); |
1616 | assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!" ); |
1617 | assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!" ); |
1618 | |
1619 | // Mask the immediate for VALIGN. |
1620 | if (IsVALIGN) |
1621 | ShiftVal &= (NumElts - 1); |
1622 | |
1623 | // If palignr is shifting the pair of vectors more than the size of two |
1624 | // lanes, emit zero. |
1625 | if (ShiftVal >= 32) |
1626 | return llvm::Constant::getNullValue(Ty: Op0->getType()); |
1627 | |
1628 | // If palignr is shifting the pair of input vectors more than one lane, |
1629 | // but less than two lanes, convert to shifting in zeroes. |
1630 | if (ShiftVal > 16) { |
1631 | ShiftVal -= 16; |
1632 | Op1 = Op0; |
1633 | Op0 = llvm::Constant::getNullValue(Ty: Op0->getType()); |
1634 | } |
1635 | |
1636 | int Indices[64]; |
1637 | // 256-bit palignr operates on 128-bit lanes so we need to handle that |
1638 | for (unsigned l = 0; l < NumElts; l += 16) { |
1639 | for (unsigned i = 0; i != 16; ++i) { |
1640 | unsigned Idx = ShiftVal + i; |
1641 | if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN. |
1642 | Idx += NumElts - 16; // End of lane, switch operand. |
1643 | Indices[l + i] = Idx + l; |
1644 | } |
1645 | } |
1646 | |
1647 | Value *Align = Builder.CreateShuffleVector( |
1648 | V1: Op1, V2: Op0, Mask: ArrayRef(Indices, NumElts), Name: "palignr" ); |
1649 | |
1650 | return emitX86Select(Builder, Mask, Op0: Align, Op1: Passthru); |
1651 | } |
1652 | |
1653 | static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, |
1654 | bool ZeroMask, bool IndexForm) { |
1655 | Type *Ty = CI.getType(); |
1656 | unsigned VecWidth = Ty->getPrimitiveSizeInBits(); |
1657 | unsigned EltWidth = Ty->getScalarSizeInBits(); |
1658 | bool IsFloat = Ty->isFPOrFPVectorTy(); |
1659 | Intrinsic::ID IID; |
1660 | if (VecWidth == 128 && EltWidth == 32 && IsFloat) |
1661 | IID = Intrinsic::x86_avx512_vpermi2var_ps_128; |
1662 | else if (VecWidth == 128 && EltWidth == 32 && !IsFloat) |
1663 | IID = Intrinsic::x86_avx512_vpermi2var_d_128; |
1664 | else if (VecWidth == 128 && EltWidth == 64 && IsFloat) |
1665 | IID = Intrinsic::x86_avx512_vpermi2var_pd_128; |
1666 | else if (VecWidth == 128 && EltWidth == 64 && !IsFloat) |
1667 | IID = Intrinsic::x86_avx512_vpermi2var_q_128; |
1668 | else if (VecWidth == 256 && EltWidth == 32 && IsFloat) |
1669 | IID = Intrinsic::x86_avx512_vpermi2var_ps_256; |
1670 | else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) |
1671 | IID = Intrinsic::x86_avx512_vpermi2var_d_256; |
1672 | else if (VecWidth == 256 && EltWidth == 64 && IsFloat) |
1673 | IID = Intrinsic::x86_avx512_vpermi2var_pd_256; |
1674 | else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) |
1675 | IID = Intrinsic::x86_avx512_vpermi2var_q_256; |
1676 | else if (VecWidth == 512 && EltWidth == 32 && IsFloat) |
1677 | IID = Intrinsic::x86_avx512_vpermi2var_ps_512; |
1678 | else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) |
1679 | IID = Intrinsic::x86_avx512_vpermi2var_d_512; |
1680 | else if (VecWidth == 512 && EltWidth == 64 && IsFloat) |
1681 | IID = Intrinsic::x86_avx512_vpermi2var_pd_512; |
1682 | else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) |
1683 | IID = Intrinsic::x86_avx512_vpermi2var_q_512; |
1684 | else if (VecWidth == 128 && EltWidth == 16) |
1685 | IID = Intrinsic::x86_avx512_vpermi2var_hi_128; |
1686 | else if (VecWidth == 256 && EltWidth == 16) |
1687 | IID = Intrinsic::x86_avx512_vpermi2var_hi_256; |
1688 | else if (VecWidth == 512 && EltWidth == 16) |
1689 | IID = Intrinsic::x86_avx512_vpermi2var_hi_512; |
1690 | else if (VecWidth == 128 && EltWidth == 8) |
1691 | IID = Intrinsic::x86_avx512_vpermi2var_qi_128; |
1692 | else if (VecWidth == 256 && EltWidth == 8) |
1693 | IID = Intrinsic::x86_avx512_vpermi2var_qi_256; |
1694 | else if (VecWidth == 512 && EltWidth == 8) |
1695 | IID = Intrinsic::x86_avx512_vpermi2var_qi_512; |
1696 | else |
1697 | llvm_unreachable("Unexpected intrinsic" ); |
1698 | |
  Value *Args[] = {CI.getArgOperand(i: 0), CI.getArgOperand(i: 1),
                   CI.getArgOperand(i: 2)};
1701 | |
  // If this isn't index form, we need to swap operands 0 and 1.
1703 | if (!IndexForm) |
1704 | std::swap(a&: Args[0], b&: Args[1]); |
1705 | |
1706 | Value *V = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI.getModule(), id: IID), |
1707 | Args); |
1708 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) |
1709 | : Builder.CreateBitCast(V: CI.getArgOperand(i: 1), |
1710 | DestTy: Ty); |
1711 | return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: V, Op1: PassThru); |
1712 | } |
1713 | |
1714 | static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, |
1715 | Intrinsic::ID IID) { |
1716 | Type *Ty = CI.getType(); |
1717 | Value *Op0 = CI.getOperand(i_nocapture: 0); |
1718 | Value *Op1 = CI.getOperand(i_nocapture: 1); |
1719 | Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID, Tys: Ty); |
1720 | Value *Res = Builder.CreateCall(Callee: Intrin, Args: {Op0, Op1}); |
1721 | |
1722 | if (CI.arg_size() == 4) { // For masked intrinsics. |
1723 | Value *VecSrc = CI.getOperand(i_nocapture: 2); |
1724 | Value *Mask = CI.getOperand(i_nocapture: 3); |
1725 | Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc); |
1726 | } |
1727 | return Res; |
1728 | } |
1729 | |
1730 | static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, |
1731 | bool IsRotateRight) { |
1732 | Type *Ty = CI.getType(); |
1733 | Value *Src = CI.getArgOperand(i: 0); |
1734 | Value *Amt = CI.getArgOperand(i: 1); |
1735 | |
  // The amount may be a scalar immediate, in which case we create a splat
  // vector. Funnel shift amounts are treated as modulo, and the types are all
  // power-of-2, so we only care about the lowest log2 bits anyway.
1739 | if (Amt->getType() != Ty) { |
1740 | unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements(); |
1741 | Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false); |
1742 | Amt = Builder.CreateVectorSplat(NumElts, V: Amt); |
1743 | } |
1744 | |
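  // A rotate is a funnel shift with both inputs equal:
  // rotl(x, n) == fshl(x, x, n) and rotr(x, n) == fshr(x, x, n).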
1745 | Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; |
1746 | Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID, Tys: Ty); |
1747 | Value *Res = Builder.CreateCall(Callee: Intrin, Args: {Src, Src, Amt}); |
1748 | |
1749 | if (CI.arg_size() == 4) { // For masked intrinsics. |
1750 | Value *VecSrc = CI.getOperand(i_nocapture: 2); |
1751 | Value *Mask = CI.getOperand(i_nocapture: 3); |
1752 | Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc); |
1753 | } |
1754 | return Res; |
1755 | } |
1756 | |
1757 | static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, |
1758 | bool IsSigned) { |
1759 | Type *Ty = CI.getType(); |
1760 | Value *LHS = CI.getArgOperand(i: 0); |
1761 | Value *RHS = CI.getArgOperand(i: 1); |
1762 | |
1763 | CmpInst::Predicate Pred; |
1764 | switch (Imm) { |
1765 | case 0x0: |
1766 | Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; |
1767 | break; |
1768 | case 0x1: |
1769 | Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; |
1770 | break; |
1771 | case 0x2: |
1772 | Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; |
1773 | break; |
1774 | case 0x3: |
1775 | Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; |
1776 | break; |
1777 | case 0x4: |
1778 | Pred = ICmpInst::ICMP_EQ; |
1779 | break; |
1780 | case 0x5: |
1781 | Pred = ICmpInst::ICMP_NE; |
1782 | break; |
1783 | case 0x6: |
1784 | return Constant::getNullValue(Ty); // FALSE |
1785 | case 0x7: |
1786 | return Constant::getAllOnesValue(Ty); // TRUE |
1787 | default: |
1788 | llvm_unreachable("Unknown XOP vpcom/vpcomu predicate" ); |
1789 | } |
1790 | |
1791 | Value *Cmp = Builder.CreateICmp(P: Pred, LHS, RHS); |
1792 | Value *Ext = Builder.CreateSExt(V: Cmp, DestTy: Ty); |
1793 | return Ext; |
1794 | } |
1795 | |
1796 | static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, |
1797 | bool IsShiftRight, bool ZeroMask) { |
1798 | Type *Ty = CI.getType(); |
1799 | Value *Op0 = CI.getArgOperand(i: 0); |
1800 | Value *Op1 = CI.getArgOperand(i: 1); |
1801 | Value *Amt = CI.getArgOperand(i: 2); |
1802 | |
1803 | if (IsShiftRight) |
1804 | std::swap(a&: Op0, b&: Op1); |
1805 | |
  // The amount may be a scalar immediate, in which case we create a splat
  // vector. Funnel shift amounts are treated as modulo, and the types are all
  // power-of-2, so we only care about the lowest log2 bits anyway.
1809 | if (Amt->getType() != Ty) { |
1810 | unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements(); |
1811 | Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false); |
1812 | Amt = Builder.CreateVectorSplat(NumElts, V: Amt); |
1813 | } |
1814 | |
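  // A concat shift is a funnel shift: VSHLD maps directly onto fshl, and for
  // VSHRD the operands were swapped above so that fshr sees them in the order
  // it expects.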
1815 | Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl; |
1816 | Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID, Tys: Ty); |
1817 | Value *Res = Builder.CreateCall(Callee: Intrin, Args: {Op0, Op1, Amt}); |
1818 | |
1819 | unsigned NumArgs = CI.arg_size(); |
1820 | if (NumArgs >= 4) { // For masked intrinsics. |
1821 | Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(i: 3) : |
1822 | ZeroMask ? ConstantAggregateZero::get(Ty: CI.getType()) : |
1823 | CI.getArgOperand(i: 0); |
1824 | Value *Mask = CI.getOperand(i_nocapture: NumArgs - 1); |
1825 | Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc); |
1826 | } |
1827 | return Res; |
1828 | } |
1829 | |
1830 | static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, |
1831 | Value *Mask, bool Aligned) { |
1832 | // Cast the pointer to the right type. |
1833 | Ptr = Builder.CreateBitCast(V: Ptr, |
1834 | DestTy: llvm::PointerType::getUnqual(ElementType: Data->getType())); |
1835 | const Align Alignment = |
1836 | Aligned |
1837 | ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8) |
1838 | : Align(1); |
1839 | |
1840 | // If the mask is all ones just emit a regular store. |
1841 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1842 | if (C->isAllOnesValue()) |
1843 | return Builder.CreateAlignedStore(Val: Data, Ptr, Align: Alignment); |
1844 | |
1845 | // Convert the mask from an integer type to a vector of i1. |
1846 | unsigned NumElts = cast<FixedVectorType>(Val: Data->getType())->getNumElements(); |
1847 | Mask = getX86MaskVec(Builder, Mask, NumElts); |
1848 | return Builder.CreateMaskedStore(Val: Data, Ptr, Alignment, Mask); |
1849 | } |
1850 | |
1851 | static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, |
1852 | Value *Passthru, Value *Mask, bool Aligned) { |
1853 | Type *ValTy = Passthru->getType(); |
1854 | // Cast the pointer to the right type. |
1855 | Ptr = Builder.CreateBitCast(V: Ptr, DestTy: llvm::PointerType::getUnqual(ElementType: ValTy)); |
1856 | const Align Alignment = |
1857 | Aligned |
1858 | ? Align( |
1859 | Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() / |
1860 | 8) |
1861 | : Align(1); |
1862 | |
  // If the mask is all ones just emit a regular load.
1864 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1865 | if (C->isAllOnesValue()) |
1866 | return Builder.CreateAlignedLoad(Ty: ValTy, Ptr, Align: Alignment); |
1867 | |
1868 | // Convert the mask from an integer type to a vector of i1. |
1869 | unsigned NumElts = cast<FixedVectorType>(Val: ValTy)->getNumElements(); |
1870 | Mask = getX86MaskVec(Builder, Mask, NumElts); |
1871 | return Builder.CreateMaskedLoad(Ty: ValTy, Ptr, Alignment, Mask, PassThru: Passthru); |
1872 | } |
1873 | |
1874 | static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) { |
1875 | Type *Ty = CI.getType(); |
1876 | Value *Op0 = CI.getArgOperand(i: 0); |
1877 | Function *F = Intrinsic::getDeclaration(M: CI.getModule(), id: Intrinsic::abs, Tys: Ty); |
1878 | Value *Res = Builder.CreateCall(Callee: F, Args: {Op0, Builder.getInt1(V: false)}); |
1879 | if (CI.arg_size() == 3) |
1880 | Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 2), Op0: Res, Op1: CI.getArgOperand(i: 1)); |
1881 | return Res; |
1882 | } |
1883 | |
1884 | static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) { |
1885 | Type *Ty = CI.getType(); |
1886 | |
1887 | // Arguments have a vXi32 type so cast to vXi64. |
1888 | Value *LHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 0), DestTy: Ty); |
1889 | Value *RHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 1), DestTy: Ty); |
1890 | |
1891 | if (IsSigned) { |
1892 | // Shift left then arithmetic shift right. |
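    // ((x << 32) >>s 32) sign-extends the low 32 bits of each i64 element,
    // which is what the signed multiply expects of its inputs.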
1893 | Constant *ShiftAmt = ConstantInt::get(Ty, V: 32); |
1894 | LHS = Builder.CreateShl(LHS, RHS: ShiftAmt); |
1895 | LHS = Builder.CreateAShr(LHS, RHS: ShiftAmt); |
1896 | RHS = Builder.CreateShl(LHS: RHS, RHS: ShiftAmt); |
1897 | RHS = Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt); |
1898 | } else { |
1899 | // Clear the upper bits. |
1900 | Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff); |
1901 | LHS = Builder.CreateAnd(LHS, RHS: Mask); |
1902 | RHS = Builder.CreateAnd(LHS: RHS, RHS: Mask); |
1903 | } |
1904 | |
1905 | Value *Res = Builder.CreateMul(LHS, RHS); |
1906 | |
1907 | if (CI.arg_size() == 4) |
1908 | Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Res, Op1: CI.getArgOperand(i: 2)); |
1909 | |
1910 | return Res; |
1911 | } |
1912 | |
// Apply a mask to a vector of i1s and make sure the result is at least
// 8 bits wide.
1914 | static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, |
1915 | Value *Mask) { |
1916 | unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements(); |
1917 | if (Mask) { |
1918 | const auto *C = dyn_cast<Constant>(Val: Mask); |
1919 | if (!C || !C->isAllOnesValue()) |
1920 | Vec = Builder.CreateAnd(LHS: Vec, RHS: getX86MaskVec(Builder, Mask, NumElts)); |
1921 | } |
1922 | |
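  // E.g. a <4 x i1> result is widened to <8 x i1> by appending four elements
  // taken from the zero vector, so the final bitcast can produce an i8.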
1923 | if (NumElts < 8) { |
1924 | int Indices[8]; |
1925 | for (unsigned i = 0; i != NumElts; ++i) |
1926 | Indices[i] = i; |
1927 | for (unsigned i = NumElts; i != 8; ++i) |
1928 | Indices[i] = NumElts + i % NumElts; |
1929 | Vec = Builder.CreateShuffleVector(V1: Vec, |
1930 | V2: Constant::getNullValue(Ty: Vec->getType()), |
1931 | Mask: Indices); |
1932 | } |
1933 | return Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: std::max(a: NumElts, b: 8U))); |
1934 | } |
1935 | |
1936 | static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, |
1937 | unsigned CC, bool Signed) { |
1938 | Value *Op0 = CI.getArgOperand(i: 0); |
1939 | unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements(); |
1940 | |
1941 | Value *Cmp; |
1942 | if (CC == 3) { |
1943 | Cmp = Constant::getNullValue( |
1944 | Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts)); |
1945 | } else if (CC == 7) { |
1946 | Cmp = Constant::getAllOnesValue( |
1947 | Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts)); |
1948 | } else { |
1949 | ICmpInst::Predicate Pred; |
1950 | switch (CC) { |
1951 | default: llvm_unreachable("Unknown condition code" ); |
1952 | case 0: Pred = ICmpInst::ICMP_EQ; break; |
1953 | case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; |
1954 | case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; |
1955 | case 4: Pred = ICmpInst::ICMP_NE; break; |
1956 | case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; |
1957 | case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; |
1958 | } |
1959 | Cmp = Builder.CreateICmp(P: Pred, LHS: Op0, RHS: CI.getArgOperand(i: 1)); |
1960 | } |
1961 | |
1962 | Value *Mask = CI.getArgOperand(i: CI.arg_size() - 1); |
1963 | |
1964 | return applyX86MaskOn1BitsVec(Builder, Vec: Cmp, Mask); |
1965 | } |
1966 | |
1967 | // Replace a masked intrinsic with an older unmasked intrinsic. |
1968 | static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, |
1969 | Intrinsic::ID IID) { |
1970 | Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID); |
1971 | Value *Rep = Builder.CreateCall(Callee: Intrin, |
1972 | Args: { CI.getArgOperand(i: 0), CI.getArgOperand(i: 1) }); |
1973 | return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Rep, Op1: CI.getArgOperand(i: 2)); |
1974 | } |
1975 | |
1976 | static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) { |
1977 | Value* A = CI.getArgOperand(i: 0); |
1978 | Value* B = CI.getArgOperand(i: 1); |
1979 | Value* Src = CI.getArgOperand(i: 2); |
1980 | Value* Mask = CI.getArgOperand(i: 3); |
1981 | |
1982 | Value* AndNode = Builder.CreateAnd(LHS: Mask, RHS: APInt(8, 1)); |
1983 | Value* Cmp = Builder.CreateIsNotNull(Arg: AndNode); |
  Value* Extract1 = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0);
1986 | Value* Select = Builder.CreateSelect(C: Cmp, True: Extract1, False: Extract2); |
1987 | return Builder.CreateInsertElement(Vec: A, NewElt: Select, Idx: (uint64_t)0); |
1988 | } |
1989 | |
1990 | static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) { |
1991 | Value* Op = CI.getArgOperand(i: 0); |
1992 | Type* ReturnOp = CI.getType(); |
1993 | unsigned NumElts = cast<FixedVectorType>(Val: CI.getType())->getNumElements(); |
1994 | Value *Mask = getX86MaskVec(Builder, Mask: Op, NumElts); |
1995 | return Builder.CreateSExt(V: Mask, DestTy: ReturnOp, Name: "vpmovm2" ); |
1996 | } |
1997 | |
1998 | // Replace intrinsic with unmasked version and a select. |
1999 | static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, |
2000 | CallBase &CI, Value *&Rep) { |
2001 | Name = Name.substr(Start: 12); // Remove avx512.mask. |
2002 | |
2003 | unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits(); |
2004 | unsigned EltWidth = CI.getType()->getScalarSizeInBits(); |
2005 | Intrinsic::ID IID; |
2006 | if (Name.starts_with(Prefix: "max.p" )) { |
2007 | if (VecWidth == 128 && EltWidth == 32) |
2008 | IID = Intrinsic::x86_sse_max_ps; |
2009 | else if (VecWidth == 128 && EltWidth == 64) |
2010 | IID = Intrinsic::x86_sse2_max_pd; |
2011 | else if (VecWidth == 256 && EltWidth == 32) |
2012 | IID = Intrinsic::x86_avx_max_ps_256; |
2013 | else if (VecWidth == 256 && EltWidth == 64) |
2014 | IID = Intrinsic::x86_avx_max_pd_256; |
2015 | else |
2016 | llvm_unreachable("Unexpected intrinsic" ); |
2017 | } else if (Name.starts_with(Prefix: "min.p" )) { |
2018 | if (VecWidth == 128 && EltWidth == 32) |
2019 | IID = Intrinsic::x86_sse_min_ps; |
2020 | else if (VecWidth == 128 && EltWidth == 64) |
2021 | IID = Intrinsic::x86_sse2_min_pd; |
2022 | else if (VecWidth == 256 && EltWidth == 32) |
2023 | IID = Intrinsic::x86_avx_min_ps_256; |
2024 | else if (VecWidth == 256 && EltWidth == 64) |
2025 | IID = Intrinsic::x86_avx_min_pd_256; |
2026 | else |
2027 | llvm_unreachable("Unexpected intrinsic" ); |
2028 | } else if (Name.starts_with(Prefix: "pshuf.b." )) { |
2029 | if (VecWidth == 128) |
2030 | IID = Intrinsic::x86_ssse3_pshuf_b_128; |
2031 | else if (VecWidth == 256) |
2032 | IID = Intrinsic::x86_avx2_pshuf_b; |
2033 | else if (VecWidth == 512) |
2034 | IID = Intrinsic::x86_avx512_pshuf_b_512; |
2035 | else |
2036 | llvm_unreachable("Unexpected intrinsic" ); |
2037 | } else if (Name.starts_with(Prefix: "pmul.hr.sw." )) { |
2038 | if (VecWidth == 128) |
2039 | IID = Intrinsic::x86_ssse3_pmul_hr_sw_128; |
2040 | else if (VecWidth == 256) |
2041 | IID = Intrinsic::x86_avx2_pmul_hr_sw; |
2042 | else if (VecWidth == 512) |
2043 | IID = Intrinsic::x86_avx512_pmul_hr_sw_512; |
2044 | else |
2045 | llvm_unreachable("Unexpected intrinsic" ); |
2046 | } else if (Name.starts_with(Prefix: "pmulh.w." )) { |
2047 | if (VecWidth == 128) |
2048 | IID = Intrinsic::x86_sse2_pmulh_w; |
2049 | else if (VecWidth == 256) |
2050 | IID = Intrinsic::x86_avx2_pmulh_w; |
2051 | else if (VecWidth == 512) |
2052 | IID = Intrinsic::x86_avx512_pmulh_w_512; |
2053 | else |
2054 | llvm_unreachable("Unexpected intrinsic" ); |
2055 | } else if (Name.starts_with(Prefix: "pmulhu.w." )) { |
2056 | if (VecWidth == 128) |
2057 | IID = Intrinsic::x86_sse2_pmulhu_w; |
2058 | else if (VecWidth == 256) |
2059 | IID = Intrinsic::x86_avx2_pmulhu_w; |
2060 | else if (VecWidth == 512) |
2061 | IID = Intrinsic::x86_avx512_pmulhu_w_512; |
2062 | else |
2063 | llvm_unreachable("Unexpected intrinsic" ); |
2064 | } else if (Name.starts_with(Prefix: "pmaddw.d." )) { |
2065 | if (VecWidth == 128) |
2066 | IID = Intrinsic::x86_sse2_pmadd_wd; |
2067 | else if (VecWidth == 256) |
2068 | IID = Intrinsic::x86_avx2_pmadd_wd; |
2069 | else if (VecWidth == 512) |
2070 | IID = Intrinsic::x86_avx512_pmaddw_d_512; |
2071 | else |
2072 | llvm_unreachable("Unexpected intrinsic" ); |
2073 | } else if (Name.starts_with(Prefix: "pmaddubs.w." )) { |
2074 | if (VecWidth == 128) |
2075 | IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128; |
2076 | else if (VecWidth == 256) |
2077 | IID = Intrinsic::x86_avx2_pmadd_ub_sw; |
2078 | else if (VecWidth == 512) |
2079 | IID = Intrinsic::x86_avx512_pmaddubs_w_512; |
2080 | else |
2081 | llvm_unreachable("Unexpected intrinsic" ); |
2082 | } else if (Name.starts_with(Prefix: "packsswb." )) { |
2083 | if (VecWidth == 128) |
2084 | IID = Intrinsic::x86_sse2_packsswb_128; |
2085 | else if (VecWidth == 256) |
2086 | IID = Intrinsic::x86_avx2_packsswb; |
2087 | else if (VecWidth == 512) |
2088 | IID = Intrinsic::x86_avx512_packsswb_512; |
2089 | else |
2090 | llvm_unreachable("Unexpected intrinsic" ); |
2091 | } else if (Name.starts_with(Prefix: "packssdw." )) { |
2092 | if (VecWidth == 128) |
2093 | IID = Intrinsic::x86_sse2_packssdw_128; |
2094 | else if (VecWidth == 256) |
2095 | IID = Intrinsic::x86_avx2_packssdw; |
2096 | else if (VecWidth == 512) |
2097 | IID = Intrinsic::x86_avx512_packssdw_512; |
2098 | else |
2099 | llvm_unreachable("Unexpected intrinsic" ); |
2100 | } else if (Name.starts_with(Prefix: "packuswb." )) { |
2101 | if (VecWidth == 128) |
2102 | IID = Intrinsic::x86_sse2_packuswb_128; |
2103 | else if (VecWidth == 256) |
2104 | IID = Intrinsic::x86_avx2_packuswb; |
2105 | else if (VecWidth == 512) |
2106 | IID = Intrinsic::x86_avx512_packuswb_512; |
2107 | else |
2108 | llvm_unreachable("Unexpected intrinsic" ); |
2109 | } else if (Name.starts_with(Prefix: "packusdw." )) { |
2110 | if (VecWidth == 128) |
2111 | IID = Intrinsic::x86_sse41_packusdw; |
2112 | else if (VecWidth == 256) |
2113 | IID = Intrinsic::x86_avx2_packusdw; |
2114 | else if (VecWidth == 512) |
2115 | IID = Intrinsic::x86_avx512_packusdw_512; |
2116 | else |
2117 | llvm_unreachable("Unexpected intrinsic" ); |
2118 | } else if (Name.starts_with(Prefix: "vpermilvar." )) { |
2119 | if (VecWidth == 128 && EltWidth == 32) |
2120 | IID = Intrinsic::x86_avx_vpermilvar_ps; |
2121 | else if (VecWidth == 128 && EltWidth == 64) |
2122 | IID = Intrinsic::x86_avx_vpermilvar_pd; |
2123 | else if (VecWidth == 256 && EltWidth == 32) |
2124 | IID = Intrinsic::x86_avx_vpermilvar_ps_256; |
2125 | else if (VecWidth == 256 && EltWidth == 64) |
2126 | IID = Intrinsic::x86_avx_vpermilvar_pd_256; |
2127 | else if (VecWidth == 512 && EltWidth == 32) |
2128 | IID = Intrinsic::x86_avx512_vpermilvar_ps_512; |
2129 | else if (VecWidth == 512 && EltWidth == 64) |
2130 | IID = Intrinsic::x86_avx512_vpermilvar_pd_512; |
2131 | else |
2132 | llvm_unreachable("Unexpected intrinsic" ); |
2133 | } else if (Name == "cvtpd2dq.256" ) { |
2134 | IID = Intrinsic::x86_avx_cvt_pd2dq_256; |
2135 | } else if (Name == "cvtpd2ps.256" ) { |
2136 | IID = Intrinsic::x86_avx_cvt_pd2_ps_256; |
2137 | } else if (Name == "cvttpd2dq.256" ) { |
2138 | IID = Intrinsic::x86_avx_cvtt_pd2dq_256; |
2139 | } else if (Name == "cvttps2dq.128" ) { |
2140 | IID = Intrinsic::x86_sse2_cvttps2dq; |
2141 | } else if (Name == "cvttps2dq.256" ) { |
2142 | IID = Intrinsic::x86_avx_cvtt_ps2dq_256; |
2143 | } else if (Name.starts_with(Prefix: "permvar." )) { |
2144 | bool IsFloat = CI.getType()->isFPOrFPVectorTy(); |
2145 | if (VecWidth == 256 && EltWidth == 32 && IsFloat) |
2146 | IID = Intrinsic::x86_avx2_permps; |
2147 | else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) |
2148 | IID = Intrinsic::x86_avx2_permd; |
2149 | else if (VecWidth == 256 && EltWidth == 64 && IsFloat) |
2150 | IID = Intrinsic::x86_avx512_permvar_df_256; |
2151 | else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) |
2152 | IID = Intrinsic::x86_avx512_permvar_di_256; |
2153 | else if (VecWidth == 512 && EltWidth == 32 && IsFloat) |
2154 | IID = Intrinsic::x86_avx512_permvar_sf_512; |
2155 | else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) |
2156 | IID = Intrinsic::x86_avx512_permvar_si_512; |
2157 | else if (VecWidth == 512 && EltWidth == 64 && IsFloat) |
2158 | IID = Intrinsic::x86_avx512_permvar_df_512; |
2159 | else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) |
2160 | IID = Intrinsic::x86_avx512_permvar_di_512; |
2161 | else if (VecWidth == 128 && EltWidth == 16) |
2162 | IID = Intrinsic::x86_avx512_permvar_hi_128; |
2163 | else if (VecWidth == 256 && EltWidth == 16) |
2164 | IID = Intrinsic::x86_avx512_permvar_hi_256; |
2165 | else if (VecWidth == 512 && EltWidth == 16) |
2166 | IID = Intrinsic::x86_avx512_permvar_hi_512; |
2167 | else if (VecWidth == 128 && EltWidth == 8) |
2168 | IID = Intrinsic::x86_avx512_permvar_qi_128; |
2169 | else if (VecWidth == 256 && EltWidth == 8) |
2170 | IID = Intrinsic::x86_avx512_permvar_qi_256; |
2171 | else if (VecWidth == 512 && EltWidth == 8) |
2172 | IID = Intrinsic::x86_avx512_permvar_qi_512; |
2173 | else |
2174 | llvm_unreachable("Unexpected intrinsic" ); |
2175 | } else if (Name.starts_with(Prefix: "dbpsadbw." )) { |
2176 | if (VecWidth == 128) |
2177 | IID = Intrinsic::x86_avx512_dbpsadbw_128; |
2178 | else if (VecWidth == 256) |
2179 | IID = Intrinsic::x86_avx512_dbpsadbw_256; |
2180 | else if (VecWidth == 512) |
2181 | IID = Intrinsic::x86_avx512_dbpsadbw_512; |
2182 | else |
2183 | llvm_unreachable("Unexpected intrinsic" ); |
2184 | } else if (Name.starts_with(Prefix: "pmultishift.qb." )) { |
2185 | if (VecWidth == 128) |
2186 | IID = Intrinsic::x86_avx512_pmultishift_qb_128; |
2187 | else if (VecWidth == 256) |
2188 | IID = Intrinsic::x86_avx512_pmultishift_qb_256; |
2189 | else if (VecWidth == 512) |
2190 | IID = Intrinsic::x86_avx512_pmultishift_qb_512; |
2191 | else |
2192 | llvm_unreachable("Unexpected intrinsic" ); |
2193 | } else if (Name.starts_with(Prefix: "conflict." )) { |
2194 | if (Name[9] == 'd' && VecWidth == 128) |
2195 | IID = Intrinsic::x86_avx512_conflict_d_128; |
2196 | else if (Name[9] == 'd' && VecWidth == 256) |
2197 | IID = Intrinsic::x86_avx512_conflict_d_256; |
2198 | else if (Name[9] == 'd' && VecWidth == 512) |
2199 | IID = Intrinsic::x86_avx512_conflict_d_512; |
2200 | else if (Name[9] == 'q' && VecWidth == 128) |
2201 | IID = Intrinsic::x86_avx512_conflict_q_128; |
2202 | else if (Name[9] == 'q' && VecWidth == 256) |
2203 | IID = Intrinsic::x86_avx512_conflict_q_256; |
2204 | else if (Name[9] == 'q' && VecWidth == 512) |
2205 | IID = Intrinsic::x86_avx512_conflict_q_512; |
2206 | else |
2207 | llvm_unreachable("Unexpected intrinsic" ); |
2208 | } else if (Name.starts_with(Prefix: "pavg." )) { |
2209 | if (Name[5] == 'b' && VecWidth == 128) |
2210 | IID = Intrinsic::x86_sse2_pavg_b; |
2211 | else if (Name[5] == 'b' && VecWidth == 256) |
2212 | IID = Intrinsic::x86_avx2_pavg_b; |
2213 | else if (Name[5] == 'b' && VecWidth == 512) |
2214 | IID = Intrinsic::x86_avx512_pavg_b_512; |
2215 | else if (Name[5] == 'w' && VecWidth == 128) |
2216 | IID = Intrinsic::x86_sse2_pavg_w; |
2217 | else if (Name[5] == 'w' && VecWidth == 256) |
2218 | IID = Intrinsic::x86_avx2_pavg_w; |
2219 | else if (Name[5] == 'w' && VecWidth == 512) |
2220 | IID = Intrinsic::x86_avx512_pavg_w_512; |
2221 | else |
2222 | llvm_unreachable("Unexpected intrinsic" ); |
2223 | } else |
2224 | return false; |
2225 | |
2226 | SmallVector<Value *, 4> Args(CI.args()); |
2227 | Args.pop_back(); |
2228 | Args.pop_back(); |
2229 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI.getModule(), id: IID), |
2230 | Args); |
2231 | unsigned NumArgs = CI.arg_size(); |
2232 | Rep = emitX86Select(Builder, Mask: CI.getArgOperand(i: NumArgs - 1), Op0: Rep, |
2233 | Op1: CI.getArgOperand(i: NumArgs - 2)); |
2234 | return true; |
2235 | } |
2236 | |
/// Upgrade the comment in a call to inline asm that represents an ObjC
/// retain/release marker.
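/// E.g. an asm string that starts with "mov\tfp", mentions
/// objc_retainAutoreleaseReturnValue, and contains "# marker" has that '#'
/// replaced with ';'.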
2239 | void llvm::UpgradeInlineAsmString(std::string *AsmStr) { |
2240 | size_t Pos; |
2241 | if (AsmStr->find(s: "mov\tfp" ) == 0 && |
2242 | AsmStr->find(s: "objc_retainAutoreleaseReturnValue" ) != std::string::npos && |
2243 | (Pos = AsmStr->find(s: "# marker" )) != std::string::npos) { |
2244 | AsmStr->replace(pos: Pos, n1: 1, s: ";" ); |
2245 | } |
2246 | } |
2247 | |
2248 | static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, |
2249 | IRBuilder<> &Builder) { |
2250 | LLVMContext &C = F->getContext(); |
2251 | Value *Rep = nullptr; |
2252 | |
2253 | if (Name.starts_with(Prefix: "sse4a.movnt." )) { |
2254 | SmallVector<Metadata *, 1> Elts; |
2255 | Elts.push_back( |
2256 | Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1))); |
2257 | MDNode *Node = MDNode::get(Context&: C, MDs: Elts); |
2258 | |
2259 | Value *Arg0 = CI->getArgOperand(i: 0); |
2260 | Value *Arg1 = CI->getArgOperand(i: 1); |
2261 | |
    // Nontemporal (unaligned) store of the 0th element of the float/double
    // vector.
2264 | Type *SrcEltTy = cast<VectorType>(Val: Arg1->getType())->getElementType(); |
2265 | PointerType *EltPtrTy = PointerType::getUnqual(ElementType: SrcEltTy); |
2266 | Value *Addr = Builder.CreateBitCast(V: Arg0, DestTy: EltPtrTy, Name: "cast" ); |
    Value *Extract =
        Builder.CreateExtractElement(Vec: Arg1, Idx: (uint64_t)0, Name: "extractelement");
2269 | |
2270 | StoreInst *SI = Builder.CreateAlignedStore(Val: Extract, Ptr: Addr, Align: Align(1)); |
2271 | SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node); |
2272 | } else if (Name.starts_with(Prefix: "avx.movnt." ) || |
2273 | Name.starts_with(Prefix: "avx512.storent." )) { |
2274 | SmallVector<Metadata *, 1> Elts; |
2275 | Elts.push_back( |
2276 | Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1))); |
2277 | MDNode *Node = MDNode::get(Context&: C, MDs: Elts); |
2278 | |
2279 | Value *Arg0 = CI->getArgOperand(i: 0); |
2280 | Value *Arg1 = CI->getArgOperand(i: 1); |
2281 | |
2282 | // Convert the type of the pointer to a pointer to the stored type. |
2283 | Value *BC = Builder.CreateBitCast( |
2284 | V: Arg0, DestTy: PointerType::getUnqual(ElementType: Arg1->getType()), Name: "cast" ); |
2285 | StoreInst *SI = Builder.CreateAlignedStore( |
2286 | Val: Arg1, Ptr: BC, |
2287 | Align: Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)); |
2288 | SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node); |
2289 | } else if (Name == "sse2.storel.dq" ) { |
2290 | Value *Arg0 = CI->getArgOperand(i: 0); |
2291 | Value *Arg1 = CI->getArgOperand(i: 1); |
2292 | |
2293 | auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2); |
2294 | Value *BC0 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast" ); |
2295 | Value *Elt = Builder.CreateExtractElement(Vec: BC0, Idx: (uint64_t)0); |
2296 | Value *BC = Builder.CreateBitCast( |
2297 | V: Arg0, DestTy: PointerType::getUnqual(ElementType: Elt->getType()), Name: "cast" ); |
2298 | Builder.CreateAlignedStore(Val: Elt, Ptr: BC, Align: Align(1)); |
2299 | } else if (Name.starts_with(Prefix: "sse.storeu." ) || |
2300 | Name.starts_with(Prefix: "sse2.storeu." ) || |
2301 | Name.starts_with(Prefix: "avx.storeu." )) { |
2302 | Value *Arg0 = CI->getArgOperand(i: 0); |
2303 | Value *Arg1 = CI->getArgOperand(i: 1); |
2304 | |
2305 | Arg0 = Builder.CreateBitCast(V: Arg0, DestTy: PointerType::getUnqual(ElementType: Arg1->getType()), |
2306 | Name: "cast" ); |
2307 | Builder.CreateAlignedStore(Val: Arg1, Ptr: Arg0, Align: Align(1)); |
2308 | } else if (Name == "avx512.mask.store.ss" ) { |
2309 | Value *Mask = Builder.CreateAnd(LHS: CI->getArgOperand(i: 2), RHS: Builder.getInt8(C: 1)); |
2310 | upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1), |
2311 | Mask, Aligned: false); |
2312 | } else if (Name.starts_with(Prefix: "avx512.mask.store" )) { |
2313 | // "avx512.mask.storeu." or "avx512.mask.store." |
2314 | bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu". |
2315 | upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1), |
2316 | Mask: CI->getArgOperand(i: 2), Aligned); |
2317 | } else if (Name.starts_with(Prefix: "sse2.pcmp" ) || Name.starts_with(Prefix: "avx2.pcmp" )) { |
2318 | // Upgrade packed integer vector compare intrinsics to compare instructions. |
2319 | // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt." |
2320 | bool CmpEq = Name[9] == 'e'; |
2321 | Rep = Builder.CreateICmp(P: CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, |
2322 | LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
2323 | Rep = Builder.CreateSExt(V: Rep, DestTy: CI->getType(), Name: "" ); |
2324 | } else if (Name.starts_with(Prefix: "avx512.broadcastm" )) { |
2325 | Type *ExtTy = Type::getInt32Ty(C); |
2326 | if (CI->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 8)) |
2327 | ExtTy = Type::getInt64Ty(C); |
2328 | unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / |
2329 | ExtTy->getPrimitiveSizeInBits(); |
2330 | Rep = Builder.CreateZExt(V: CI->getArgOperand(i: 0), DestTy: ExtTy); |
2331 | Rep = Builder.CreateVectorSplat(NumElts, V: Rep); |
2332 | } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd" ) { |
2333 | Value *Vec = CI->getArgOperand(i: 0); |
2334 | Value *Elt0 = Builder.CreateExtractElement(Vec, Idx: (uint64_t)0); |
2335 | Function *Intr = Intrinsic::getDeclaration(M: F->getParent(), id: Intrinsic::sqrt, |
2336 | Tys: Elt0->getType()); |
2337 | Elt0 = Builder.CreateCall(Callee: Intr, Args: Elt0); |
2338 | Rep = Builder.CreateInsertElement(Vec, NewElt: Elt0, Idx: (uint64_t)0); |
2339 | } else if (Name.starts_with(Prefix: "avx.sqrt.p" ) || |
2340 | Name.starts_with(Prefix: "sse2.sqrt.p" ) || |
2341 | Name.starts_with(Prefix: "sse.sqrt.p" )) { |
2342 | Rep = |
2343 | Builder.CreateCall(Callee: Intrinsic::getDeclaration( |
2344 | M: F->getParent(), id: Intrinsic::sqrt, Tys: CI->getType()), |
2345 | Args: {CI->getArgOperand(i: 0)}); |
2346 | } else if (Name.starts_with(Prefix: "avx512.mask.sqrt.p" )) { |
2347 | if (CI->arg_size() == 4 && |
2348 | (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) || |
2349 | cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) { |
2350 | Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512 |
2351 | : Intrinsic::x86_avx512_sqrt_pd_512; |
2352 | |
2353 | Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 3)}; |
2354 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID), |
2355 | Args); |
2356 | } else { |
2357 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), |
2358 | id: Intrinsic::sqrt, |
2359 | Tys: CI->getType()), |
2360 | Args: {CI->getArgOperand(i: 0)}); |
2361 | } |
2362 | Rep = |
2363 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
2364 | } else if (Name.starts_with(Prefix: "avx512.ptestm" ) || |
2365 | Name.starts_with(Prefix: "avx512.ptestnm" )) { |
2366 | Value *Op0 = CI->getArgOperand(i: 0); |
2367 | Value *Op1 = CI->getArgOperand(i: 1); |
2368 | Value *Mask = CI->getArgOperand(i: 2); |
2369 | Rep = Builder.CreateAnd(LHS: Op0, RHS: Op1); |
2370 | llvm::Type *Ty = Op0->getType(); |
2371 | Value *Zero = llvm::Constant::getNullValue(Ty); |
2372 | ICmpInst::Predicate Pred = Name.starts_with(Prefix: "avx512.ptestm" ) |
2373 | ? ICmpInst::ICMP_NE |
2374 | : ICmpInst::ICMP_EQ; |
2375 | Rep = Builder.CreateICmp(P: Pred, LHS: Rep, RHS: Zero); |
2376 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask); |
2377 | } else if (Name.starts_with(Prefix: "avx512.mask.pbroadcast" )) { |
2378 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 1)->getType()) |
2379 | ->getNumElements(); |
2380 | Rep = Builder.CreateVectorSplat(NumElts, V: CI->getArgOperand(i: 0)); |
2381 | Rep = |
2382 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
2383 | } else if (Name.starts_with(Prefix: "avx512.kunpck" )) { |
2384 | unsigned NumElts = CI->getType()->getScalarSizeInBits(); |
2385 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts); |
2386 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts); |
2387 | int Indices[64]; |
2388 | for (unsigned i = 0; i != NumElts; ++i) |
2389 | Indices[i] = i; |
2390 | |
2391 | // First extract half of each vector. This gives better codegen than |
2392 | // doing it in a single shuffle. |
2393 | LHS = Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2)); |
2394 | RHS = Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2)); |
2395 | // Concat the vectors. |
2396 | // NOTE: Operands have to be swapped to match intrinsic definition. |
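    // E.g. for kunpck.bw this computes ((Arg0 & 0xff) << 8) | (Arg1 & 0xff)
    // (assuming the usual little-endian bit order of the mask bitcast).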
2397 | Rep = Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts)); |
2398 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2399 | } else if (Name == "avx512.kand.w" ) { |
2400 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2401 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2402 | Rep = Builder.CreateAnd(LHS, RHS); |
2403 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2404 | } else if (Name == "avx512.kandn.w" ) { |
2405 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2406 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2407 | LHS = Builder.CreateNot(V: LHS); |
2408 | Rep = Builder.CreateAnd(LHS, RHS); |
2409 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2410 | } else if (Name == "avx512.kor.w" ) { |
2411 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2412 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2413 | Rep = Builder.CreateOr(LHS, RHS); |
2414 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2415 | } else if (Name == "avx512.kxor.w" ) { |
2416 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2417 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2418 | Rep = Builder.CreateXor(LHS, RHS); |
2419 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2420 | } else if (Name == "avx512.kxnor.w" ) { |
2421 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2422 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2423 | LHS = Builder.CreateNot(V: LHS); |
2424 | Rep = Builder.CreateXor(LHS, RHS); |
2425 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2426 | } else if (Name == "avx512.knot.w" ) { |
2427 | Rep = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2428 | Rep = Builder.CreateNot(V: Rep); |
2429 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2430 | } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w" ) { |
2431 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2432 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2433 | Rep = Builder.CreateOr(LHS, RHS); |
2434 | Rep = Builder.CreateBitCast(V: Rep, DestTy: Builder.getInt16Ty()); |
2435 | Value *C; |
2436 | if (Name[14] == 'c') |
2437 | C = ConstantInt::getAllOnesValue(Ty: Builder.getInt16Ty()); |
2438 | else |
2439 | C = ConstantInt::getNullValue(Ty: Builder.getInt16Ty()); |
2440 | Rep = Builder.CreateICmpEQ(LHS: Rep, RHS: C); |
2441 | Rep = Builder.CreateZExt(V: Rep, DestTy: Builder.getInt32Ty()); |
2442 | } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" || |
2443 | Name == "sse.sub.ss" || Name == "sse2.sub.sd" || |
2444 | Name == "sse.mul.ss" || Name == "sse2.mul.sd" || |
2445 | Name == "sse.div.ss" || Name == "sse2.div.sd" ) { |
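// These scalar ops act only on element 0; the upper elements of the
// first operand pass through unchanged. Illustrative sketch for
// sse.add.ss (not exact builder output):
//   %a0 = extractelement <4 x float> %a, i32 0
//   %b0 = extractelement <4 x float> %b, i32 0
//   %r0 = fadd float %a0, %b0
//   %res = insertelement <4 x float> %a, float %r0, i32 0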
2446 | Type *I32Ty = Type::getInt32Ty(C); |
2447 | Value *Elt0 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 0), |
2448 | Idx: ConstantInt::get(Ty: I32Ty, V: 0)); |
2449 | Value *Elt1 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), |
2450 | Idx: ConstantInt::get(Ty: I32Ty, V: 0)); |
2451 | Value *EltOp; |
2452 | if (Name.contains(Other: ".add." )) |
2453 | EltOp = Builder.CreateFAdd(L: Elt0, R: Elt1); |
2454 | else if (Name.contains(Other: ".sub." )) |
2455 | EltOp = Builder.CreateFSub(L: Elt0, R: Elt1); |
2456 | else if (Name.contains(Other: ".mul." )) |
2457 | EltOp = Builder.CreateFMul(L: Elt0, R: Elt1); |
2458 | else |
2459 | EltOp = Builder.CreateFDiv(L: Elt0, R: Elt1); |
2460 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: EltOp, |
2461 | Idx: ConstantInt::get(Ty: I32Ty, V: 0)); |
2462 | } else if (Name.starts_with(Prefix: "avx512.mask.pcmp" )) { |
2463 | // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." |
2464 | bool CmpEq = Name[16] == 'e'; |
2465 | Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: CmpEq ? 0 : 6, Signed: true); |
2466 | } else if (Name.starts_with(Prefix: "avx512.mask.vpshufbitqmb." )) { |
2467 | Type *OpTy = CI->getArgOperand(i: 0)->getType(); |
2468 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
2469 | Intrinsic::ID IID; |
2470 | switch (VecWidth) { |
2471 | default: |
2472 | llvm_unreachable("Unexpected intrinsic" ); |
2473 | case 128: |
2474 | IID = Intrinsic::x86_avx512_vpshufbitqmb_128; |
2475 | break; |
2476 | case 256: |
2477 | IID = Intrinsic::x86_avx512_vpshufbitqmb_256; |
2478 | break; |
2479 | case 512: |
2480 | IID = Intrinsic::x86_avx512_vpshufbitqmb_512; |
2481 | break; |
2482 | } |
2483 | |
2484 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
2485 | Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)}); |
2486 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2)); |
2487 | } else if (Name.starts_with(Prefix: "avx512.mask.fpclass.p" )) { |
2488 | Type *OpTy = CI->getArgOperand(i: 0)->getType(); |
2489 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
2490 | unsigned EltWidth = OpTy->getScalarSizeInBits(); |
2491 | Intrinsic::ID IID; |
2492 | if (VecWidth == 128 && EltWidth == 32) |
2493 | IID = Intrinsic::x86_avx512_fpclass_ps_128; |
2494 | else if (VecWidth == 256 && EltWidth == 32) |
2495 | IID = Intrinsic::x86_avx512_fpclass_ps_256; |
2496 | else if (VecWidth == 512 && EltWidth == 32) |
2497 | IID = Intrinsic::x86_avx512_fpclass_ps_512; |
2498 | else if (VecWidth == 128 && EltWidth == 64) |
2499 | IID = Intrinsic::x86_avx512_fpclass_pd_128; |
2500 | else if (VecWidth == 256 && EltWidth == 64) |
2501 | IID = Intrinsic::x86_avx512_fpclass_pd_256; |
2502 | else if (VecWidth == 512 && EltWidth == 64) |
2503 | IID = Intrinsic::x86_avx512_fpclass_pd_512; |
2504 | else |
2505 | llvm_unreachable("Unexpected intrinsic" ); |
2506 | |
2507 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
2508 | Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)}); |
2509 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2)); |
2510 | } else if (Name.starts_with(Prefix: "avx512.cmp.p" )) { |
2511 | SmallVector<Value *, 4> Args(CI->args()); |
2512 | Type *OpTy = Args[0]->getType(); |
2513 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
2514 | unsigned EltWidth = OpTy->getScalarSizeInBits(); |
2515 | Intrinsic::ID IID; |
2516 | if (VecWidth == 128 && EltWidth == 32) |
2517 | IID = Intrinsic::x86_avx512_mask_cmp_ps_128; |
2518 | else if (VecWidth == 256 && EltWidth == 32) |
2519 | IID = Intrinsic::x86_avx512_mask_cmp_ps_256; |
2520 | else if (VecWidth == 512 && EltWidth == 32) |
2521 | IID = Intrinsic::x86_avx512_mask_cmp_ps_512; |
2522 | else if (VecWidth == 128 && EltWidth == 64) |
2523 | IID = Intrinsic::x86_avx512_mask_cmp_pd_128; |
2524 | else if (VecWidth == 256 && EltWidth == 64) |
2525 | IID = Intrinsic::x86_avx512_mask_cmp_pd_256; |
2526 | else if (VecWidth == 512 && EltWidth == 64) |
2527 | IID = Intrinsic::x86_avx512_mask_cmp_pd_512; |
2528 | else |
2529 | llvm_unreachable("Unexpected intrinsic" ); |
2530 | |
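// The replacement mask.cmp intrinsics take an explicit mask operand;
// all-ones is used here. For the 512-bit forms the old trailing SAE
// operand must stay last, so the mask is swapped in before it:
// {a, b, imm, sae} becomes {a, b, imm, mask, sae}.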
2531 | Value *Mask = Constant::getAllOnesValue(Ty: CI->getType()); |
2532 | if (VecWidth == 512) |
2533 | std::swap(a&: Mask, b&: Args.back()); |
2534 | Args.push_back(Elt: Mask); |
2535 | |
2536 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
2537 | Args); |
2538 | } else if (Name.starts_with(Prefix: "avx512.mask.cmp." )) { |
2539 | // Integer compare intrinsics. |
2540 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2541 | Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: true); |
2542 | } else if (Name.starts_with(Prefix: "avx512.mask.ucmp." )) { |
2543 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2544 | Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: false); |
2545 | } else if (Name.starts_with(Prefix: "avx512.cvtb2mask." ) || |
2546 | Name.starts_with(Prefix: "avx512.cvtw2mask." ) || |
2547 | Name.starts_with(Prefix: "avx512.cvtd2mask." ) || |
2548 | Name.starts_with(Prefix: "avx512.cvtq2mask." )) { |
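// The mask is simply the vector of sign bits. Illustrative sketch for
// cvtb2mask.128 (not exact builder output):
//   %cmp = icmp slt <16 x i8> %x, zeroinitializer
//   %res = bitcast <16 x i1> %cmp to i16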
2549 | Value *Op = CI->getArgOperand(i: 0); |
2550 | Value *Zero = llvm::Constant::getNullValue(Ty: Op->getType()); |
2551 | Rep = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: Op, RHS: Zero); |
2552 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: nullptr); |
2553 | } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" || |
2554 | Name == "ssse3.pabs.d.128" || Name.starts_with(Prefix: "avx2.pabs" ) || |
2555 | Name.starts_with(Prefix: "avx512.mask.pabs" )) { |
2556 | Rep = upgradeAbs(Builder, CI&: *CI); |
2557 | } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" || |
2558 | Name == "sse41.pmaxsd" || Name.starts_with(Prefix: "avx2.pmaxs" ) || |
2559 | Name.starts_with(Prefix: "avx512.mask.pmaxs" )) { |
2560 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smax); |
2561 | } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" || |
2562 | Name == "sse41.pmaxud" || Name.starts_with(Prefix: "avx2.pmaxu" ) || |
2563 | Name.starts_with(Prefix: "avx512.mask.pmaxu" )) { |
2564 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umax); |
2565 | } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" || |
2566 | Name == "sse41.pminsd" || Name.starts_with(Prefix: "avx2.pmins" ) || |
2567 | Name.starts_with(Prefix: "avx512.mask.pmins" )) { |
2568 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smin); |
2569 | } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" || |
2570 | Name == "sse41.pminud" || Name.starts_with(Prefix: "avx2.pminu" ) || |
2571 | Name.starts_with(Prefix: "avx512.mask.pminu" )) { |
2572 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umin); |
2573 | } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" || |
2574 | Name == "avx512.pmulu.dq.512" || |
2575 | Name.starts_with(Prefix: "avx512.mask.pmulu.dq." )) { |
2576 | Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: false); |
2577 | } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" || |
2578 | Name == "avx512.pmul.dq.512" || |
2579 | Name.starts_with(Prefix: "avx512.mask.pmul.dq." )) { |
2580 | Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: true); |
2581 | } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" || |
2582 | Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd" ) { |
2583 | Rep = |
2584 | Builder.CreateSIToFP(V: CI->getArgOperand(i: 1), |
2585 | DestTy: cast<VectorType>(Val: CI->getType())->getElementType()); |
2586 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
2587 | } else if (Name == "avx512.cvtusi2sd" ) { |
2588 | Rep = |
2589 | Builder.CreateUIToFP(V: CI->getArgOperand(i: 1), |
2590 | DestTy: cast<VectorType>(Val: CI->getType())->getElementType()); |
2591 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
2592 | } else if (Name == "sse2.cvtss2sd" ) { |
2593 | Rep = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), Idx: (uint64_t)0); |
2594 | Rep = Builder.CreateFPExt( |
2595 | V: Rep, DestTy: cast<VectorType>(Val: CI->getType())->getElementType()); |
2596 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
2597 | } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" || |
2598 | Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" || |
2599 | Name.starts_with(Prefix: "avx512.mask.cvtdq2pd." ) || |
2600 | Name.starts_with(Prefix: "avx512.mask.cvtudq2pd." ) || |
2601 | Name.starts_with(Prefix: "avx512.mask.cvtdq2ps." ) || |
2602 | Name.starts_with(Prefix: "avx512.mask.cvtudq2ps." ) || |
2603 | Name.starts_with(Prefix: "avx512.mask.cvtqq2pd." ) || |
2604 | Name.starts_with(Prefix: "avx512.mask.cvtuqq2pd." ) || |
2605 | Name == "avx512.mask.cvtqq2ps.256" || |
2606 | Name == "avx512.mask.cvtqq2ps.512" || |
2607 | Name == "avx512.mask.cvtuqq2ps.256" || |
2608 | Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" || |
2609 | Name == "avx.cvt.ps2.pd.256" || |
2610 | Name == "avx512.mask.cvtps2pd.128" || |
2611 | Name == "avx512.mask.cvtps2pd.256" ) { |
2612 | auto *DstTy = cast<FixedVectorType>(Val: CI->getType()); |
2613 | Rep = CI->getArgOperand(i: 0); |
2614 | auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType()); |
2615 | |
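// Some forms narrow the source first: e.g. sse2.cvtdq2pd converts only
// the low two i32 lanes of its <4 x i32> source to <2 x double>
// (illustrative).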
2616 | unsigned NumDstElts = DstTy->getNumElements(); |
2617 | if (NumDstElts < SrcTy->getNumElements()) { |
2618 | assert(NumDstElts == 2 && "Unexpected vector size" ); |
2619 | Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1}); |
2620 | } |
2621 | |
2622 | bool IsPS2PD = SrcTy->getElementType()->isFloatTy(); |
2623 | bool IsUnsigned = Name.contains(Other: "cvtu" ); |
2624 | if (IsPS2PD) |
2625 | Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtps2pd" ); |
2626 | else if (CI->arg_size() == 4 && |
2627 | (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) || |
2628 | cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) { |
2629 | Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round |
2630 | : Intrinsic::x86_avx512_sitofp_round; |
2631 | Function *F = |
2632 | Intrinsic::getDeclaration(M: CI->getModule(), id: IID, Tys: {DstTy, SrcTy}); |
2633 | Rep = Builder.CreateCall(Callee: F, Args: {Rep, CI->getArgOperand(i: 3)}); |
2634 | } else { |
2635 | Rep = IsUnsigned ? Builder.CreateUIToFP(V: Rep, DestTy: DstTy, Name: "cvt" ) |
2636 | : Builder.CreateSIToFP(V: Rep, DestTy: DstTy, Name: "cvt" ); |
2637 | } |
2638 | |
2639 | if (CI->arg_size() >= 3) |
2640 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2641 | Op1: CI->getArgOperand(i: 1)); |
2642 | } else if (Name.starts_with(Prefix: "avx512.mask.vcvtph2ps." ) || |
2643 | Name.starts_with(Prefix: "vcvtph2ps." )) { |
2644 | auto *DstTy = cast<FixedVectorType>(Val: CI->getType()); |
2645 | Rep = CI->getArgOperand(i: 0); |
2646 | auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType()); |
2647 | unsigned NumDstElts = DstTy->getNumElements(); |
2648 | if (NumDstElts != SrcTy->getNumElements()) { |
2649 | assert(NumDstElts == 4 && "Unexpected vector size" ); |
2650 | Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1, 2, 3}); |
2651 | } |
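// The i16 source lanes are reinterpreted as half and widened, e.g.
// <4 x i16> -> <4 x half> -> <4 x float> (illustrative).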
2652 | Rep = Builder.CreateBitCast( |
2653 | V: Rep, DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C), NumElts: NumDstElts)); |
2654 | Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtph2ps" ); |
2655 | if (CI->arg_size() >= 3) |
2656 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2657 | Op1: CI->getArgOperand(i: 1)); |
2658 | } else if (Name.starts_with(Prefix: "avx512.mask.load" )) { |
2659 | // "avx512.mask.loadu." or "avx512.mask.load." |
bool Aligned = Name[16] != 'u'; // Name[16] is 'u' in "avx512.mask.loadu.".
2661 | Rep = upgradeMaskedLoad(Builder, Ptr: CI->getArgOperand(i: 0), Passthru: CI->getArgOperand(i: 1), |
2662 | Mask: CI->getArgOperand(i: 2), Aligned); |
2663 | } else if (Name.starts_with(Prefix: "avx512.mask.expand.load." )) { |
2664 | auto *ResultTy = cast<FixedVectorType>(Val: CI->getType()); |
2665 | Type *PtrTy = ResultTy->getElementType(); |
2666 | |
2667 | // Cast the pointer to element type. |
2668 | Value *Ptr = Builder.CreateBitCast(V: CI->getOperand(i_nocapture: 0), |
2669 | DestTy: llvm::PointerType::getUnqual(ElementType: PtrTy)); |
2670 | |
2671 | Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2), |
2672 | NumElts: ResultTy->getNumElements()); |
2673 | |
2674 | Function *ELd = Intrinsic::getDeclaration( |
2675 | M: F->getParent(), id: Intrinsic::masked_expandload, Tys: ResultTy); |
2676 | Rep = Builder.CreateCall(Callee: ELd, Args: {Ptr, MaskVec, CI->getOperand(i_nocapture: 1)}); |
2677 | } else if (Name.starts_with(Prefix: "avx512.mask.compress.store." )) { |
2678 | auto *ResultTy = cast<VectorType>(Val: CI->getArgOperand(i: 1)->getType()); |
2679 | Type *PtrTy = ResultTy->getElementType(); |
2680 | |
2681 | // Cast the pointer to element type. |
2682 | Value *Ptr = Builder.CreateBitCast(V: CI->getOperand(i_nocapture: 0), |
2683 | DestTy: llvm::PointerType::getUnqual(ElementType: PtrTy)); |
2684 | |
2685 | Value *MaskVec = |
2686 | getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2), |
2687 | NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements()); |
2688 | |
2689 | Function *CSt = Intrinsic::getDeclaration( |
2690 | M: F->getParent(), id: Intrinsic::masked_compressstore, Tys: ResultTy); |
2691 | Rep = Builder.CreateCall(Callee: CSt, Args: {CI->getArgOperand(i: 1), Ptr, MaskVec}); |
2692 | } else if (Name.starts_with(Prefix: "avx512.mask.compress." ) || |
2693 | Name.starts_with(Prefix: "avx512.mask.expand." )) { |
2694 | auto *ResultTy = cast<FixedVectorType>(Val: CI->getType()); |
2695 | |
2696 | Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2), |
2697 | NumElts: ResultTy->getNumElements()); |
2698 | |
2699 | bool IsCompress = Name[12] == 'c'; |
2700 | Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress |
2701 | : Intrinsic::x86_avx512_mask_expand; |
2702 | Function *Intr = Intrinsic::getDeclaration(M: F->getParent(), id: IID, Tys: ResultTy); |
2703 | Rep = Builder.CreateCall(Callee: Intr, |
2704 | Args: {CI->getOperand(i_nocapture: 0), CI->getOperand(i_nocapture: 1), MaskVec}); |
2705 | } else if (Name.starts_with(Prefix: "xop.vpcom" )) { |
2706 | bool IsSigned; |
2707 | if (Name.ends_with(Suffix: "ub" ) || Name.ends_with(Suffix: "uw" ) || Name.ends_with(Suffix: "ud" ) || |
2708 | Name.ends_with(Suffix: "uq" )) |
2709 | IsSigned = false; |
2710 | else if (Name.ends_with(Suffix: "b" ) || Name.ends_with(Suffix: "w" ) || |
2711 | Name.ends_with(Suffix: "d" ) || Name.ends_with(Suffix: "q" )) |
2712 | IsSigned = true; |
2713 | else |
2714 | llvm_unreachable("Unknown suffix" ); |
2715 | |
2716 | unsigned Imm; |
2717 | if (CI->arg_size() == 3) { |
2718 | Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2719 | } else { |
2720 | Name = Name.substr(Start: 9); // strip off "xop.vpcom" |
2721 | if (Name.starts_with(Prefix: "lt" )) |
2722 | Imm = 0; |
2723 | else if (Name.starts_with(Prefix: "le" )) |
2724 | Imm = 1; |
2725 | else if (Name.starts_with(Prefix: "gt" )) |
2726 | Imm = 2; |
2727 | else if (Name.starts_with(Prefix: "ge" )) |
2728 | Imm = 3; |
2729 | else if (Name.starts_with(Prefix: "eq" )) |
2730 | Imm = 4; |
2731 | else if (Name.starts_with(Prefix: "ne" )) |
2732 | Imm = 5; |
2733 | else if (Name.starts_with(Prefix: "false" )) |
2734 | Imm = 6; |
2735 | else if (Name.starts_with(Prefix: "true" )) |
2736 | Imm = 7; |
2737 | else |
2738 | llvm_unreachable("Unknown condition" ); |
2739 | } |
2740 | |
2741 | Rep = upgradeX86vpcom(Builder, CI&: *CI, Imm, IsSigned); |
2742 | } else if (Name.starts_with(Prefix: "xop.vpcmov" )) { |
2743 | Value *Sel = CI->getArgOperand(i: 2); |
2744 | Value *NotSel = Builder.CreateNot(V: Sel); |
2745 | Value *Sel0 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 0), RHS: Sel); |
2746 | Value *Sel1 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 1), RHS: NotSel); |
2747 | Rep = Builder.CreateOr(LHS: Sel0, RHS: Sel1); |
2748 | } else if (Name.starts_with(Prefix: "xop.vprot" ) || Name.starts_with(Prefix: "avx512.prol" ) || |
2749 | Name.starts_with(Prefix: "avx512.mask.prol" )) { |
2750 | Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: false); |
2751 | } else if (Name.starts_with(Prefix: "avx512.pror" ) || |
2752 | Name.starts_with(Prefix: "avx512.mask.pror" )) { |
2753 | Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: true); |
2754 | } else if (Name.starts_with(Prefix: "avx512.vpshld." ) || |
2755 | Name.starts_with(Prefix: "avx512.mask.vpshld" ) || |
2756 | Name.starts_with(Prefix: "avx512.maskz.vpshld" )) { |
2757 | bool ZeroMask = Name[11] == 'z'; |
2758 | Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: false, ZeroMask); |
2759 | } else if (Name.starts_with(Prefix: "avx512.vpshrd." ) || |
2760 | Name.starts_with(Prefix: "avx512.mask.vpshrd" ) || |
2761 | Name.starts_with(Prefix: "avx512.maskz.vpshrd" )) { |
2762 | bool ZeroMask = Name[11] == 'z'; |
2763 | Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: true, ZeroMask); |
2764 | } else if (Name == "sse42.crc32.64.8" ) { |
2765 | Function *CRC32 = Intrinsic::getDeclaration( |
2766 | M: F->getParent(), id: Intrinsic::x86_sse42_crc32_32_8); |
2767 | Value *Trunc0 = |
2768 | Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Type::getInt32Ty(C)); |
2769 | Rep = Builder.CreateCall(Callee: CRC32, Args: {Trunc0, CI->getArgOperand(i: 1)}); |
2770 | Rep = Builder.CreateZExt(V: Rep, DestTy: CI->getType(), Name: "" ); |
2771 | } else if (Name.starts_with(Prefix: "avx.vbroadcast.s" ) || |
2772 | Name.starts_with(Prefix: "avx512.vbroadcast.s" )) { |
2773 | // Replace broadcasts with a series of insertelements. |
2774 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
2775 | Type *EltTy = VecTy->getElementType(); |
2776 | unsigned EltNum = VecTy->getNumElements(); |
2777 | Value *Load = Builder.CreateLoad(Ty: EltTy, Ptr: CI->getArgOperand(i: 0)); |
2778 | Type *I32Ty = Type::getInt32Ty(C); |
2779 | Rep = PoisonValue::get(T: VecTy); |
2780 | for (unsigned I = 0; I < EltNum; ++I) |
2781 | Rep = Builder.CreateInsertElement(Vec: Rep, NewElt: Load, Idx: ConstantInt::get(Ty: I32Ty, V: I)); |
2782 | } else if (Name.starts_with(Prefix: "sse41.pmovsx" ) || |
2783 | Name.starts_with(Prefix: "sse41.pmovzx" ) || |
2784 | Name.starts_with(Prefix: "avx2.pmovsx" ) || |
2785 | Name.starts_with(Prefix: "avx2.pmovzx" ) || |
2786 | Name.starts_with(Prefix: "avx512.mask.pmovsx" ) || |
2787 | Name.starts_with(Prefix: "avx512.mask.pmovzx" )) { |
2788 | auto *DstTy = cast<FixedVectorType>(Val: CI->getType()); |
2789 | unsigned NumDstElts = DstTy->getNumElements(); |
2790 | |
2791 | // Extract a subvector of the first NumDstElts lanes and sign/zero extend. |
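// Illustrative sketch for sse41.pmovsxbd (not exact builder output):
//   %sub = shufflevector <16 x i8> %x, <16 x i8> poison, <4 x i32> <0, 1, 2, 3>
//   %res = sext <4 x i8> %sub to <4 x i32>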
2792 | SmallVector<int, 8> ShuffleMask(NumDstElts); |
2793 | for (unsigned i = 0; i != NumDstElts; ++i) |
2794 | ShuffleMask[i] = i; |
2795 | |
2796 | Value *SV = Builder.CreateShuffleVector(V: CI->getArgOperand(i: 0), Mask: ShuffleMask); |
2797 | |
2798 | bool DoSext = Name.contains(Other: "pmovsx" ); |
2799 | Rep = |
2800 | DoSext ? Builder.CreateSExt(V: SV, DestTy: DstTy) : Builder.CreateZExt(V: SV, DestTy: DstTy); |
// If there are 3 arguments, it's a masked intrinsic, so we need a select.
2802 | if (CI->arg_size() == 3) |
2803 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2804 | Op1: CI->getArgOperand(i: 1)); |
2805 | } else if (Name == "avx512.mask.pmov.qd.256" || |
2806 | Name == "avx512.mask.pmov.qd.512" || |
2807 | Name == "avx512.mask.pmov.wb.256" || |
2808 | Name == "avx512.mask.pmov.wb.512" ) { |
2809 | Type *Ty = CI->getArgOperand(i: 1)->getType(); |
2810 | Rep = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Ty); |
2811 | Rep = |
2812 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
2813 | } else if (Name.starts_with(Prefix: "avx.vbroadcastf128" ) || |
2814 | Name == "avx2.vbroadcasti128" ) { |
2815 | // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. |
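// Illustrative sketch for avx.vbroadcastf128.ps.256 (not exact output):
//   %ld  = load <4 x float>, ptr %p, align 1
//   %res = shufflevector <4 x float> %ld, <4 x float> poison,
//                        <8 x i32> <i32 0, i32 1, i32 2, i32 3,
//                                   i32 0, i32 1, i32 2, i32 3>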
2816 | Type *EltTy = cast<VectorType>(Val: CI->getType())->getElementType(); |
2817 | unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); |
2818 | auto *VT = FixedVectorType::get(ElementType: EltTy, NumElts: NumSrcElts); |
2819 | Value *Op = Builder.CreatePointerCast(V: CI->getArgOperand(i: 0), |
2820 | DestTy: PointerType::getUnqual(ElementType: VT)); |
2821 | Value *Load = Builder.CreateAlignedLoad(Ty: VT, Ptr: Op, Align: Align(1)); |
2822 | if (NumSrcElts == 2) |
2823 | Rep = Builder.CreateShuffleVector(V: Load, Mask: ArrayRef<int>{0, 1, 0, 1}); |
2824 | else |
2825 | Rep = Builder.CreateShuffleVector(V: Load, |
2826 | Mask: ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3}); |
2827 | } else if (Name.starts_with(Prefix: "avx512.mask.shuf.i" ) || |
2828 | Name.starts_with(Prefix: "avx512.mask.shuf.f" )) { |
2829 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2830 | Type *VT = CI->getType(); |
2831 | unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; |
2832 | unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); |
2833 | unsigned ControlBitsMask = NumLanes - 1; |
2834 | unsigned NumControlBits = NumLanes / 2; |
SmallVector<int, 8> ShuffleMask;
2836 | |
2837 | for (unsigned l = 0; l != NumLanes; ++l) { |
2838 | unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; |
2839 | // We actually need the other source. |
2840 | if (l >= NumLanes / 2) |
2841 | LaneMask += NumLanes; |
2842 | for (unsigned i = 0; i != NumElementsInLane; ++i) |
2843 | ShuffleMask.push_back(Elt: LaneMask * NumElementsInLane + i); |
2844 | } |
2845 | Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0), |
2846 | V2: CI->getArgOperand(i: 1), Mask: ShuffleMask); |
2847 | Rep = |
2848 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3)); |
2849 | } else if (Name.starts_with(Prefix: "avx512.mask.broadcastf" ) || |
2850 | Name.starts_with(Prefix: "avx512.mask.broadcasti" )) { |
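// Broadcast a subvector by tiling its elements across the destination,
// e.g. a 4-element source into 16 elements uses the repeating mask
// <0, 1, 2, 3, 0, 1, 2, 3, ...> (illustrative).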
2851 | unsigned NumSrcElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 0)->getType()) |
2852 | ->getNumElements(); |
2853 | unsigned NumDstElts = |
2854 | cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
2855 | |
2856 | SmallVector<int, 8> ShuffleMask(NumDstElts); |
2857 | for (unsigned i = 0; i != NumDstElts; ++i) |
2858 | ShuffleMask[i] = i % NumSrcElts; |
2859 | |
2860 | Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0), |
2861 | V2: CI->getArgOperand(i: 0), Mask: ShuffleMask); |
2862 | Rep = |
2863 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
2864 | } else if (Name.starts_with(Prefix: "avx2.pbroadcast" ) || |
2865 | Name.starts_with(Prefix: "avx2.vbroadcast" ) || |
2866 | Name.starts_with(Prefix: "avx512.pbroadcast" ) || |
2867 | Name.starts_with(Prefix: "avx512.mask.broadcast.s" )) { |
2868 | // Replace vp?broadcasts with a vector shuffle. |
2869 | Value *Op = CI->getArgOperand(i: 0); |
2870 | ElementCount EC = cast<VectorType>(Val: CI->getType())->getElementCount(); |
2871 | Type *MaskTy = VectorType::get(ElementType: Type::getInt32Ty(C), EC); |
2872 | SmallVector<int, 8> M; |
2873 | ShuffleVectorInst::getShuffleMask(Mask: Constant::getNullValue(Ty: MaskTy), Result&: M); |
2874 | Rep = Builder.CreateShuffleVector(V: Op, Mask: M); |
2875 | |
2876 | if (CI->arg_size() == 3) |
2877 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2878 | Op1: CI->getArgOperand(i: 1)); |
2879 | } else if (Name.starts_with(Prefix: "sse2.padds." ) || |
2880 | Name.starts_with(Prefix: "avx2.padds." ) || |
2881 | Name.starts_with(Prefix: "avx512.padds." ) || |
2882 | Name.starts_with(Prefix: "avx512.mask.padds." )) { |
2883 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::sadd_sat); |
2884 | } else if (Name.starts_with(Prefix: "sse2.psubs." ) || |
2885 | Name.starts_with(Prefix: "avx2.psubs." ) || |
2886 | Name.starts_with(Prefix: "avx512.psubs." ) || |
2887 | Name.starts_with(Prefix: "avx512.mask.psubs." )) { |
2888 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::ssub_sat); |
2889 | } else if (Name.starts_with(Prefix: "sse2.paddus." ) || |
2890 | Name.starts_with(Prefix: "avx2.paddus." ) || |
2891 | Name.starts_with(Prefix: "avx512.mask.paddus." )) { |
2892 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::uadd_sat); |
2893 | } else if (Name.starts_with(Prefix: "sse2.psubus." ) || |
2894 | Name.starts_with(Prefix: "avx2.psubus." ) || |
2895 | Name.starts_with(Prefix: "avx512.mask.psubus." )) { |
2896 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::usub_sat); |
2897 | } else if (Name.starts_with(Prefix: "avx512.mask.palignr." )) { |
2898 | Rep = upgradeX86ALIGNIntrinsics(Builder, Op0: CI->getArgOperand(i: 0), |
2899 | Op1: CI->getArgOperand(i: 1), Shift: CI->getArgOperand(i: 2), |
2900 | Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), |
2901 | IsVALIGN: false); |
2902 | } else if (Name.starts_with(Prefix: "avx512.mask.valign." )) { |
2903 | Rep = upgradeX86ALIGNIntrinsics( |
2904 | Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1), |
2905 | Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), IsVALIGN: true); |
2906 | } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq" ) { |
2907 | // 128/256-bit shift left specified in bits. |
2908 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
2909 | Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), |
2910 | Shift: Shift / 8); // Shift is in bits. |
2911 | } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq" ) { |
2912 | // 128/256-bit shift right specified in bits. |
2913 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
2914 | Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), |
2915 | Shift: Shift / 8); // Shift is in bits. |
2916 | } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" || |
2917 | Name == "avx512.psll.dq.512" ) { |
2918 | // 128/256/512-bit shift left specified in bytes. |
2919 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
2920 | Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift); |
2921 | } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" || |
2922 | Name == "avx512.psrl.dq.512" ) { |
2923 | // 128/256/512-bit shift right specified in bytes. |
2924 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
2925 | Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift); |
2926 | } else if (Name == "sse41.pblendw" || Name.starts_with(Prefix: "sse41.blendp" ) || |
2927 | Name.starts_with(Prefix: "avx.blend.p" ) || Name == "avx2.pblendw" || |
2928 | Name.starts_with(Prefix: "avx2.pblendd." )) { |
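// Each immediate bit (repeating every 8 elements) selects the element
// from Op1 instead of Op0, e.g. sse41.pblendw with Imm = 0x0F gives the
// mask <8, 9, 10, 11, 4, 5, 6, 7> (illustrative).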
2929 | Value *Op0 = CI->getArgOperand(i: 0); |
2930 | Value *Op1 = CI->getArgOperand(i: 1); |
2931 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2932 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
2933 | unsigned NumElts = VecTy->getNumElements(); |
2934 | |
2935 | SmallVector<int, 16> Idxs(NumElts); |
2936 | for (unsigned i = 0; i != NumElts; ++i) |
2937 | Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i; |
2938 | |
2939 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
2940 | } else if (Name.starts_with(Prefix: "avx.vinsertf128." ) || |
2941 | Name == "avx2.vinserti128" || |
2942 | Name.starts_with(Prefix: "avx512.mask.insert" )) { |
2943 | Value *Op0 = CI->getArgOperand(i: 0); |
2944 | Value *Op1 = CI->getArgOperand(i: 1); |
2945 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2946 | unsigned DstNumElts = |
2947 | cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
2948 | unsigned SrcNumElts = |
2949 | cast<FixedVectorType>(Val: Op1->getType())->getNumElements(); |
2950 | unsigned Scale = DstNumElts / SrcNumElts; |
2951 | |
2952 | // Mask off the high bits of the immediate value; hardware ignores those. |
2953 | Imm = Imm % Scale; |
2954 | |
2955 | // Extend the second operand into a vector the size of the destination. |
2956 | SmallVector<int, 8> Idxs(DstNumElts); |
2957 | for (unsigned i = 0; i != SrcNumElts; ++i) |
2958 | Idxs[i] = i; |
2959 | for (unsigned i = SrcNumElts; i != DstNumElts; ++i) |
2960 | Idxs[i] = SrcNumElts; |
2961 | Rep = Builder.CreateShuffleVector(V: Op1, Mask: Idxs); |
2962 | |
2963 | // Insert the second operand into the first operand. |
2964 | |
2965 | // Note that there is no guarantee that instruction lowering will actually |
2966 | // produce a vinsertf128 instruction for the created shuffles. In |
2967 | // particular, the 0 immediate case involves no lane changes, so it can |
2968 | // be handled as a blend. |
2969 | |
2970 | // Example of shuffle mask for 32-bit elements: |
2971 | // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> |
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
2973 | |
// First fill with the identity mask.
2975 | for (unsigned i = 0; i != DstNumElts; ++i) |
2976 | Idxs[i] = i; |
2977 | // Then replace the elements where we need to insert. |
2978 | for (unsigned i = 0; i != SrcNumElts; ++i) |
2979 | Idxs[i + Imm * SrcNumElts] = i + DstNumElts; |
2980 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Rep, Mask: Idxs); |
2981 | |
2982 | // If the intrinsic has a mask operand, handle that. |
2983 | if (CI->arg_size() == 5) |
2984 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, |
2985 | Op1: CI->getArgOperand(i: 3)); |
2986 | } else if (Name.starts_with(Prefix: "avx.vextractf128." ) || |
2987 | Name == "avx2.vextracti128" || |
2988 | Name.starts_with(Prefix: "avx512.mask.vextract" )) { |
2989 | Value *Op0 = CI->getArgOperand(i: 0); |
2990 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
2991 | unsigned DstNumElts = |
2992 | cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
2993 | unsigned SrcNumElts = |
2994 | cast<FixedVectorType>(Val: Op0->getType())->getNumElements(); |
2995 | unsigned Scale = SrcNumElts / DstNumElts; |
2996 | |
2997 | // Mask off the high bits of the immediate value; hardware ignores those. |
2998 | Imm = Imm % Scale; |
2999 | |
3000 | // Get indexes for the subvector of the input vector. |
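// E.g. extracting the upper half of a <8 x float> with Imm = 1 uses the
// mask <4, 5, 6, 7> (illustrative).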
3001 | SmallVector<int, 8> Idxs(DstNumElts); |
3002 | for (unsigned i = 0; i != DstNumElts; ++i) { |
3003 | Idxs[i] = i + (Imm * DstNumElts); |
3004 | } |
3005 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3006 | |
3007 | // If the intrinsic has a mask operand, handle that. |
3008 | if (CI->arg_size() == 4) |
3009 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3010 | Op1: CI->getArgOperand(i: 2)); |
3011 | } else if (Name.starts_with(Prefix: "avx512.mask.perm.df." ) || |
3012 | Name.starts_with(Prefix: "avx512.mask.perm.di." )) { |
3013 | Value *Op0 = CI->getArgOperand(i: 0); |
3014 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3015 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3016 | unsigned NumElts = VecTy->getNumElements(); |
3017 | |
3018 | SmallVector<int, 8> Idxs(NumElts); |
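// Each element takes its 2-bit index from the immediate, applied within
// each group of four, e.g. Imm = 0x1B permutes every group as
// <3, 2, 1, 0> (illustrative).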
3019 | for (unsigned i = 0; i != NumElts; ++i) |
3020 | Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); |
3021 | |
3022 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3023 | |
3024 | if (CI->arg_size() == 4) |
3025 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3026 | Op1: CI->getArgOperand(i: 2)); |
3027 | } else if (Name.starts_with(Prefix: "avx.vperm2f128." ) || Name == "avx2.vperm2i128" ) { |
3028 | // The immediate permute control byte looks like this: |
3029 | // [1:0] - select 128 bits from sources for low half of destination |
3030 | // [2] - ignore |
3031 | // [3] - zero low half of destination |
3032 | // [5:4] - select 128 bits from sources for high half of destination |
3033 | // [6] - ignore |
3034 | // [7] - zero high half of destination |
3035 | |
3036 | uint8_t Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3037 | |
3038 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3039 | unsigned HalfSize = NumElts / 2; |
3040 | SmallVector<int, 8> ShuffleMask(NumElts); |
3041 | |
3042 | // Determine which operand(s) are actually in use for this instruction. |
3043 | Value *V0 = (Imm & 0x02) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0); |
3044 | Value *V1 = (Imm & 0x20) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0); |
3045 | |
3046 | // If needed, replace operands based on zero mask. |
3047 | V0 = (Imm & 0x08) ? ConstantAggregateZero::get(Ty: CI->getType()) : V0; |
3048 | V1 = (Imm & 0x80) ? ConstantAggregateZero::get(Ty: CI->getType()) : V1; |
3049 | |
3050 | // Permute low half of result. |
3051 | unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0; |
3052 | for (unsigned i = 0; i < HalfSize; ++i) |
3053 | ShuffleMask[i] = StartIndex + i; |
3054 | |
3055 | // Permute high half of result. |
3056 | StartIndex = (Imm & 0x10) ? HalfSize : 0; |
3057 | for (unsigned i = 0; i < HalfSize; ++i) |
3058 | ShuffleMask[i + HalfSize] = NumElts + StartIndex + i; |
3059 | |
3060 | Rep = Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: ShuffleMask); |
3061 | |
3062 | } else if (Name.starts_with(Prefix: "avx.vpermil." ) || Name == "sse2.pshuf.d" || |
3063 | Name.starts_with(Prefix: "avx512.mask.vpermil.p" ) || |
3064 | Name.starts_with(Prefix: "avx512.mask.pshuf.d." )) { |
3065 | Value *Op0 = CI->getArgOperand(i: 0); |
3066 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3067 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3068 | unsigned NumElts = VecTy->getNumElements(); |
3069 | // Calculate the size of each index in the immediate. |
3070 | unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); |
3071 | unsigned IdxMask = ((1 << IdxSize) - 1); |
3072 | |
3073 | SmallVector<int, 8> Idxs(NumElts); |
// Look up the bits for this element, wrapping around the immediate every
// 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
// to offset by the first index of each group.
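// E.g. sse2.pshuf.d with Imm = 0x1B yields the mask <3, 2, 1, 0>
// (illustrative).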
3077 | for (unsigned i = 0; i != NumElts; ++i) |
3078 | Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); |
3079 | |
3080 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3081 | |
3082 | if (CI->arg_size() == 4) |
3083 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3084 | Op1: CI->getArgOperand(i: 2)); |
3085 | } else if (Name == "sse2.pshufl.w" || |
3086 | Name.starts_with(Prefix: "avx512.mask.pshufl.w." )) { |
3087 | Value *Op0 = CI->getArgOperand(i: 0); |
3088 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3089 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3090 | |
3091 | SmallVector<int, 16> Idxs(NumElts); |
3092 | for (unsigned l = 0; l != NumElts; l += 8) { |
3093 | for (unsigned i = 0; i != 4; ++i) |
3094 | Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; |
3095 | for (unsigned i = 4; i != 8; ++i) |
3096 | Idxs[i + l] = i + l; |
3097 | } |
3098 | |
3099 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3100 | |
3101 | if (CI->arg_size() == 4) |
3102 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3103 | Op1: CI->getArgOperand(i: 2)); |
3104 | } else if (Name == "sse2.pshufh.w" || |
3105 | Name.starts_with(Prefix: "avx512.mask.pshufh.w." )) { |
3106 | Value *Op0 = CI->getArgOperand(i: 0); |
3107 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3108 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3109 | |
3110 | SmallVector<int, 16> Idxs(NumElts); |
3111 | for (unsigned l = 0; l != NumElts; l += 8) { |
3112 | for (unsigned i = 0; i != 4; ++i) |
3113 | Idxs[i + l] = i + l; |
3114 | for (unsigned i = 0; i != 4; ++i) |
3115 | Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; |
3116 | } |
3117 | |
3118 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3119 | |
3120 | if (CI->arg_size() == 4) |
3121 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3122 | Op1: CI->getArgOperand(i: 2)); |
3123 | } else if (Name.starts_with(Prefix: "avx512.mask.shuf.p" )) { |
3124 | Value *Op0 = CI->getArgOperand(i: 0); |
3125 | Value *Op1 = CI->getArgOperand(i: 1); |
3126 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3127 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3128 | |
3129 | unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); |
3130 | unsigned HalfLaneElts = NumLaneElts / 2; |
3131 | |
3132 | SmallVector<int, 16> Idxs(NumElts); |
3133 | for (unsigned i = 0; i != NumElts; ++i) { |
3134 | // Base index is the starting element of the lane. |
3135 | Idxs[i] = i - (i % NumLaneElts); |
// If we are halfway through the lane, switch to the other source.
3137 | if ((i % NumLaneElts) >= HalfLaneElts) |
3138 | Idxs[i] += NumElts; |
// Now select the specific element by adding HalfLaneElts bits from
// the immediate, wrapping around the immediate every 8 bits.
3141 | Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1); |
3142 | } |
3143 | |
3144 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3145 | |
3146 | Rep = |
3147 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3)); |
3148 | } else if (Name.starts_with(Prefix: "avx512.mask.movddup" ) || |
3149 | Name.starts_with(Prefix: "avx512.mask.movshdup" ) || |
3150 | Name.starts_with(Prefix: "avx512.mask.movsldup" )) { |
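// Duplicate the even (movsldup/movddup) or odd (movshdup) elements of
// each lane, e.g. movshdup on v4f32 uses the mask <1, 1, 3, 3>
// (illustrative).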
3151 | Value *Op0 = CI->getArgOperand(i: 0); |
3152 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3153 | unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); |
3154 | |
3155 | unsigned Offset = 0; |
3156 | if (Name.starts_with(Prefix: "avx512.mask.movshdup." )) |
3157 | Offset = 1; |
3158 | |
3159 | SmallVector<int, 16> Idxs(NumElts); |
3160 | for (unsigned l = 0; l != NumElts; l += NumLaneElts) |
3161 | for (unsigned i = 0; i != NumLaneElts; i += 2) { |
3162 | Idxs[i + l + 0] = i + l + Offset; |
3163 | Idxs[i + l + 1] = i + l + Offset; |
3164 | } |
3165 | |
3166 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3167 | |
3168 | Rep = |
3169 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
3170 | } else if (Name.starts_with(Prefix: "avx512.mask.punpckl" ) || |
3171 | Name.starts_with(Prefix: "avx512.mask.unpckl." )) { |
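// Interleave the low halves of each 128-bit lane, e.g. for v4i32 the
// mask is <0, 4, 1, 5> (illustrative).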
3172 | Value *Op0 = CI->getArgOperand(i: 0); |
3173 | Value *Op1 = CI->getArgOperand(i: 1); |
3174 | int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3175 | int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); |
3176 | |
3177 | SmallVector<int, 64> Idxs(NumElts); |
3178 | for (int l = 0; l != NumElts; l += NumLaneElts) |
3179 | for (int i = 0; i != NumLaneElts; ++i) |
3180 | Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); |
3181 | |
3182 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3183 | |
3184 | Rep = |
3185 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3186 | } else if (Name.starts_with(Prefix: "avx512.mask.punpckh" ) || |
3187 | Name.starts_with(Prefix: "avx512.mask.unpckh." )) { |
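// Interleave the high halves of each 128-bit lane, e.g. for v4i32 the
// mask is <2, 6, 3, 7> (illustrative).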
3188 | Value *Op0 = CI->getArgOperand(i: 0); |
3189 | Value *Op1 = CI->getArgOperand(i: 1); |
3190 | int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3191 | int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); |
3192 | |
3193 | SmallVector<int, 64> Idxs(NumElts); |
3194 | for (int l = 0; l != NumElts; l += NumLaneElts) |
3195 | for (int i = 0; i != NumLaneElts; ++i) |
3196 | Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); |
3197 | |
3198 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3199 | |
3200 | Rep = |
3201 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3202 | } else if (Name.starts_with(Prefix: "avx512.mask.and." ) || |
3203 | Name.starts_with(Prefix: "avx512.mask.pand." )) { |
3204 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3205 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3206 | Rep = Builder.CreateAnd(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy), |
3207 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3208 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3209 | Rep = |
3210 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3211 | } else if (Name.starts_with(Prefix: "avx512.mask.andn." ) || |
3212 | Name.starts_with(Prefix: "avx512.mask.pandn." )) { |
3213 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3214 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3215 | Rep = Builder.CreateNot(V: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy)); |
3216 | Rep = Builder.CreateAnd(LHS: Rep, |
3217 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3218 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3219 | Rep = |
3220 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3221 | } else if (Name.starts_with(Prefix: "avx512.mask.or." ) || |
3222 | Name.starts_with(Prefix: "avx512.mask.por." )) { |
3223 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3224 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3225 | Rep = Builder.CreateOr(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy), |
3226 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3227 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3228 | Rep = |
3229 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3230 | } else if (Name.starts_with(Prefix: "avx512.mask.xor." ) || |
3231 | Name.starts_with(Prefix: "avx512.mask.pxor." )) { |
3232 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3233 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3234 | Rep = Builder.CreateXor(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy), |
3235 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3236 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3237 | Rep = |
3238 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3239 | } else if (Name.starts_with(Prefix: "avx512.mask.padd." )) { |
3240 | Rep = Builder.CreateAdd(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
3241 | Rep = |
3242 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3243 | } else if (Name.starts_with(Prefix: "avx512.mask.psub." )) { |
3244 | Rep = Builder.CreateSub(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
3245 | Rep = |
3246 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3247 | } else if (Name.starts_with(Prefix: "avx512.mask.pmull." )) { |
3248 | Rep = Builder.CreateMul(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
3249 | Rep = |
3250 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3251 | } else if (Name.starts_with(Prefix: "avx512.mask.add.p" )) { |
3252 | if (Name.ends_with(Suffix: ".512" )) { |
3253 | Intrinsic::ID IID; |
3254 | if (Name[17] == 's') |
3255 | IID = Intrinsic::x86_avx512_add_ps_512; |
3256 | else |
3257 | IID = Intrinsic::x86_avx512_add_pd_512; |
3258 | |
3259 | Rep = Builder.CreateCall( |
3260 | Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3261 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3262 | } else { |
3263 | Rep = Builder.CreateFAdd(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3264 | } |
3265 | Rep = |
3266 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3267 | } else if (Name.starts_with(Prefix: "avx512.mask.div.p" )) { |
3268 | if (Name.ends_with(Suffix: ".512" )) { |
3269 | Intrinsic::ID IID; |
3270 | if (Name[17] == 's') |
3271 | IID = Intrinsic::x86_avx512_div_ps_512; |
3272 | else |
3273 | IID = Intrinsic::x86_avx512_div_pd_512; |
3274 | |
3275 | Rep = Builder.CreateCall( |
3276 | Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3277 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3278 | } else { |
3279 | Rep = Builder.CreateFDiv(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3280 | } |
3281 | Rep = |
3282 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3283 | } else if (Name.starts_with(Prefix: "avx512.mask.mul.p" )) { |
3284 | if (Name.ends_with(Suffix: ".512" )) { |
3285 | Intrinsic::ID IID; |
3286 | if (Name[17] == 's') |
3287 | IID = Intrinsic::x86_avx512_mul_ps_512; |
3288 | else |
3289 | IID = Intrinsic::x86_avx512_mul_pd_512; |
3290 | |
3291 | Rep = Builder.CreateCall( |
3292 | Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3293 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3294 | } else { |
3295 | Rep = Builder.CreateFMul(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3296 | } |
3297 | Rep = |
3298 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3299 | } else if (Name.starts_with(Prefix: "avx512.mask.sub.p" )) { |
3300 | if (Name.ends_with(Suffix: ".512" )) { |
3301 | Intrinsic::ID IID; |
3302 | if (Name[17] == 's') |
3303 | IID = Intrinsic::x86_avx512_sub_ps_512; |
3304 | else |
3305 | IID = Intrinsic::x86_avx512_sub_pd_512; |
3306 | |
3307 | Rep = Builder.CreateCall( |
3308 | Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3309 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3310 | } else { |
3311 | Rep = Builder.CreateFSub(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3312 | } |
3313 | Rep = |
3314 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3315 | } else if ((Name.starts_with(Prefix: "avx512.mask.max.p" ) || |
3316 | Name.starts_with(Prefix: "avx512.mask.min.p" )) && |
3317 | Name.drop_front(N: 18) == ".512" ) { |
3318 | bool IsDouble = Name[17] == 'd'; |
3319 | bool IsMin = Name[13] == 'i'; |
3320 | static const Intrinsic::ID MinMaxTbl[2][2] = { |
3321 | {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512}, |
3322 | {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}}; |
3323 | Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble]; |
3324 | |
3325 | Rep = Builder.CreateCall( |
3326 | Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3327 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3328 | Rep = |
3329 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3330 | } else if (Name.starts_with(Prefix: "avx512.mask.lzcnt." )) { |
3331 | Rep = |
3332 | Builder.CreateCall(Callee: Intrinsic::getDeclaration( |
3333 | M: F->getParent(), id: Intrinsic::ctlz, Tys: CI->getType()), |
3334 | Args: {CI->getArgOperand(i: 0), Builder.getInt1(V: false)}); |
3335 | Rep = |
3336 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
3337 | } else if (Name.starts_with(Prefix: "avx512.mask.psll" )) { |
3338 | bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); |
3339 | bool IsVariable = Name[16] == 'v'; |
3340 | char Size = Name[16] == '.' ? Name[17] |
3341 | : Name[17] == '.' ? Name[18] |
3342 | : Name[18] == '.' ? Name[19] |
3343 | : Name[20]; |
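// The size character is the first letter after the "psll" stem, e.g. 'd'
// in "avx512.mask.psll.d.128" (Name[17]), "avx512.mask.psll.di.512"
// (Name[17]), "avx512.mask.psllv.d.512" (Name[18]) and
// "avx512.mask.psllv2.di" (Name[19]).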
3344 | |
3345 | Intrinsic::ID IID; |
3346 | if (IsVariable && Name[17] != '.') { |
3347 | if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di |
3348 | IID = Intrinsic::x86_avx2_psllv_q; |
3349 | else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di |
3350 | IID = Intrinsic::x86_avx2_psllv_q_256; |
3351 | else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si |
3352 | IID = Intrinsic::x86_avx2_psllv_d; |
3353 | else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si |
3354 | IID = Intrinsic::x86_avx2_psllv_d_256; |
3355 | else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi |
3356 | IID = Intrinsic::x86_avx512_psllv_w_128; |
3357 | else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi |
3358 | IID = Intrinsic::x86_avx512_psllv_w_256; |
3359 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi |
3360 | IID = Intrinsic::x86_avx512_psllv_w_512; |
3361 | else |
3362 | llvm_unreachable("Unexpected size" ); |
3363 | } else if (Name.ends_with(Suffix: ".128" )) { |
3364 | if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 |
3365 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d |
3366 | : Intrinsic::x86_sse2_psll_d; |
3367 | else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128 |
3368 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q |
3369 | : Intrinsic::x86_sse2_psll_q; |
3370 | else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128 |
3371 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w |
3372 | : Intrinsic::x86_sse2_psll_w; |
3373 | else |
3374 | llvm_unreachable("Unexpected size" ); |
3375 | } else if (Name.ends_with(Suffix: ".256" )) { |
3376 | if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 |
3377 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d |
3378 | : Intrinsic::x86_avx2_psll_d; |
3379 | else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256 |
3380 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q |
3381 | : Intrinsic::x86_avx2_psll_q; |
3382 | else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256 |
3383 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w |
3384 | : Intrinsic::x86_avx2_psll_w; |
3385 | else |
3386 | llvm_unreachable("Unexpected size" ); |
3387 | } else { |
3388 | if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512 |
3389 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 |
3390 | : IsVariable ? Intrinsic::x86_avx512_psllv_d_512 |
3391 | : Intrinsic::x86_avx512_psll_d_512; |
3392 | else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512 |
3393 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 |
3394 | : IsVariable ? Intrinsic::x86_avx512_psllv_q_512 |
3395 | : Intrinsic::x86_avx512_psll_q_512; |
3396 | else if (Size == 'w') // psll.wi.512, pslli.w, psll.w |
3397 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512 |
3398 | : Intrinsic::x86_avx512_psll_w_512; |
3399 | else |
3400 | llvm_unreachable("Unexpected size" ); |
3401 | } |
3402 | |
3403 | Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID); |
3404 | } else if (Name.starts_with(Prefix: "avx512.mask.psrl" )) { |
3405 | bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); |
3406 | bool IsVariable = Name[16] == 'v'; |
3407 | char Size = Name[16] == '.' ? Name[17] |
3408 | : Name[17] == '.' ? Name[18] |
3409 | : Name[18] == '.' ? Name[19] |
3410 | : Name[20]; |
3411 | |
3412 | Intrinsic::ID IID; |
3413 | if (IsVariable && Name[17] != '.') { |
3414 | if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di |
3415 | IID = Intrinsic::x86_avx2_psrlv_q; |
3416 | else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di |
3417 | IID = Intrinsic::x86_avx2_psrlv_q_256; |
3418 | else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si |
3419 | IID = Intrinsic::x86_avx2_psrlv_d; |
3420 | else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si |
3421 | IID = Intrinsic::x86_avx2_psrlv_d_256; |
3422 | else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi |
3423 | IID = Intrinsic::x86_avx512_psrlv_w_128; |
3424 | else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi |
3425 | IID = Intrinsic::x86_avx512_psrlv_w_256; |
3426 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi |
3427 | IID = Intrinsic::x86_avx512_psrlv_w_512; |
3428 | else |
3429 | llvm_unreachable("Unexpected size" ); |
3430 | } else if (Name.ends_with(Suffix: ".128" )) { |
3431 | if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 |
3432 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d |
3433 | : Intrinsic::x86_sse2_psrl_d; |
3434 | else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128 |
3435 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q |
3436 | : Intrinsic::x86_sse2_psrl_q; |
3437 | else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128 |
3438 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w |
3439 | : Intrinsic::x86_sse2_psrl_w; |
3440 | else |
3441 | llvm_unreachable("Unexpected size" ); |
3442 | } else if (Name.ends_with(Suffix: ".256" )) { |
3443 | if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 |
3444 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d |
3445 | : Intrinsic::x86_avx2_psrl_d; |
3446 | else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256 |
3447 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q |
3448 | : Intrinsic::x86_avx2_psrl_q; |
3449 | else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256 |
3450 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w |
3451 | : Intrinsic::x86_avx2_psrl_w; |
3452 | else |
3453 | llvm_unreachable("Unexpected size" ); |
3454 | } else { |
if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3456 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 |
3457 | : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 |
3458 | : Intrinsic::x86_avx512_psrl_d_512; |
else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3460 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 |
3461 | : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 |
3462 | : Intrinsic::x86_avx512_psrl_q_512; |
else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                          : Intrinsic::x86_avx512_psrl_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  } else if (Name.starts_with("avx512.mask.psra")) {
    bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : Name[20];

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
        IID = Intrinsic::x86_avx2_psrav_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
        IID = Intrinsic::x86_avx2_psrav_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
        IID = Intrinsic::x86_avx512_psrav_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
        IID = Intrinsic::x86_avx512_psrav_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
        IID = Intrinsic::x86_avx512_psrav_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".128")) {
      if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                          : Intrinsic::x86_sse2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
                           : Intrinsic::x86_avx512_psra_q_128;
      else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                          : Intrinsic::x86_sse2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.ends_with(".256")) {
      if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                          : Intrinsic::x86_avx2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
                           : Intrinsic::x86_avx512_psra_q_256;
      else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                          : Intrinsic::x86_avx2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
                           : Intrinsic::x86_avx512_psra_d_512;
      else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
                           : Intrinsic::x86_avx512_psra_q_512;
      else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                          : Intrinsic::x86_avx512_psra_w_512;
      else
        llvm_unreachable("Unexpected size");
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  } else if (Name.starts_with("avx512.mask.move.s")) {
    Rep = upgradeMaskedMove(Builder, *CI);
  } else if (Name.starts_with("avx512.cvtmask2")) {
    Rep = upgradeMaskToInt(Builder, *CI);
  } else if (Name.ends_with(".movntdqa")) {
    MDNode *Node = MDNode::get(
        C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

    Value *Ptr = CI->getArgOperand(0);

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(
        Ptr, PointerType::getUnqual(CI->getType()), "cast");
    LoadInst *LI = Builder.CreateAlignedLoad(
        CI->getType(), BC,
        Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
    LI->setMetadata(LLVMContext::MD_nontemporal, Node);
    Rep = LI;
  } else if (Name.starts_with("fma.vfmadd.") ||
             Name.starts_with("fma.vfmsub.") ||
             Name.starts_with("fma.vfnmadd.") ||
             Name.starts_with("fma.vfnmsub.")) {
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
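    // e.g. "fma.vfnmsub.sd" decodes as NegMul, NegAcc and IsScalar all true,
    // while "fma.vfmadd.ps.256" decodes with all three false.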

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    if (IsScalar) {
      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    }

    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(Ops[1]);
    if (NegAcc)
      Ops[2] = Builder.CreateFNeg(Ops[2]);

    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                       Intrinsic::fma,
                                                       Ops[0]->getType()),
                             Ops);

    if (IsScalar)
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (Name.starts_with("fma4.vfmadd.s")) {
    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};

    Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                       Intrinsic::fma,
                                                       Ops[0]->getType()),
                             Ops);

    Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                      Rep, (uint64_t)0);
  } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
             Name.starts_with("avx512.maskz.vfmadd.s") ||
             Name.starts_with("avx512.mask3.vfmadd.s") ||
             Name.starts_with("avx512.mask3.vfmsub.s") ||
             Name.starts_with("avx512.mask3.vfnmsub.s")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." prefix (or its mask3/maskz variant) so the
    // remaining flags sit at fixed offsets.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    A = Builder.CreateExtractElement(A, (uint64_t)0);
    B = Builder.CreateExtractElement(B, (uint64_t)0);
    C = Builder.CreateExtractElement(C, (uint64_t)0);

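    // Operand 4 is the rounding mode; 4 (CUR_DIRECTION) is the default and
    // maps onto plain llvm.fma, while any other value must be preserved by
    // calling the AVX512 intrinsic that still carries a rounding operand.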
    if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
        cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
      Value *Ops[] = {A, B, C, CI->getArgOperand(4)};

      Intrinsic::ID IID;
      if (Name.back() == 'd')
        IID = Intrinsic::x86_avx512_vfmadd_f64;
      else
        IID = Intrinsic::x86_avx512_vfmadd_f32;
      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
      Rep = Builder.CreateCall(FMA, Ops);
    } else {
      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                A->getType());
      Rep = Builder.CreateCall(FMA, {A, B, C});
    }

    Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
                      : IsMask3 ? C
                                : A;

    // For Mask3 with NegAcc, we need to create a new extractelement that
    // avoids the negation above.
    if (NegAcc && IsMask3)
      PassThru =
          Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);

    Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
                                      (uint64_t)0);
  } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
             Name.starts_with("avx512.mask.vfnmadd.p") ||
             Name.starts_with("avx512.mask.vfnmsub.p") ||
             Name.starts_with("avx512.mask3.vfmadd.p") ||
             Name.starts_with("avx512.mask3.vfmsub.p") ||
             Name.starts_with("avx512.mask3.vfnmsub.p") ||
             Name.starts_with("avx512.maskz.vfmadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." prefix (or its mask3/maskz variant) so the
    // remaining flags sit at fixed offsets.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);

    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    if (CI->arg_size() == 5 &&
        (!isa<ConstantInt>(CI->getArgOperand(4)) ||
         cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
      Intrinsic::ID IID;
      // Check the character before the ".512" suffix.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmadd_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmadd_pd_512;

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               {A, B, C, CI->getArgOperand(4)});
    } else {
      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                A->getType());
      Rep = Builder.CreateCall(FMA, {A, B, C});
    }

    Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
                      : IsMask3 ? CI->getArgOperand(2)
                                : CI->getArgOperand(0);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("fma.vfmsubadd.p")) {
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                    CI->getArgOperand(2)};
    Ops[2] = Builder.CreateFNeg(Ops[2]);
    Rep =
        Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Ops);
  } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
             Name.starts_with("avx512.mask3.vfmaddsub.p") ||
             Name.starts_with("avx512.maskz.vfmaddsub.p") ||
             Name.starts_with("avx512.mask3.vfmsubadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." prefix (or its mask3/maskz variant) so the
    // remaining flags sit at fixed offsets.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool IsSubAdd = Name[3] == 's';
    if (CI->arg_size() == 5) {
      Intrinsic::ID IID;
      // Check the character before the ".512" suffix.
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
      if (IsSubAdd)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               Ops);
    } else {
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();

      Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2)};

      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);
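      // Odd computes a*b + c and Even computes a*b - c; the interleaving
      // shuffle below then yields the fmaddsub result (even lanes subtract,
      // odd lanes add). For fmsubadd the two roles are simply swapped.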

      if (IsSubAdd)
        std::swap(Even, Odd);

      SmallVector<int, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    }

    Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
                      : IsMask3 ? CI->getArgOperand(2)
                                : CI->getArgOperand(0);

    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.pternlog.") ||
             Name.starts_with("avx512.maskz.pternlog.")) {
    bool ZeroMask = Name[11] == 'z';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2), CI->getArgOperand(3)};
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                             Args);
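    // The mask operand of the old intrinsic becomes an explicit vector select
    // between the new intrinsic's result and the pass-through value (zero for
    // the maskz variants, the first source operand otherwise).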
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.vpmadd52") ||
             Name.starts_with("avx512.maskz.vpmadd52")) {
    bool ZeroMask = Name[11] == 'z';
    bool High = Name[20] == 'h' || Name[21] == 'h';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
    else if (VecWidth == 256 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
    else if (VecWidth == 512 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
    else if (VecWidth == 128 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
    else if (VecWidth == 256 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
    else if (VecWidth == 512 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                             Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
             Name.starts_with("avx512.mask.vpermt2var.") ||
             Name.starts_with("avx512.maskz.vpermt2var.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IndexForm = Name[17] == 'i';
    Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
  } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
             Name.starts_with("avx512.maskz.vpdpbusd.") ||
             Name.starts_with("avx512.mask.vpdpbusds.") ||
             Name.starts_with("avx512.maskz.vpdpbusds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                             Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
             Name.starts_with("avx512.maskz.vpdpwssd.") ||
             Name.starts_with("avx512.mask.vpdpwssds.") ||
             Name.starts_with("avx512.maskz.vpdpwssds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                             Args);
    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                               : CI->getArgOperand(0);
    Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
             Name == "addcarry.u32" || Name == "addcarry.u64" ||
             Name == "subborrow.u32" || Name == "subborrow.u64") {
    Intrinsic::ID IID;
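    // Dispatch on the first letter (add vs. sub) and on the last digit of the
    // width suffix: '2' ends "u32", '4' ends "u64".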
    if (Name[0] == 'a' && Name.back() == '2')
      IID = Intrinsic::x86_addcarry_32;
    else if (Name[0] == 'a' && Name.back() == '4')
      IID = Intrinsic::x86_addcarry_64;
    else if (Name[0] == 's' && Name.back() == '2')
      IID = Intrinsic::x86_subborrow_32;
    else if (Name[0] == 's' && Name.back() == '4')
      IID = Intrinsic::x86_subborrow_64;
    else
      llvm_unreachable("Unexpected intrinsic");

    // Make a call with 3 operands.
    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2)};
    Value *NewCall = Builder.CreateCall(
        Intrinsic::getDeclaration(CI->getModule(), IID), Args);

    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    // Cast the pointer to the right type.
    Value *Ptr = Builder.CreateBitCast(
        CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
    Builder.CreateAlignedStore(Data, Ptr, Align(1));
    // Replace the original call result with the first result of the new call.
    Value *CF = Builder.CreateExtractValue(NewCall, 0);

    CI->replaceAllUsesWith(CF);
    Rep = nullptr;
  } else if (Name.starts_with("avx512.mask.") &&
             upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
    // Rep will be updated by the call in the condition.
  }

  return Rep;
}

static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateCall(
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
        CI->getArgOperand(0), CI->getName());
    Value *C1 = Builder.CreateCall(
        Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 2, false)}),
        VCTP);
    return Builder.CreateCall(
        Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::arm_mve_pred_i2v,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}),
        C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

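    // Rebuild the argument list, converting every <4 x i1> predicate operand
    // into the <2 x i1> form the new signature expects, again via the
    // pred_v2i / pred_i2v round trip.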
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateCall(
            Intrinsic::getDeclaration(
                F->getParent(), Intrinsic::arm_mve_pred_v2i,
                {VectorType::get(Builder.getInt1Ty(), 4, false)}),
            Op);
        Op = Builder.CreateCall(
            Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
            C1);
      }
      Ops.push_back(Op);
    }

    Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
    return Builder.CreateCall(Fn, Ops, CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}

// These are expected to have the arguments:
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
//
// Except for int_amdgcn_ds_fadd_v2bf16, which only has (ptr, rmw_value).
//
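// For example (illustrative IR), a call such as
//   %r = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v,
//                                            i32 0, i32 0, i1 false)
// becomes
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v
//            syncscope("agent") seq_cst
//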
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
                                         Function *F, IRBuilder<> &Builder) {
  AtomicRMWInst::BinOp RMWOp =
      StringSwitch<AtomicRMWInst::BinOp>(Name)
          .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
          .StartsWith("ds.fmin", AtomicRMWInst::FMin)
          .StartsWith("ds.fmax", AtomicRMWInst::FMax)
          .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
          .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);

  unsigned NumOperands = CI->getNumOperands();
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(0);
  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy) // Malformed.
    return nullptr;

  Value *Val = CI->getArgOperand(1);
  if (Val->getType() != CI->getType()) // Malformed.
    return nullptr;

  ConstantInt *OrderArg = nullptr;
  bool IsVolatile = false;

  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16 which was missing these arguments.
  if (NumOperands > 3)
    OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));

  // Ignore scope argument at 3.

  if (NumOperands > 5) {
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
  if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
    Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
  if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::SequentiallyConsistent;

  LLVMContext &Ctx = F->getContext();

  // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>.
  Type *RetTy = CI->getType();
  if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
    if (VT->getElementType()->isIntegerTy(16)) {
      VectorType *AsBF16 =
          VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }
  }

  // The scope argument never really worked correctly. Use agent as the most
  // conservative option which should still always produce the instruction.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  if (PtrTy->getAddressSpace() != 3) {
    RMW->setMetadata("amdgpu.no.fine.grained.memory",
                     MDNode::get(F->getContext(), {}));
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  return Builder.CreateBitCast(RMW, RetTy);
}

/// Helper to unwrap intrinsic call MetadataAsValue operands.
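/// For example, unwrapMAVOp<DILocalVariable>(CI, 1) returns the variable
/// operand of a dbg.value call, or nullptr if that operand isn't of this kind.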
template <typename MDType>
static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
  if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
    return dyn_cast<MDType>(MAV->getMetadata());
  return nullptr;
}

/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
  DbgRecord *DR = nullptr;
  if (Name == "label") {
    DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
  } else if (Name == "assign") {
    DR = new DbgVariableRecord(
        unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
        unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
        unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
        CI->getDebugLoc());
  } else if (Name == "declare") {
    DR = new DbgVariableRecord(
        unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
        unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
        DbgVariableRecord::LocationType::Declare);
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
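    // e.g. dbg.addr(%p, !var, !DIExpression()) becomes
    // dbg.value(%p, !var, !DIExpression(DW_OP_deref)).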
    DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
    Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
    DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
                               unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
                               CI->getDebugLoc());
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
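    // (That 4-operand form was: value, i64 offset, variable, expression.)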
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isZeroValue())
        return;
      VarOp = 2;
      ExprOp = 3;
    }
    DR = new DbgVariableRecord(
        unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
        unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}

/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
  // Note that a dyn_cast to Function is not quite the same as
  // getCalledFunction, which also checks that the callee's function type
  // matches. It's likely we need to handle type changes here.
  Function *F = dyn_cast<Function>(CI->getCalledOperand());
  if (!F)
    return;

  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  if (!NewFn) {
    bool FallthroughToDefaultUpgrade = false;
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.consume_front("x86.");
    bool IsNVVM = Name.consume_front("nvvm.");
    bool IsARM = Name.consume_front("arm.");
    bool IsAMDGCN = Name.consume_front("amdgcn.");
    bool IsDbg = Name.consume_front("dbg.");
    Value *Rep = nullptr;

    if (!IsX86 && Name == "stackprotectorcheck") {
      Rep = nullptr;
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
                          Name.starts_with("atomic.load.add.f64.p"))) {
      Value *Ptr = CI->getArgOperand(0);
      Value *Val = CI->getArgOperand(1);
      Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                    AtomicOrdering::SequentiallyConsistent);
    } else if (IsNVVM && Name.consume_front("max.") &&
               (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                Name == "ui" || Name == "ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.starts_with("u")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && Name.consume_front("min.") &&
               (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                Name == "ui" || Name == "ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.starts_with("u")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
      // i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM) {
      if (Name == "h2f") {
        Rep =
            Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
      } else {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic &&
            !F->getReturnType()->getScalarType()->isBFloatTy()) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
          SmallVector<Value *, 2> Args;
          for (size_t I = 0; I < NewFn->arg_size(); ++I) {
            Value *Arg = CI->getArgOperand(I);
            Type *OldType = Arg->getType();
            Type *NewType = NewFn->getArg(I)->getType();
            Args.push_back((OldType->isIntegerTy() &&
                            NewType->getScalarType()->isBFloatTy())
                               ? Builder.CreateBitCast(Arg, NewType)
                               : Arg);
          }
          Rep = Builder.CreateCall(NewFn, Args);
          if (F->getReturnType()->isIntegerTy())
            Rep = Builder.CreateBitCast(Rep, F->getReturnType());
        }
      }
    } else if (IsX86) {
      Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
    } else if (IsARM) {
      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsAMDGCN) {
      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
    } else if (IsDbg) {
      // Between first requesting the upgrade and now, we might have decided
      // we don't want the new debug-info format after all; skip the
      // conversion if that is the case, and check here whether the intrinsic
      // needs to be upgraded normally.
      if (!CI->getModule()->IsNewDbgInfoFormat) {
        bool NeedsUpgrade =
            upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
        if (!NeedsUpgrade)
          return;
        FallthroughToDefaultUpgrade = true;
      } else {
        upgradeDbgIntrinsicToDbgRecord(Name, CI);
      }
    } else {
      llvm_unreachable("Unknown function for CallBase upgrade.");
    }

    if (!FallthroughToDefaultUpgrade) {
      if (Rep)
        CI->replaceAllUsesWith(Rep);
      CI->eraseFromParent();
      return;
    }
  }

  const auto &DefaultCase = [&]() -> void {
    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }

    // This must be an upgrade from a named to a literal struct.
    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
      assert(OldST != NewFn->getReturnType() &&
             "Return type must have changed");
      assert(OldST->getNumElements() ==
                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
             "Must have same number of elements");

      SmallVector<Value *> Args(CI->args());
      Value *NewCI = Builder.CreateCall(NewFn, Args);
      Value *Res = PoisonValue::get(OldST);
      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
        Res = Builder.CreateInsertValue(Res, Elem, Idx);
      }
      CI->replaceAllUsesWith(Res);
      CI->eraseFromParent();
      return;
    }

    // We're probably about to produce something invalid. Let the verifier
    // catch it instead of dying here.
    CI->setCalledOperand(
        ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
    return;
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    LLVMContext &Ctx = F->getParent()->getContext();
    SmallVector<Value *, 4> Args(CI->args());
    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
                               cast<ConstantInt>(Args[3])->getZExtValue());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
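    // The old intrinsics returned the N loaded registers concatenated into
    // one wide scalable vector; the new .sret forms return a struct of N
    // parts, so reassemble the wide value with vector.insert below.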
    StringRef Name = F->getName();
    Name = Name.substr(5);
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith("aarch64.sve.ld2", 2)
                     .StartsWith("aarch64.sve.ld3", 3)
                     .StartsWith("aarch64.sve.ld4", 4)
                     .Default(0);
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements() / N;
    SmallVector<Value *, 2> Args(CI->args());
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
    }
    NewCall = dyn_cast<CallInst>(Ret);
    break;
  }

  case Intrinsic::coro_end: {
    SmallVector<Value *, 3> Args(CI->args());
    Args.push_back(ConstantTokenNone::get(CI->getContext()));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::vector_extract: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    if (!Name.starts_with("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
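    // aarch64.sve.tuple.get took a tuple-slot index; llvm.vector.extract
    // wants an element offset, so scale by the subvector length.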
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements();
    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }

  case Intrinsic::vector_insert: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.starts_with("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
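    // As above, tuple.set and tuple.createN indices are tuple slots; convert
    // them to the element offsets llvm.vector.insert expects.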
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
      Value *NewIdx =
          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
      NewCall = Builder.CreateCall(
          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
      break;
    }
    if (Name.starts_with("aarch64.sve.tuple.create")) {
      unsigned N = StringSwitch<unsigned>(Name)
                       .StartsWith("aarch64.sve.tuple.create2", 2)
                       .StartsWith("aarch64.sve.tuple.create3", 3)
                       .StartsWith("aarch64.sve.tuple.create4", 4)
                       .Default(0);
      assert(N > 1 && "Create is expected to be between 2-4");
      auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
        Value *V = CI->getArgOperand(I);
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
      }
      NewCall = dyn_cast<CallInst>(Ret);
    }
    break;
  }

  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->arg_size() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
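    // Older declarations lacked the nullunknown and/or dynamic flags; default
    // any missing operand to false.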
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize,
                Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
    if (Name.starts_with("dbg.addr")) {
      DIExpression *Expr = cast<DIExpression>(
          cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
      Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
      NewCall =
          Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     MetadataAsValue::get(C, Expr)});
      break;
    }

    // Upgrade from the old version that had an extra offset argument.
    assert(CI->arg_size() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }

    // Create a new call with an added null annotation attribute argument.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(2), CI->getArgOperand(3),
                                   Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    // Create a new call with an added null annotation attribute argument.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(2), CI->getArgOperand(3),
                                   Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
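    // Already-upgraded calls (i32 last argument and no i64 overload) are left
    // alone by the early return below; the old RV64 i64 overload has its
    // inputs truncated to i32 and its result sign-extended back.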
    Value *Arg2 = CI->getArgOperand(2);
    if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
      return;

    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    if (CI->getType()->isIntegerTy(64)) {
      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    }

    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
                            cast<ConstantInt>(Arg2)->getZExtValue());

    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    if (Res->getType() != CI->getType())
      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // These intrinsics used to be overloaded on XLen; the i64 overload was
    // removed in favor of a fixed i32 form, so narrow the argument and
    // sign-extend the result for the old RV64 variant.
    if (!CI->getType()->isIntegerTy(64))
      return;

    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());

    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take one argument (the pointer the processor ID is stored
    // to). If there are no arguments, the call has already been upgraded.
    if (CI->arg_size() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    // Cast the pointer to the right type.
    Value *Ptr = Builder.CreateBitCast(
        CI->getArgOperand(0), llvm::PointerType::getUnqual(Data->getType()));
    Builder.CreateAlignedStore(Data, Ptr, Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
4683 | // Need to truncate the last argument from i32 to i8 -- this argument models |
4684 | // an inherently 8-bit immediate operand to these x86 instructions. |
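    // E.g. (illustrative IR):
    //   %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a,
    //                                                  <4 x float> %b, i32 %c)
    // becomes
    //   %t = trunc i32 %c to i8
    //   %r = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a,
    //                                                  <4 x float> %b, i8 %t)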
4685 | SmallVector<Value *, 4> Args(CI->args()); |
4686 | |
4687 | // Replace the last argument with a trunc. |
4688 | Args.back() = Builder.CreateTrunc(V: Args.back(), DestTy: Type::getInt8Ty(C), Name: "trunc" ); |
4689 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4690 | break; |
4691 | } |
4692 | |
4693 | case Intrinsic::x86_avx512_mask_cmp_pd_128: |
4694 | case Intrinsic::x86_avx512_mask_cmp_pd_256: |
4695 | case Intrinsic::x86_avx512_mask_cmp_pd_512: |
4696 | case Intrinsic::x86_avx512_mask_cmp_ps_128: |
4697 | case Intrinsic::x86_avx512_mask_cmp_ps_256: |
4698 | case Intrinsic::x86_avx512_mask_cmp_ps_512: { |
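    // The mask operand and the result changed from an integer mask (i8/i16)
    // to a vector of i1: convert the incoming mask to <N x i1> and the
    // returned <N x i1> comparison result back to the old integer mask type.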
4699 | SmallVector<Value *, 4> Args(CI->args()); |
4700 | unsigned NumElts = |
4701 | cast<FixedVectorType>(Val: Args[0]->getType())->getNumElements(); |
4702 | Args[3] = getX86MaskVec(Builder, Mask: Args[3], NumElts); |
4703 | |
4704 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4705 | Value *Res = applyX86MaskOn1BitsVec(Builder, Vec: NewCall, Mask: nullptr); |
4706 | |
4707 | NewCall->takeName(V: CI); |
4708 | CI->replaceAllUsesWith(V: Res); |
4709 | CI->eraseFromParent(); |
4710 | return; |
4711 | } |
4712 | |
4713 | case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128: |
4714 | case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256: |
4715 | case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512: |
4716 | case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128: |
4717 | case Intrinsic::x86_avx512bf16_cvtneps2bf16_256: |
4718 | case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: { |
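    // The bf16 operands and results of these intrinsics used to be modeled
    // as <N x i16>; they are now <N x bfloat>, so bitcast between the two.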
4719 | SmallVector<Value *, 4> Args(CI->args()); |
4720 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
4721 | if (NewFn->getIntrinsicID() == |
4722 | Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128) |
4723 | Args[1] = Builder.CreateBitCast( |
4724 | V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts)); |
4725 | |
4726 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4727 | Value *Res = Builder.CreateBitCast( |
4728 | V: NewCall, DestTy: FixedVectorType::get(ElementType: Builder.getInt16Ty(), NumElts)); |
4729 | |
4730 | NewCall->takeName(V: CI); |
4731 | CI->replaceAllUsesWith(V: Res); |
4732 | CI->eraseFromParent(); |
4733 | return; |
4734 | } |
4735 | case Intrinsic::x86_avx512bf16_dpbf16ps_128: |
4736 | case Intrinsic::x86_avx512bf16_dpbf16ps_256: |
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
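    // As above, the bf16 inputs used to be modeled as <N x i16> and are now
    // <N x bfloat>; bitcast both bf16 inputs accordingly.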
4738 | SmallVector<Value *, 4> Args(CI->args()); |
4739 | unsigned NumElts = |
4740 | cast<FixedVectorType>(Val: CI->getType())->getNumElements() * 2; |
4741 | Args[1] = Builder.CreateBitCast( |
4742 | V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts)); |
4743 | Args[2] = Builder.CreateBitCast( |
4744 | V: Args[2], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts)); |
4745 | |
4746 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4747 | break; |
4748 | } |
4749 | |
4750 | case Intrinsic::thread_pointer: { |
4751 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {}); |
4752 | break; |
4753 | } |
4754 | |
4755 | case Intrinsic::memcpy: |
4756 | case Intrinsic::memmove: |
4757 | case Intrinsic::memset: { |
4758 | // We have to make sure that the call signature is what we're expecting. |
4759 | // We only want to change the old signatures by removing the alignment arg: |
4760 | // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) |
4761 | // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) |
4762 | // @llvm.memset...(i8*, i8, i[32|64], i32, i1) |
4763 | // -> @llvm.memset...(i8*, i8, i[32|64], i1) |
4764 | // Note: i8*'s in the above can be any pointer type |
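    // E.g. (illustrative IR):
    //   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 8, i1 false)
    // becomes
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s,
    //                                    i64 %n, i1 false)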
4765 | if (CI->arg_size() != 5) { |
4766 | DefaultCase(); |
4767 | return; |
4768 | } |
4769 | // Remove alignment argument (3), and add alignment attributes to the |
4770 | // dest/src pointers. |
4771 | Value *Args[4] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4772 | CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)}; |
4773 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4774 | AttributeList OldAttrs = CI->getAttributes(); |
4775 | AttributeList NewAttrs = AttributeList::get( |
4776 | C, FnAttrs: OldAttrs.getFnAttrs(), RetAttrs: OldAttrs.getRetAttrs(), |
4777 | ArgAttrs: {OldAttrs.getParamAttrs(ArgNo: 0), OldAttrs.getParamAttrs(ArgNo: 1), |
4778 | OldAttrs.getParamAttrs(ArgNo: 2), OldAttrs.getParamAttrs(ArgNo: 4)}); |
4779 | NewCall->setAttributes(NewAttrs); |
4780 | auto *MemCI = cast<MemIntrinsic>(Val: NewCall); |
4781 | // All mem intrinsics support dest alignment. |
4782 | const ConstantInt *Align = cast<ConstantInt>(Val: CI->getArgOperand(i: 3)); |
4783 | MemCI->setDestAlignment(Align->getMaybeAlignValue()); |
4784 | // Memcpy/Memmove also support source alignment. |
4785 | if (auto *MTI = dyn_cast<MemTransferInst>(Val: MemCI)) |
4786 | MTI->setSourceAlignment(Align->getMaybeAlignValue()); |
4787 | break; |
4788 | } |
4789 | } |
4790 | assert(NewCall && "Should have either set this variable or returned through " |
4791 | "the default case" ); |
4792 | NewCall->takeName(V: CI); |
4793 | CI->replaceAllUsesWith(V: NewCall); |
4794 | CI->eraseFromParent(); |
4795 | } |
4796 | |
4797 | void llvm::UpgradeCallsToIntrinsic(Function *F) { |
4798 | assert(F && "Illegal attempt to upgrade a non-existent intrinsic." ); |
4799 | |
4800 | // Check if this function should be upgraded and get the replacement function |
4801 | // if there is one. |
4802 | Function *NewFn; |
4803 | if (UpgradeIntrinsicFunction(F, NewFn)) { |
4804 | // Replace all users of the old function with the new function or new |
4805 | // instructions. This is not a range loop because the call is deleted. |
4806 | for (User *U : make_early_inc_range(Range: F->users())) |
4807 | if (CallBase *CB = dyn_cast<CallBase>(Val: U)) |
4808 | UpgradeIntrinsicCall(CI: CB, NewFn); |
4809 | |
4810 | // Remove old function, no longer used, from the module. |
4811 | F->eraseFromParent(); |
4812 | } |
4813 | } |
4814 | |
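/// E.g. an old scalar TBAA access tag !1 = !{!"int", !0} attached directly as
/// !tbaa !1 is upgraded (illustratively) to the struct-path access tag
/// !{!1, !1, i64 0}; a three-operand scalar tag additionally carries its
/// is-constant flag over as the fourth operand.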
4815 | MDNode *llvm::UpgradeTBAANode(MDNode &MD) { |
4816 | const unsigned NumOperands = MD.getNumOperands(); |
4817 | if (NumOperands == 0) |
4818 | return &MD; // Invalid, punt to a verifier error. |
4819 | |
4820 | // Check if the tag uses struct-path aware TBAA format. |
4821 | if (isa<MDNode>(Val: MD.getOperand(I: 0)) && NumOperands >= 3) |
4822 | return &MD; |
4823 | |
4824 | auto &Context = MD.getContext(); |
4825 | if (NumOperands == 3) { |
4826 | Metadata *Elts[] = {MD.getOperand(I: 0), MD.getOperand(I: 1)}; |
4827 | MDNode *ScalarType = MDNode::get(Context, MDs: Elts); |
    // Create an MDNode <ScalarType, ScalarType, offset 0, const>
4829 | Metadata *Elts2[] = {ScalarType, ScalarType, |
4830 | ConstantAsMetadata::get( |
4831 | C: Constant::getNullValue(Ty: Type::getInt64Ty(C&: Context))), |
4832 | MD.getOperand(I: 2)}; |
4833 | return MDNode::get(Context, MDs: Elts2); |
4834 | } |
  // Create an MDNode <MD, MD, offset 0>
4836 | Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(C: Constant::getNullValue( |
4837 | Ty: Type::getInt64Ty(C&: Context)))}; |
4838 | return MDNode::get(Context, MDs: Elts); |
4839 | } |
4840 | |
4841 | Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, |
4842 | Instruction *&Temp) { |
4843 | if (Opc != Instruction::BitCast) |
4844 | return nullptr; |
4845 | |
4846 | Temp = nullptr; |
4847 | Type *SrcTy = V->getType(); |
4848 | if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && |
4849 | SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { |
4850 | LLVMContext &Context = V->getContext(); |
4851 | |
    // We have no information about the target data layout, so we assume that
    // the maximum pointer size is 64 bits.
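    // E.g. a bitcast of ptr addrspace(1) %v to ptr is expanded
    // (illustratively) to
    //   %tmp = ptrtoint ptr addrspace(1) %v to i64
    //   %res = inttoptr i64 %tmp to ptr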
4854 | Type *MidTy = Type::getInt64Ty(C&: Context); |
4855 | Temp = CastInst::Create(Instruction::PtrToInt, S: V, Ty: MidTy); |
4856 | |
4857 | return CastInst::Create(Instruction::IntToPtr, S: Temp, Ty: DestTy); |
4858 | } |
4859 | |
4860 | return nullptr; |
4861 | } |
4862 | |
4863 | Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { |
4864 | if (Opc != Instruction::BitCast) |
4865 | return nullptr; |
4866 | |
4867 | Type *SrcTy = C->getType(); |
4868 | if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && |
4869 | SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { |
4870 | LLVMContext &Context = C->getContext(); |
4871 | |
    // We have no information about the target data layout, so we assume that
    // the maximum pointer size is 64 bits.
4874 | Type *MidTy = Type::getInt64Ty(C&: Context); |
4875 | |
4876 | return ConstantExpr::getIntToPtr(C: ConstantExpr::getPtrToInt(C, Ty: MidTy), |
4877 | Ty: DestTy); |
4878 | } |
4879 | |
4880 | return nullptr; |
4881 | } |
4882 | |
/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module is modified.
4885 | bool llvm::UpgradeDebugInfo(Module &M) { |
4886 | if (DisableAutoUpgradeDebugInfo) |
4887 | return false; |
4888 | |
4889 | unsigned Version = getDebugMetadataVersionFromModule(M); |
4890 | if (Version == DEBUG_METADATA_VERSION) { |
4891 | bool BrokenDebugInfo = false; |
4892 | if (verifyModule(M, OS: &llvm::errs(), BrokenDebugInfo: &BrokenDebugInfo)) |
4893 | report_fatal_error(reason: "Broken module found, compilation aborted!" ); |
4894 | if (!BrokenDebugInfo) |
4895 | // Everything is ok. |
4896 | return false; |
4897 | else { |
4898 | // Diagnose malformed debug info. |
4899 | DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M); |
4900 | M.getContext().diagnose(DI: Diag); |
4901 | } |
4902 | } |
4903 | bool Modified = StripDebugInfo(M); |
4904 | if (Modified && Version != DEBUG_METADATA_VERSION) { |
4905 | // Diagnose a version mismatch. |
4906 | DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); |
4907 | M.getContext().diagnose(DI: DiagVersion); |
4908 | } |
4909 | return Modified; |
4910 | } |
4911 | |
/// Upgrade the ObjC retain/release marker, converting the old
/// "clang.arc.retainAutoreleasedReturnValueMarker" named metadata into a
/// module flag. Returns true if the module is modified.
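/// E.g. (illustrative) a marker string "A#B" stored in the named metadata
/// becomes the module-flag value "A;B" under the same key.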
4914 | static bool upgradeRetainReleaseMarker(Module &M) { |
4915 | bool Changed = false; |
4916 | const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker" ; |
4917 | NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(Name: MarkerKey); |
4918 | if (ModRetainReleaseMarker) { |
4919 | MDNode *Op = ModRetainReleaseMarker->getOperand(i: 0); |
4920 | if (Op) { |
4921 | MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 0)); |
4922 | if (ID) { |
4923 | SmallVector<StringRef, 4> ValueComp; |
4924 | ID->getString().split(A&: ValueComp, Separator: "#" ); |
4925 | if (ValueComp.size() == 2) { |
4926 | std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str(); |
4927 | ID = MDString::get(Context&: M.getContext(), Str: NewValue); |
4928 | } |
4929 | M.addModuleFlag(Behavior: Module::Error, Key: MarkerKey, Val: ID); |
4930 | M.eraseNamedMetadata(NMD: ModRetainReleaseMarker); |
4931 | Changed = true; |
4932 | } |
4933 | } |
4934 | } |
4935 | return Changed; |
4936 | } |
4937 | |
4938 | void llvm::UpgradeARCRuntime(Module &M) { |
4939 | // This lambda converts normal function calls to ARC runtime functions to |
4940 | // intrinsic calls. |
4941 | auto UpgradeToIntrinsic = [&](const char *OldFunc, |
4942 | llvm::Intrinsic::ID IntrinsicFunc) { |
4943 | Function *Fn = M.getFunction(Name: OldFunc); |
4944 | |
4945 | if (!Fn) |
4946 | return; |
4947 | |
4948 | Function *NewFn = llvm::Intrinsic::getDeclaration(M: &M, id: IntrinsicFunc); |
4949 | |
4950 | for (User *U : make_early_inc_range(Range: Fn->users())) { |
4951 | CallInst *CI = dyn_cast<CallInst>(Val: U); |
4952 | if (!CI || CI->getCalledFunction() != Fn) |
4953 | continue; |
4954 | |
4955 | IRBuilder<> Builder(CI->getParent(), CI->getIterator()); |
4956 | FunctionType *NewFuncTy = NewFn->getFunctionType(); |
4957 | SmallVector<Value *, 2> Args; |
4958 | |
4959 | // Don't upgrade the intrinsic if it's not valid to bitcast the return |
4960 | // value to the return type of the old function. |
4961 | if (NewFuncTy->getReturnType() != CI->getType() && |
4962 | !CastInst::castIsValid(op: Instruction::BitCast, S: CI, |
4963 | DstTy: NewFuncTy->getReturnType())) |
4964 | continue; |
4965 | |
4966 | bool InvalidCast = false; |
4967 | |
4968 | for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) { |
4969 | Value *Arg = CI->getArgOperand(i: I); |
4970 | |
4971 | // Bitcast argument to the parameter type of the new function if it's |
4972 | // not a variadic argument. |
4973 | if (I < NewFuncTy->getNumParams()) { |
4974 | // Don't upgrade the intrinsic if it's not valid to bitcast the argument |
4975 | // to the parameter type of the new function. |
4976 | if (!CastInst::castIsValid(op: Instruction::BitCast, S: Arg, |
4977 | DstTy: NewFuncTy->getParamType(i: I))) { |
4978 | InvalidCast = true; |
4979 | break; |
4980 | } |
4981 | Arg = Builder.CreateBitCast(V: Arg, DestTy: NewFuncTy->getParamType(i: I)); |
4982 | } |
4983 | Args.push_back(Elt: Arg); |
4984 | } |
4985 | |
4986 | if (InvalidCast) |
4987 | continue; |
4988 | |
4989 | // Create a call instruction that calls the new function. |
4990 | CallInst *NewCall = Builder.CreateCall(FTy: NewFuncTy, Callee: NewFn, Args); |
4991 | NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind()); |
4992 | NewCall->takeName(V: CI); |
4993 | |
4994 | // Bitcast the return value back to the type of the old call. |
4995 | Value *NewRetVal = Builder.CreateBitCast(V: NewCall, DestTy: CI->getType()); |
4996 | |
4997 | if (!CI->use_empty()) |
4998 | CI->replaceAllUsesWith(V: NewRetVal); |
4999 | CI->eraseFromParent(); |
5000 | } |
5001 | |
5002 | if (Fn->use_empty()) |
5003 | Fn->eraseFromParent(); |
5004 | }; |
5005 | |
5006 | // Unconditionally convert a call to "clang.arc.use" to a call to |
5007 | // "llvm.objc.clang.arc.use". |
5008 | UpgradeToIntrinsic("clang.arc.use" , llvm::Intrinsic::objc_clang_arc_use); |
5009 | |
  // Upgrade the retain/release marker. If the marker does not need upgrading,
  // the module is either already new enough to contain the new intrinsics or
  // it is not ARC at all, so there is no need to upgrade the runtime calls.
5013 | if (!upgradeRetainReleaseMarker(M)) |
5014 | return; |
5015 | |
5016 | std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = { |
5017 | {"objc_autorelease" , llvm::Intrinsic::objc_autorelease}, |
5018 | {"objc_autoreleasePoolPop" , llvm::Intrinsic::objc_autoreleasePoolPop}, |
5019 | {"objc_autoreleasePoolPush" , llvm::Intrinsic::objc_autoreleasePoolPush}, |
5020 | {"objc_autoreleaseReturnValue" , |
5021 | llvm::Intrinsic::objc_autoreleaseReturnValue}, |
5022 | {"objc_copyWeak" , llvm::Intrinsic::objc_copyWeak}, |
5023 | {"objc_destroyWeak" , llvm::Intrinsic::objc_destroyWeak}, |
5024 | {"objc_initWeak" , llvm::Intrinsic::objc_initWeak}, |
5025 | {"objc_loadWeak" , llvm::Intrinsic::objc_loadWeak}, |
5026 | {"objc_loadWeakRetained" , llvm::Intrinsic::objc_loadWeakRetained}, |
5027 | {"objc_moveWeak" , llvm::Intrinsic::objc_moveWeak}, |
5028 | {"objc_release" , llvm::Intrinsic::objc_release}, |
5029 | {"objc_retain" , llvm::Intrinsic::objc_retain}, |
5030 | {"objc_retainAutorelease" , llvm::Intrinsic::objc_retainAutorelease}, |
5031 | {"objc_retainAutoreleaseReturnValue" , |
5032 | llvm::Intrinsic::objc_retainAutoreleaseReturnValue}, |
5033 | {"objc_retainAutoreleasedReturnValue" , |
5034 | llvm::Intrinsic::objc_retainAutoreleasedReturnValue}, |
5035 | {"objc_retainBlock" , llvm::Intrinsic::objc_retainBlock}, |
5036 | {"objc_storeStrong" , llvm::Intrinsic::objc_storeStrong}, |
5037 | {"objc_storeWeak" , llvm::Intrinsic::objc_storeWeak}, |
5038 | {"objc_unsafeClaimAutoreleasedReturnValue" , |
5039 | llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue}, |
5040 | {"objc_retainedObject" , llvm::Intrinsic::objc_retainedObject}, |
5041 | {"objc_unretainedObject" , llvm::Intrinsic::objc_unretainedObject}, |
5042 | {"objc_unretainedPointer" , llvm::Intrinsic::objc_unretainedPointer}, |
5043 | {"objc_retain_autorelease" , llvm::Intrinsic::objc_retain_autorelease}, |
5044 | {"objc_sync_enter" , llvm::Intrinsic::objc_sync_enter}, |
5045 | {"objc_sync_exit" , llvm::Intrinsic::objc_sync_exit}, |
5046 | {"objc_arc_annotation_topdown_bbstart" , |
5047 | llvm::Intrinsic::objc_arc_annotation_topdown_bbstart}, |
5048 | {"objc_arc_annotation_topdown_bbend" , |
5049 | llvm::Intrinsic::objc_arc_annotation_topdown_bbend}, |
5050 | {"objc_arc_annotation_bottomup_bbstart" , |
5051 | llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart}, |
5052 | {"objc_arc_annotation_bottomup_bbend" , |
5053 | llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}}; |
5054 | |
5055 | for (auto &I : RuntimeFuncs) |
5056 | UpgradeToIntrinsic(I.first, I.second); |
5057 | } |
5058 | |
5059 | bool llvm::UpgradeModuleFlags(Module &M) { |
5060 | NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); |
5061 | if (!ModFlags) |
5062 | return false; |
5063 | |
5064 | bool HasObjCFlag = false, HasClassProperties = false, Changed = false; |
5065 | bool HasSwiftVersionFlag = false; |
5066 | uint8_t SwiftMajorVersion, SwiftMinorVersion; |
5067 | uint32_t SwiftABIVersion; |
5068 | auto Int8Ty = Type::getInt8Ty(C&: M.getContext()); |
5069 | auto Int32Ty = Type::getInt32Ty(C&: M.getContext()); |
5070 | |
5071 | for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { |
5072 | MDNode *Op = ModFlags->getOperand(i: I); |
5073 | if (Op->getNumOperands() != 3) |
5074 | continue; |
5075 | MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1)); |
5076 | if (!ID) |
5077 | continue; |
5078 | auto SetBehavior = [&](Module::ModFlagBehavior B) { |
5079 | Metadata *Ops[3] = {ConstantAsMetadata::get(C: ConstantInt::get( |
5080 | Ty: Type::getInt32Ty(C&: M.getContext()), V: B)), |
5081 | MDString::get(Context&: M.getContext(), Str: ID->getString()), |
5082 | Op->getOperand(I: 2)}; |
5083 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5084 | Changed = true; |
5085 | }; |
5086 | |
5087 | if (ID->getString() == "Objective-C Image Info Version" ) |
5088 | HasObjCFlag = true; |
5089 | if (ID->getString() == "Objective-C Class Properties" ) |
5090 | HasClassProperties = true; |
5091 | // Upgrade PIC from Error/Max to Min. |
5092 | if (ID->getString() == "PIC Level" ) { |
5093 | if (auto *Behavior = |
5094 | mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) { |
5095 | uint64_t V = Behavior->getLimitedValue(); |
5096 | if (V == Module::Error || V == Module::Max) |
5097 | SetBehavior(Module::Min); |
5098 | } |
5099 | } |
5100 | // Upgrade "PIE Level" from Error to Max. |
5101 | if (ID->getString() == "PIE Level" ) |
5102 | if (auto *Behavior = |
5103 | mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) |
5104 | if (Behavior->getLimitedValue() == Module::Error) |
5105 | SetBehavior(Module::Max); |
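    // E.g. (illustrative) !{i32 1, !"PIC Level", i32 2} (Error) becomes
    // !{i32 8, !"PIC Level", i32 2} (Min), and !{i32 1, !"PIE Level", i32 2}
    // becomes !{i32 7, !"PIE Level", i32 2} (Max).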
5106 | |
    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these flags used to be Error and is now Min.
5109 | if (ID->getString() == "branch-target-enforcement" || |
5110 | ID->getString().starts_with(Prefix: "sign-return-address" )) { |
5111 | if (auto *Behavior = |
5112 | mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) { |
5113 | if (Behavior->getLimitedValue() == Module::Error) { |
5114 | Type *Int32Ty = Type::getInt32Ty(C&: M.getContext()); |
5115 | Metadata *Ops[3] = { |
5116 | ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Min)), |
5117 | Op->getOperand(I: 1), Op->getOperand(I: 2)}; |
5118 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5119 | Changed = true; |
5120 | } |
5121 | } |
5122 | } |
5123 | |
    // Upgrade the Objective-C Image Info Section. Remove the whitespace in
    // the section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
5127 | if (ID->getString() == "Objective-C Image Info Section" ) { |
5128 | if (auto *Value = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 2))) { |
5129 | SmallVector<StringRef, 4> ValueComp; |
5130 | Value->getString().split(A&: ValueComp, Separator: " " ); |
5131 | if (ValueComp.size() != 1) { |
5132 | std::string NewValue; |
5133 | for (auto &S : ValueComp) |
5134 | NewValue += S.str(); |
5135 | Metadata *Ops[3] = {Op->getOperand(I: 0), Op->getOperand(I: 1), |
5136 | MDString::get(Context&: M.getContext(), Str: NewValue)}; |
5137 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5138 | Changed = true; |
5139 | } |
5140 | } |
5141 | } |
5142 | |
    // The IR upgrader turns the i32-typed "Objective-C Garbage Collection"
    // module flag into an i8 value. If the higher bits are set, it also adds
    // new module flags for the Swift version info.
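    // E.g. (illustrative) an i32 value of 0x02030100 yields an i8
    // "Objective-C Garbage Collection" flag of 0, plus "Swift ABI Version" 1,
    // "Swift Major Version" 2, and "Swift Minor Version" 3.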
5145 | if (ID->getString() == "Objective-C Garbage Collection" ) { |
5146 | auto Md = dyn_cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2)); |
5147 | if (Md) { |
5148 | assert(Md->getValue() && "Expected non-empty metadata" ); |
5149 | auto Type = Md->getValue()->getType(); |
5150 | if (Type == Int8Ty) |
5151 | continue; |
5152 | unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue(); |
5153 | if ((Val & 0xff) != Val) { |
5154 | HasSwiftVersionFlag = true; |
5155 | SwiftABIVersion = (Val & 0xff00) >> 8; |
5156 | SwiftMajorVersion = (Val & 0xff000000) >> 24; |
5157 | SwiftMinorVersion = (Val & 0xff0000) >> 16; |
5158 | } |
        Metadata *Ops[3] = {
            ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Error)),
            Op->getOperand(I: 1),
            ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int8Ty, V: Val & 0xff))};
5163 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5164 | Changed = true; |
5165 | } |
5166 | } |
5167 | |
5168 | if (ID->getString() == "amdgpu_code_object_version" ) { |
5169 | Metadata *Ops[3] = { |
5170 | Op->getOperand(I: 0), |
5171 | MDString::get(Context&: M.getContext(), Str: "amdhsa_code_object_version" ), |
5172 | Op->getOperand(I: 2)}; |
5173 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5174 | Changed = true; |
5175 | } |
5176 | } |
5177 | |
5178 | // "Objective-C Class Properties" is recently added for Objective-C. We |
5179 | // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module |
5180 | // flag of value 0, so we can correclty downgrade this flag when trying to |
5181 | // link an ObjC bitcode without this module flag with an ObjC bitcode with |
5182 | // this module flag. |
5183 | if (HasObjCFlag && !HasClassProperties) { |
5184 | M.addModuleFlag(Behavior: llvm::Module::Override, Key: "Objective-C Class Properties" , |
5185 | Val: (uint32_t)0); |
5186 | Changed = true; |
5187 | } |
5188 | |
5189 | if (HasSwiftVersionFlag) { |
5190 | M.addModuleFlag(Behavior: Module::Error, Key: "Swift ABI Version" , |
5191 | Val: SwiftABIVersion); |
5192 | M.addModuleFlag(Behavior: Module::Error, Key: "Swift Major Version" , |
5193 | Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMajorVersion)); |
5194 | M.addModuleFlag(Behavior: Module::Error, Key: "Swift Minor Version" , |
5195 | Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMinorVersion)); |
5196 | Changed = true; |
5197 | } |
5198 | |
5199 | return Changed; |
5200 | } |
5201 | |
5202 | void llvm::UpgradeSectionAttributes(Module &M) { |
5203 | auto TrimSpaces = [](StringRef Section) -> std::string { |
5204 | SmallVector<StringRef, 5> Components; |
5205 | Section.split(A&: Components, Separator: ','); |
5206 | |
5207 | SmallString<32> Buffer; |
5208 | raw_svector_ostream OS(Buffer); |
5209 | |
5210 | for (auto Component : Components) |
5211 | OS << ',' << Component.trim(); |
5212 | |
5213 | return std::string(OS.str().substr(Start: 1)); |
5214 | }; |
5215 | |
5216 | for (auto &GV : M.globals()) { |
5217 | if (!GV.hasSection()) |
5218 | continue; |
5219 | |
5220 | StringRef Section = GV.getSection(); |
5221 | |
5222 | if (!Section.starts_with(Prefix: "__DATA, __objc_catlist" )) |
5223 | continue; |
5224 | |
    // Transform, e.g.,
    //   __DATA, __objc_catlist, regular, no_dead_strip
    // into
    //   __DATA,__objc_catlist,regular,no_dead_strip
5227 | GV.setSection(TrimSpaces(Section)); |
5228 | } |
5229 | } |
5230 | |
5231 | namespace { |
5232 | // Prior to LLVM 10.0, the strictfp attribute could be used on individual |
5233 | // callsites within a function that did not also have the strictfp attribute. |
5234 | // Since 10.0, if strict FP semantics are needed within a function, the |
5235 | // function must have the strictfp attribute and all calls within the function |
5236 | // must also have the strictfp attribute. This latter restriction is |
5237 | // necessary to prevent unwanted libcall simplification when a function is |
5238 | // being cloned (such as for inlining). |
5239 | // |
5240 | // The "dangling" strictfp attribute usage was only used to prevent constant |
5241 | // folding and other libcall simplification. The nobuiltin attribute on the |
5242 | // callsite has the same effect. |
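// E.g. (illustrative IR) inside a caller that is not itself strictfp,
//   %r = call double @sin(double %x) #0   ; #0 = { strictfp }
// becomes
//   %r = call double @sin(double %x) #1   ; #1 = { nobuiltin }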
5243 | struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> { |
5244 | StrictFPUpgradeVisitor() = default; |
5245 | |
5246 | void visitCallBase(CallBase &Call) { |
5247 | if (!Call.isStrictFP()) |
5248 | return; |
5249 | if (isa<ConstrainedFPIntrinsic>(Val: &Call)) |
5250 | return; |
5251 | // If we get here, the caller doesn't have the strictfp attribute |
5252 | // but this callsite does. Replace the strictfp attribute with nobuiltin. |
5253 | Call.removeFnAttr(Kind: Attribute::StrictFP); |
5254 | Call.addFnAttr(Kind: Attribute::NoBuiltin); |
5255 | } |
5256 | }; |
5257 | } // namespace |
5258 | |
5259 | void llvm::UpgradeFunctionAttributes(Function &F) { |
5260 | // If a function definition doesn't have the strictfp attribute, |
5261 | // convert any callsite strictfp attributes to nobuiltin. |
5262 | if (!F.isDeclaration() && !F.hasFnAttribute(Kind: Attribute::StrictFP)) { |
5263 | StrictFPUpgradeVisitor SFPV; |
5264 | SFPV.visit(F); |
5265 | } |
5266 | |
  // Remove all incompatible attributes from the function.
5268 | F.removeRetAttrs(Attrs: AttributeFuncs::typeIncompatible(Ty: F.getReturnType())); |
5269 | for (auto &Arg : F.args()) |
5270 | Arg.removeAttrs(AM: AttributeFuncs::typeIncompatible(Ty: Arg.getType())); |
5271 | |
5272 | // Older versions of LLVM treated an "implicit-section-name" attribute |
5273 | // similarly to directly setting the section on a Function. |
5274 | if (Attribute A = F.getFnAttribute(Kind: "implicit-section-name" ); |
5275 | A.isValid() && A.isStringAttribute()) { |
5276 | F.setSection(A.getValueAsString()); |
5277 | F.removeFnAttr(Kind: "implicit-section-name" ); |
5278 | } |
5279 | } |
5280 | |
5281 | static bool isOldLoopArgument(Metadata *MD) { |
5282 | auto *T = dyn_cast_or_null<MDTuple>(Val: MD); |
5283 | if (!T) |
5284 | return false; |
5285 | if (T->getNumOperands() < 1) |
5286 | return false; |
5287 | auto *S = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0)); |
5288 | if (!S) |
5289 | return false; |
5290 | return S->getString().starts_with(Prefix: "llvm.vectorizer." ); |
5291 | } |
5292 | |
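// E.g. !{!"llvm.vectorizer.width", i32 4} becomes
// !{!"llvm.loop.vectorize.width", i32 4}, while "llvm.vectorizer.unroll" maps
// to "llvm.loop.interleave.count".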
5293 | static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { |
5294 | StringRef OldPrefix = "llvm.vectorizer." ; |
5295 | assert(OldTag.starts_with(OldPrefix) && "Expected old prefix" ); |
5296 | |
5297 | if (OldTag == "llvm.vectorizer.unroll" ) |
5298 | return MDString::get(Context&: C, Str: "llvm.loop.interleave.count" ); |
5299 | |
5300 | return MDString::get( |
5301 | Context&: C, Str: (Twine("llvm.loop.vectorize." ) + OldTag.drop_front(N: OldPrefix.size())) |
5302 | .str()); |
5303 | } |
5304 | |
5305 | static Metadata *upgradeLoopArgument(Metadata *MD) { |
5306 | auto *T = dyn_cast_or_null<MDTuple>(Val: MD); |
5307 | if (!T) |
5308 | return MD; |
5309 | if (T->getNumOperands() < 1) |
5310 | return MD; |
5311 | auto *OldTag = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0)); |
5312 | if (!OldTag) |
5313 | return MD; |
5314 | if (!OldTag->getString().starts_with(Prefix: "llvm.vectorizer." )) |
5315 | return MD; |
5316 | |
5317 | // This has an old tag. Upgrade it. |
5318 | SmallVector<Metadata *, 8> Ops; |
5319 | Ops.reserve(N: T->getNumOperands()); |
5320 | Ops.push_back(Elt: upgradeLoopTag(C&: T->getContext(), OldTag: OldTag->getString())); |
5321 | for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) |
5322 | Ops.push_back(Elt: T->getOperand(I)); |
5323 | |
5324 | return MDTuple::get(Context&: T->getContext(), MDs: Ops); |
5325 | } |
5326 | |
5327 | MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { |
5328 | auto *T = dyn_cast<MDTuple>(Val: &N); |
5329 | if (!T) |
5330 | return &N; |
5331 | |
5332 | if (none_of(Range: T->operands(), P: isOldLoopArgument)) |
5333 | return &N; |
5334 | |
5335 | SmallVector<Metadata *, 8> Ops; |
5336 | Ops.reserve(N: T->getNumOperands()); |
5337 | for (Metadata *MD : T->operands()) |
5338 | Ops.push_back(Elt: upgradeLoopArgument(MD)); |
5339 | |
5340 | return MDTuple::get(Context&: T->getContext(), MDs: Ops); |
5341 | } |
5342 | |
5343 | std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { |
5344 | Triple T(TT); |
  // The only data layout upgrade needed for pre-GCN AMDGPU, SPIR, or SPIR-V
  // is setting the address space of globals to 1. This does not apply to
  // SPIR-V Logical.
5347 | if (((T.isAMDGPU() && !T.isAMDGCN()) || |
5348 | (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) && |
5349 | !DL.contains(Other: "-G" ) && !DL.starts_with(Prefix: "G" )) { |
5350 | return DL.empty() ? std::string("G1" ) : (DL + "-G1" ).str(); |
5351 | } |
5352 | |
5353 | if (T.isLoongArch64() || T.isRISCV64()) { |
5354 | // Make i32 a native type for 64-bit LoongArch and RISC-V. |
5355 | auto I = DL.find(Str: "-n64-" ); |
5356 | if (I != StringRef::npos) |
5357 | return (DL.take_front(N: I) + "-n32:64-" + DL.drop_front(N: I + 5)).str(); |
5358 | return DL.str(); |
5359 | } |
5360 | |
5361 | std::string Res = DL.str(); |
5362 | // AMDGCN data layout upgrades. |
5363 | if (T.isAMDGCN()) { |
    // Define the default globals address space.
5365 | if (!DL.contains(Other: "-G" ) && !DL.starts_with(Prefix: "G" )) |
5366 | Res.append(s: Res.empty() ? "G1" : "-G1" ); |
5367 | |
5368 | // Add missing non-integral declarations. |
5369 | // This goes before adding new address spaces to prevent incoherent string |
5370 | // values. |
5371 | if (!DL.contains(Other: "-ni" ) && !DL.starts_with(Prefix: "ni" )) |
5372 | Res.append(s: "-ni:7:8:9" ); |
5373 | // Update ni:7 to ni:7:8:9. |
5374 | if (DL.ends_with(Suffix: "ni:7" )) |
5375 | Res.append(s: ":8:9" ); |
5376 | if (DL.ends_with(Suffix: "ni:7:8" )) |
5377 | Res.append(s: ":9" ); |
5378 | |
    // Add sizing for address spaces 7, 8, and 9 (fat raw buffers, buffer
    // resources, and buffer strided pointers). An empty data layout has
    // already been upgraded to G1 by now.
5381 | if (!DL.contains(Other: "-p7" ) && !DL.starts_with(Prefix: "p7" )) |
5382 | Res.append(s: "-p7:160:256:256:32" ); |
5383 | if (!DL.contains(Other: "-p8" ) && !DL.starts_with(Prefix: "p8" )) |
5384 | Res.append(s: "-p8:128:128" ); |
5385 | if (!DL.contains(Other: "-p9" ) && !DL.starts_with(Prefix: "p9" )) |
5386 | Res.append(s: "-p9:192:256:256:32" ); |
5387 | |
5388 | return Res; |
5389 | } |
5390 | |
5391 | // AArch64 data layout upgrades. |
5392 | if (T.isAArch64()) { |
5393 | // Add "-Fn32" |
5394 | if (!DL.empty() && !DL.contains(Other: "-Fn32" )) |
5395 | Res.append(s: "-Fn32" ); |
5396 | return Res; |
5397 | } |
5398 | |
5399 | if (!T.isX86()) |
5400 | return Res; |
5401 | |
  // If the data layout matches the expected format, add the pointer-size
  // address spaces (p270/p271/p272) to it.
5404 | std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64" ; |
5405 | if (StringRef Ref = Res; !Ref.contains(Other: AddrSpaces)) { |
5406 | SmallVector<StringRef, 4> Groups; |
5407 | Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)" ); |
5408 | if (R.match(String: Res, Matches: &Groups)) |
5409 | Res = (Groups[1] + AddrSpaces + Groups[3]).str(); |
5410 | } |
5411 | |
  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16-byte
  // boundaries, so although this is a breaking change, the upgrade is
  // expected to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte alignment.
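  // E.g. (illustratively, with the address spaces already present)
  //   "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  // becomes
  //   "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"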
5418 | if (!T.isOSIAMCU()) { |
5419 | std::string I128 = "-i128:128" ; |
5420 | if (StringRef Ref = Res; !Ref.contains(Other: I128)) { |
5421 | SmallVector<StringRef, 4> Groups; |
5422 | Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$" ); |
5423 | if (R.match(String: Res, Matches: &Groups)) |
5424 | Res = (Groups[1] + I128 + Groups[3]).str(); |
5425 | } |
5426 | } |
5427 | |
5428 | // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes. |
5429 | // Raising the alignment is safe because Clang did not produce f80 values in |
5430 | // the MSVC environment before this upgrade was added. |
5431 | if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) { |
5432 | StringRef Ref = Res; |
5433 | auto I = Ref.find(Str: "-f80:32-" ); |
5434 | if (I != StringRef::npos) |
5435 | Res = (Ref.take_front(N: I) + "-f80:128-" + Ref.drop_front(N: I + 8)).str(); |
5436 | } |
5437 | |
5438 | return Res; |
5439 | } |
5440 | |
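// E.g. "no-frame-pointer-elim"="true" becomes "frame-pointer"="all",
// "no-frame-pointer-elim-non-leaf" (when the former is not "true") becomes
// "frame-pointer"="non-leaf", and "null-pointer-is-valid"="true" becomes the
// enum attribute null_pointer_is_valid.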
5441 | void llvm::UpgradeAttributes(AttrBuilder &B) { |
5442 | StringRef FramePointer; |
5443 | Attribute A = B.getAttribute(Kind: "no-frame-pointer-elim" ); |
5444 | if (A.isValid()) { |
5445 | // The value can be "true" or "false". |
5446 | FramePointer = A.getValueAsString() == "true" ? "all" : "none" ; |
5447 | B.removeAttribute(A: "no-frame-pointer-elim" ); |
5448 | } |
5449 | if (B.contains(A: "no-frame-pointer-elim-non-leaf" )) { |
5450 | // The value is ignored. "no-frame-pointer-elim"="true" takes priority. |
5451 | if (FramePointer != "all" ) |
5452 | FramePointer = "non-leaf" ; |
5453 | B.removeAttribute(A: "no-frame-pointer-elim-non-leaf" ); |
5454 | } |
5455 | if (!FramePointer.empty()) |
5456 | B.addAttribute(A: "frame-pointer" , V: FramePointer); |
5457 | |
5458 | A = B.getAttribute(Kind: "null-pointer-is-valid" ); |
5459 | if (A.isValid()) { |
5460 | // The value can be "true" or "false". |
5461 | bool NullPointerIsValid = A.getValueAsString() == "true" ; |
5462 | B.removeAttribute(A: "null-pointer-is-valid" ); |
5463 | if (NullPointerIsValid) |
5464 | B.addAttribute(Val: Attribute::NullPointerIsValid); |
5465 | } |
5466 | } |
5467 | |
5468 | void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) { |
5469 | // clang.arc.attachedcall bundles are now required to have an operand. |
5470 | // If they don't, it's okay to drop them entirely: when there is an operand, |
5471 | // the "attachedcall" is meaningful and required, but without an operand, |
5472 | // it's just a marker NOP. Dropping it merely prevents an optimization. |
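  // E.g. (illustrative IR) the empty bundle on
  //   call ptr @f() [ "clang.arc.attachedcall"() ]
  // is dropped, while a bundle with an operand, such as
  //   [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
  // is kept.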
5473 | erase_if(C&: Bundles, P: [&](OperandBundleDef &OBD) { |
5474 | return OBD.getTag() == "clang.arc.attachedcall" && |
5475 | OBD.inputs().empty(); |
5476 | }); |
5477 | } |
5478 | |