//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
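// For example (illustrative IR), the old declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is upgraded to use the current signature
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)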
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's the old version; replace it with the new one.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
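// For example (illustrative IR), the trailing immediate of
//   <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// narrows to i8 in the current declaration.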
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move the old declaration aside and map calls down to the new intrinsic.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that changed their return
// type from a scalar mask to a vXi1 mask.
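// For example (illustrative IR), the result type of
// @llvm.x86.avx512.mask.cmp.pd.128 changed from a scalar i8 mask to <2 x i1>.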
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with the LLVM version
  // that started auto-upgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||        // Added in 3.7
            Name == "cvt.ps2.pd.256" ||           // Added in 3.9
            Name == "cvtdq2.pd.256" ||            // Added in 3.9
            Name == "cvtdq2.ps.256" ||            // Added in 7.0
            Name.starts_with("movnt.") ||         // Added in 3.2
            Name.starts_with("sqrt.p") ||         // Added in 7.0
            Name.starts_with("storeu.") ||        // Added in 3.9
            Name.starts_with("vbroadcast.s") ||   // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") ||  // Added in 3.7
            Name.starts_with("vinsertf128.") ||   // Added in 3.7
            Name.starts_with("vperm2f128.") ||    // Added in 6.0
            Name.starts_with("vpermil."));        // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||             // Added in 5.0
            Name.starts_with("pabs.") ||      // Added in 6.0
            Name.starts_with("padds.") ||     // Added in 8.0
            Name.starts_with("paddus.") ||    // Added in 8.0
            Name.starts_with("pblendd.") ||   // Added in 3.7
            Name == "pblendw" ||              // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") ||    // Added in 3.1
            Name.starts_with("pcmpgt.") ||    // Added in 3.1
            Name.starts_with("pmax") ||       // Added in 3.9
            Name.starts_with("pmin") ||       // Added in 3.9
            Name.starts_with("pmovsx") ||     // Added in 3.9
            Name.starts_with("pmovzx") ||     // Added in 3.9
            Name == "pmul.dq" ||              // Added in 7.0
            Name == "pmulu.dq" ||             // Added in 7.0
            Name.starts_with("psll.dq") ||    // Added in 3.7
            Name.starts_with("psrl.dq") ||    // Added in 3.7
            Name.starts_with("psubs.") ||     // Added in 8.0
            Name.starts_with("psubus.") ||    // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" ||       // Added in 3.7
            Name == "vextracti128" ||         // Added in 3.7
            Name == "vinserti128" ||          // Added in 3.7
            Name == "vperm2i128");            // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") ||        // Added in 3.9
              Name.starts_with("andn.") ||       // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") ||           // Added in 5.0
              Name.starts_with("cmp.d") ||           // Added in 5.0
              Name.starts_with("cmp.q") ||           // Added in 5.0
              Name.starts_with("cmp.w") ||           // Added in 5.0
              Name.starts_with("compress.b") ||      // Added in 9.0
              Name.starts_with("compress.d") ||      // Added in 9.0
              Name.starts_with("compress.p") ||      // Added in 9.0
              Name.starts_with("compress.q") ||      // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") ||      // Added in 9.0
              Name.starts_with("conflict.") ||       // Added in 9.0
              Name.starts_with("cvtdq2pd.") ||       // Added in 4.0
              Name.starts_with("cvtdq2ps.") ||       // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" ||              // Added in 7.0
              Name == "cvtpd2ps.256" ||              // Added in 7.0
              Name == "cvtps2pd.128" ||              // Added in 7.0
              Name == "cvtps2pd.256" ||              // Added in 7.0
              Name.starts_with("cvtqq2pd.") ||       // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" ||              // Added in 9.0
              Name == "cvtqq2ps.512" ||              // Added in 9.0
              Name == "cvttpd2dq.256" ||             // Added in 7.0
              Name == "cvttps2dq.128" ||             // Added in 7.0
              Name == "cvttps2dq.256" ||             // Added in 7.0
              Name.starts_with("cvtudq2pd.") ||      // Added in 4.0
              Name.starts_with("cvtudq2ps.") ||      // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") ||      // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" ||             // Added in 9.0
              Name == "cvtuqq2ps.512" ||             // Added in 9.0
              Name.starts_with("dbpsadbw.") ||       // Added in 7.0
              Name.starts_with("div.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") ||        // Added in 9.0
              Name.starts_with("expand.d") ||        // Added in 9.0
              Name.starts_with("expand.load.") ||    // Added in 7.0
              Name.starts_with("expand.p") ||        // Added in 9.0
              Name.starts_with("expand.q") ||        // Added in 9.0
              Name.starts_with("expand.w") ||        // Added in 9.0
              Name.starts_with("fpclass.p") ||       // Added in 7.0
              Name.starts_with("insert") ||          // Added in 4.0
              Name.starts_with("load.") ||           // Added in 3.9
              Name.starts_with("loadu.") ||          // Added in 3.9
              Name.starts_with("lzcnt.") ||          // Added in 5.0
              Name.starts_with("max.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") ||         // Added in 3.9
              Name.starts_with("move.s") ||          // Added in 4.0
              Name.starts_with("movshdup") ||        // Added in 3.9
              Name.starts_with("movsldup") ||        // Added in 3.9
              Name.starts_with("mul.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") ||             // Added in 3.9
              Name.starts_with("pabs.") ||           // Added in 6.0
              Name.starts_with("packssdw.") ||       // Added in 5.0
              Name.starts_with("packsswb.") ||       // Added in 5.0
              Name.starts_with("packusdw.") ||       // Added in 5.0
              Name.starts_with("packuswb.") ||       // Added in 5.0
              Name.starts_with("padd.") ||           // Added in 4.0
              Name.starts_with("padds.") ||          // Added in 8.0
              Name.starts_with("paddus.") ||         // Added in 8.0
              Name.starts_with("palignr.") ||        // Added in 3.9
              Name.starts_with("pand.") ||           // Added in 3.9
              Name.starts_with("pandn.") ||          // Added in 3.9
              Name.starts_with("pavg") ||            // Added in 6.0
              Name.starts_with("pbroadcast") ||      // Added in 6.0
              Name.starts_with("pcmpeq.") ||         // Added in 3.9
              Name.starts_with("pcmpgt.") ||         // Added in 3.9
              Name.starts_with("perm.df.") ||        // Added in 3.9
              Name.starts_with("perm.di.") ||        // Added in 3.9
              Name.starts_with("permvar.") ||        // Added in 7.0
              Name.starts_with("pmaddubs.w.") ||     // Added in 7.0
              Name.starts_with("pmaddw.d.") ||       // Added in 7.0
              Name.starts_with("pmax") ||            // Added in 4.0
              Name.starts_with("pmin") ||            // Added in 4.0
              Name == "pmov.qd.256" ||               // Added in 9.0
              Name == "pmov.qd.512" ||               // Added in 9.0
              Name == "pmov.wb.256" ||               // Added in 9.0
              Name == "pmov.wb.512" ||               // Added in 9.0
              Name.starts_with("pmovsx") ||          // Added in 4.0
              Name.starts_with("pmovzx") ||          // Added in 4.0
              Name.starts_with("pmul.dq.") ||        // Added in 4.0
              Name.starts_with("pmul.hr.sw.") ||     // Added in 7.0
              Name.starts_with("pmulh.w.") ||        // Added in 7.0
              Name.starts_with("pmulhu.w.") ||       // Added in 7.0
              Name.starts_with("pmull.") ||          // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") ||       // Added in 4.0
              Name.starts_with("por.") ||            // Added in 3.9
              Name.starts_with("prol.") ||           // Added in 8.0
              Name.starts_with("prolv.") ||          // Added in 8.0
              Name.starts_with("pror.") ||           // Added in 8.0
              Name.starts_with("prorv.") ||          // Added in 8.0
              Name.starts_with("pshuf.b.") ||        // Added in 4.0
              Name.starts_with("pshuf.d.") ||        // Added in 3.9
              Name.starts_with("pshufh.w.") ||       // Added in 3.9
              Name.starts_with("pshufl.w.") ||       // Added in 3.9
              Name.starts_with("psll.d") ||          // Added in 4.0
              Name.starts_with("psll.q") ||          // Added in 4.0
              Name.starts_with("psll.w") ||          // Added in 4.0
              Name.starts_with("pslli") ||           // Added in 4.0
              Name.starts_with("psllv") ||           // Added in 4.0
              Name.starts_with("psra.d") ||          // Added in 4.0
              Name.starts_with("psra.q") ||          // Added in 4.0
              Name.starts_with("psra.w") ||          // Added in 4.0
              Name.starts_with("psrai") ||           // Added in 4.0
              Name.starts_with("psrav") ||           // Added in 4.0
              Name.starts_with("psrl.d") ||          // Added in 4.0
              Name.starts_with("psrl.q") ||          // Added in 4.0
              Name.starts_with("psrl.w") ||          // Added in 4.0
              Name.starts_with("psrli") ||           // Added in 4.0
              Name.starts_with("psrlv") ||           // Added in 4.0
              Name.starts_with("psub.") ||           // Added in 4.0
              Name.starts_with("psubs.") ||          // Added in 8.0
              Name.starts_with("psubus.") ||         // Added in 8.0
              Name.starts_with("pternlog.") ||       // Added in 7.0
              Name.starts_with("punpckh") ||         // Added in 3.9
              Name.starts_with("punpckl") ||         // Added in 3.9
              Name.starts_with("pxor.") ||           // Added in 3.9
              Name.starts_with("shuf.f") ||          // Added in 6.0
              Name.starts_with("shuf.i") ||          // Added in 6.0
              Name.starts_with("shuf.p") ||          // Added in 4.0
              Name.starts_with("sqrt.p") ||          // Added in 7.0
              Name.starts_with("store.b.") ||        // Added in 3.9
              Name.starts_with("store.d.") ||        // Added in 3.9
              Name.starts_with("store.p") ||         // Added in 3.9
              Name.starts_with("store.q.") ||        // Added in 3.9
              Name.starts_with("store.w.") ||        // Added in 3.9
              Name == "store.ss" ||                  // Added in 7.0
              Name.starts_with("storeu.") ||         // Added in 3.9
              Name.starts_with("sub.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") ||           // Added in 5.0
              Name.starts_with("unpckh.") ||         // Added in 3.9
              Name.starts_with("unpckl.") ||         // Added in 3.9
              Name.starts_with("valign.") ||         // Added in 4.0
              Name == "vcvtph2ps.128" ||             // Added in 11.0
              Name == "vcvtph2ps.256" ||             // Added in 11.0
              Name.starts_with("vextract") ||        // Added in 4.0
              Name.starts_with("vfmadd.") ||         // Added in 7.0
              Name.starts_with("vfmaddsub.") ||      // Added in 7.0
              Name.starts_with("vfnmadd.") ||        // Added in 7.0
              Name.starts_with("vfnmsub.") ||        // Added in 7.0
              Name.starts_with("vpdpbusd.") ||       // Added in 7.0
              Name.starts_with("vpdpbusds.") ||      // Added in 7.0
              Name.starts_with("vpdpwssd.") ||       // Added in 7.0
              Name.starts_with("vpdpwssds.") ||      // Added in 7.0
              Name.starts_with("vpermi2var.") ||     // Added in 7.0
              Name.starts_with("vpermil.p") ||       // Added in 3.9
              Name.starts_with("vpermilvar.") ||     // Added in 4.0
              Name.starts_with("vpermt2var.") ||     // Added in 7.0
              Name.starts_with("vpmadd52") ||        // Added in 7.0
              Name.starts_with("vpshld.") ||         // Added in 7.0
              Name.starts_with("vpshldv.") ||        // Added in 8.0
              Name.starts_with("vpshrd.") ||         // Added in 7.0
              Name.starts_with("vpshrdv.") ||        // Added in 8.0
              Name.starts_with("vpshufbitqmb.") ||   // Added in 8.0
              Name.starts_with("xor."));             // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") ||    // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") ||    // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub."));    // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") ||   // Added in 7.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrdv."));     // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" ||               // Added in 5.0
            Name == "pmul.dq.512" ||            // Added in 7.0
            Name == "pmulu.dq.512" ||           // Added in 7.0
            Name.starts_with("broadcastm") ||   // Added in 6.0
            Name.starts_with("cmp.p") ||        // Added in 12.0
            Name.starts_with("cvtb2mask.") ||   // Added in 7.0
            Name.starts_with("cvtd2mask.") ||   // Added in 7.0
            Name.starts_with("cvtmask2") ||     // Added in 5.0
            Name.starts_with("cvtq2mask.") ||   // Added in 7.0
            Name == "cvtusi2sd" ||              // Added in 7.0
            Name.starts_with("cvtw2mask.") ||   // Added in 7.0
            Name == "kand.w" ||                 // Added in 7.0
            Name == "kandn.w" ||                // Added in 7.0
            Name == "knot.w" ||                 // Added in 7.0
            Name == "kor.w" ||                  // Added in 7.0
            Name == "kortestc.w" ||             // Added in 7.0
            Name == "kortestz.w" ||             // Added in 7.0
            Name.starts_with("kunpck") ||       // Added in 6.0
            Name == "kxnor.w" ||                // Added in 7.0
            Name == "kxor.w" ||                 // Added in 7.0
            Name.starts_with("padds.") ||       // Added in 8.0
            Name.starts_with("pbroadcast") ||   // Added in 3.9
            Name.starts_with("prol") ||         // Added in 8.0
            Name.starts_with("pror") ||         // Added in 8.0
            Name.starts_with("psll.dq") ||      // Added in 3.9
            Name.starts_with("psrl.dq") ||      // Added in 3.9
            Name.starts_with("psubs.") ||       // Added in 8.0
            Name.starts_with("ptestm") ||       // Added in 6.0
            Name.starts_with("ptestnm") ||      // Added in 6.0
            Name.starts_with("storent.") ||     // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") ||      // Added in 8.0
            Name.starts_with("vpshrd."));       // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||    // Added in 7.0
            Name.starts_with("vfmsub.") ||    // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") ||   // Added in 7.0
            Name.starts_with("vfnmsub."));    // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||            // Added in 4.0
            Name == "cvtsi2ss" ||          // Added in 7.0
            Name == "cvtsi642ss" ||        // Added in 7.0
            Name == "div.ss" ||            // Added in 4.0
            Name == "mul.ss" ||            // Added in 4.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.ss" ||           // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss");             // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||            // Added in 4.0
            Name == "cvtdq2pd" ||          // Added in 3.9
            Name == "cvtdq2ps" ||          // Added in 7.0
            Name == "cvtps2pd" ||          // Added in 3.9
            Name == "cvtsi2sd" ||          // Added in 7.0
            Name == "cvtsi642sd" ||        // Added in 7.0
            Name == "cvtss2sd" ||          // Added in 7.0
            Name == "div.sd" ||            // Added in 4.0
            Name == "mul.sd" ||            // Added in 4.0
            Name.starts_with("padds.") ||  // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" ||           // Added in 3.9
            Name == "pmaxu.b" ||           // Added in 3.9
            Name == "pmins.w" ||           // Added in 3.9
            Name == "pminu.b" ||           // Added in 3.9
            Name == "pmulu.dq" ||          // Added in 7.0
            Name.starts_with("pshuf") ||   // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") ||  // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.sd" ||           // Added in 7.0
            Name == "storel.dq" ||         // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd");             // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" ||         // Added in 5.0
            Name == "pblendw" ||          // Added in 3.7
            Name == "pmaxsb" ||           // Added in 3.9
            Name == "pmaxsd" ||           // Added in 3.9
            Name == "pmaxud" ||           // Added in 3.9
            Name == "pmaxuw" ||           // Added in 3.9
            Name == "pminsb" ||           // Added in 3.9
            Name == "pminsd" ||           // Added in 3.9
            Name == "pminud" ||           // Added in 3.9
            Name == "pminuw" ||           // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq");            // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128");  // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||          // Added in 3.8
            Name == "vpcmov.256" ||      // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot"));  // Added in 8.0

  return (Name == "addcarry.u32" ||        // Added in 8.0
          Name == "addcarry.u64" ||        // Added in 8.0
          Name == "addcarryx.u32" ||       // Added in 8.0
          Name == "addcarryx.u64" ||       // Added in 8.0
          Name == "subborrow.u32" ||       // Added in 8.0
          Name == "subborrow.u64" ||       // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
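    // (Roughly, the old form stored the aux value through a pointer operand,
    // e.g. i64 @llvm.x86.rdtscp(ptr); the new form takes no operands.)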
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.
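  // (For example, an immediate that the instruction encodes in only 8 bits
  // was declared as an i32 parameter; such masks are narrowed to i8 below.)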

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
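      // (e.g. an index operand of type <2 x double> selects
      // x86_xop_vpermil2pd, whose current declaration takes an integer index
      // vector instead.)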
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
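      // (e.g. an old binary declaration such as
      //   <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>)
      // becomes the current unary form with a single operand.)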
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'.
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions; return true iff
// an upgrade is needed. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
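    // (e.g. @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float>, <8 x i8>,
    // <8 x i8>) is rebuilt below with <4 x bfloat> operands.)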
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are no longer polymorphic
    // and accept v8bf16 instead of v16i8.
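    // (e.g. @llvm.aarch64.neon.bfmmla.v4f32.v16i8 becomes the non-overloaded
    // @llvm.aarch64.neon.bfmmla taking <8 x bfloat> operands.)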
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not an old 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'.
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'.
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need an upgrade for two reasons:
    // (1) When the address space of the first argument is shared [AS=3]
    //     (we upgrade it to use the shared_cluster address space [AS=7]).
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So if the type of the [N-3]rd argument is not i1, it is the older
    // version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target-specific intrinsic became redundant.
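        // (llvm.amdgcn.alignbit is equivalent to the generic funnel-shift
        // llvm.fshr on the same operands, so map it there.)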
1128 | NewFn = Intrinsic::getOrInsertDeclaration( |
1129 | M: F->getParent(), id: Intrinsic::fshr, Tys: {F->getReturnType()}); |
1130 | return true; |
1131 | } |
1132 | |
1133 | if (Name.consume_front(Prefix: "atomic." )) { |
1134 | if (Name.starts_with(Prefix: "inc" ) || Name.starts_with(Prefix: "dec" )) { |
1135 | // These were replaced with atomicrmw uinc_wrap and udec_wrap, so |
1136 | // there's no new declaration. |
1137 | NewFn = nullptr; |
1138 | return true; |
1139 | } |
1140 | break; // No other 'amdgcn.atomic.*' |
1141 | } |
1142 | |
1143 | if (Name.consume_front(Prefix: "ds." ) || Name.consume_front(Prefix: "global.atomic." ) || |
1144 | Name.consume_front(Prefix: "flat.atomic." )) { |
1145 | if (Name.starts_with(Prefix: "fadd" ) || |
1146 | // FIXME: We should also remove fmin.num and fmax.num intrinsics. |
1147 | (Name.starts_with(Prefix: "fmin" ) && !Name.starts_with(Prefix: "fmin.num" )) || |
1148 | (Name.starts_with(Prefix: "fmax" ) && !Name.starts_with(Prefix: "fmax.num" ))) { |
1149 | // Replaced with atomicrmw fadd/fmin/fmax, so there's no new |
1150 | // declaration. |
1151 | NewFn = nullptr; |
1152 | return true; |
1153 | } |
1154 | } |
1155 | |
1156 | if (Name.starts_with(Prefix: "ldexp." )) { |
1157 | // Target specific intrinsic became redundant |
1158 | NewFn = Intrinsic::getOrInsertDeclaration( |
1159 | M: F->getParent(), id: Intrinsic::ldexp, |
1160 | Tys: {F->getReturnType(), F->getArg(i: 1)->getType()}); |
1161 | return true; |
1162 | } |
1163 | break; // No other 'amdgcn.*' |
1164 | } |
1165 | |
1166 | break; |
1167 | } |
1168 | case 'c': { |
1169 | if (F->arg_size() == 1) { |
1170 | Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name) |
1171 | .StartsWith(S: "ctlz." , Value: Intrinsic::ctlz) |
1172 | .StartsWith(S: "cttz." , Value: Intrinsic::cttz) |
1173 | .Default(Value: Intrinsic::not_intrinsic); |
1174 | if (ID != Intrinsic::not_intrinsic) { |
1175 | rename(GV: F); |
1176 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, |
1177 | Tys: F->arg_begin()->getType()); |
1178 | return true; |
1179 | } |
1180 | } |
1181 | |
1182 | if (F->arg_size() == 2 && Name == "coro.end" ) { |
1183 | rename(GV: F); |
1184 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), |
1185 | id: Intrinsic::coro_end); |
1186 | return true; |
1187 | } |
1188 | |
1189 | break; |
1190 | } |
1191 | case 'd': |
1192 | if (Name.consume_front(Prefix: "dbg." )) { |
1193 | // Mark debug intrinsics for upgrade to new debug format. |
1194 | if (CanUpgradeDebugIntrinsicsToRecords) { |
1195 | if (Name == "addr" || Name == "value" || Name == "assign" || |
1196 | Name == "declare" || Name == "label" ) { |
1197 | // There's no function to replace these with. |
1198 | NewFn = nullptr; |
1199 | // But we do want these to get upgraded. |
1200 | return true; |
1201 | } |
1202 | } |
1203 | // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get |
1204 | // converted to DbgVariableRecords later. |
1205 | if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) { |
1206 | rename(GV: F); |
1207 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), |
1208 | id: Intrinsic::dbg_value); |
1209 | return true; |
1210 | } |
1211 | break; // No other 'dbg.*'. |
1212 | } |
1213 | break; |
1214 | case 'e': |
1215 | if (Name.consume_front(Prefix: "experimental.vector." )) { |
1216 | Intrinsic::ID ID = |
1217 | StringSwitch<Intrinsic::ID>(Name) |
// Skip over extract.last.active, otherwise it will be 'upgraded'
// to a regular vector extract, which is a different operation.
1220 | .StartsWith(S: "extract.last.active." , Value: Intrinsic::not_intrinsic) |
1221 | .StartsWith(S: "extract." , Value: Intrinsic::vector_extract) |
1222 | .StartsWith(S: "insert." , Value: Intrinsic::vector_insert) |
1223 | .StartsWith(S: "splice." , Value: Intrinsic::vector_splice) |
1224 | .StartsWith(S: "reverse." , Value: Intrinsic::vector_reverse) |
1225 | .StartsWith(S: "interleave2." , Value: Intrinsic::vector_interleave2) |
1226 | .StartsWith(S: "deinterleave2." , Value: Intrinsic::vector_deinterleave2) |
1227 | .Default(Value: Intrinsic::not_intrinsic); |
1228 | if (ID != Intrinsic::not_intrinsic) { |
1229 | const auto *FT = F->getFunctionType(); |
1230 | SmallVector<Type *, 2> Tys; |
1231 | if (ID == Intrinsic::vector_extract || |
1232 | ID == Intrinsic::vector_interleave2) |
// Extracting and interleaving overload the return type.
1234 | Tys.push_back(Elt: FT->getReturnType()); |
1235 | if (ID != Intrinsic::vector_interleave2) |
1236 | Tys.push_back(Elt: FT->getParamType(i: 0)); |
1237 | if (ID == Intrinsic::vector_insert) |
1238 | // Inserting overloads the inserted type. |
1239 | Tys.push_back(Elt: FT->getParamType(i: 1)); |
1240 | rename(GV: F); |
1241 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys); |
1242 | return true; |
1243 | } |
1244 | |
1245 | if (Name.consume_front(Prefix: "reduce." )) { |
1246 | SmallVector<StringRef, 2> Groups; |
1247 | static const Regex R("^([a-z]+)\\.[a-z][0-9]+" ); |
1248 | if (R.match(String: Name, Matches: &Groups)) |
1249 | ID = StringSwitch<Intrinsic::ID>(Groups[1]) |
1250 | .Case(S: "add" , Value: Intrinsic::vector_reduce_add) |
1251 | .Case(S: "mul" , Value: Intrinsic::vector_reduce_mul) |
1252 | .Case(S: "and" , Value: Intrinsic::vector_reduce_and) |
1253 | .Case(S: "or" , Value: Intrinsic::vector_reduce_or) |
1254 | .Case(S: "xor" , Value: Intrinsic::vector_reduce_xor) |
1255 | .Case(S: "smax" , Value: Intrinsic::vector_reduce_smax) |
1256 | .Case(S: "smin" , Value: Intrinsic::vector_reduce_smin) |
1257 | .Case(S: "umax" , Value: Intrinsic::vector_reduce_umax) |
1258 | .Case(S: "umin" , Value: Intrinsic::vector_reduce_umin) |
1259 | .Case(S: "fmax" , Value: Intrinsic::vector_reduce_fmax) |
1260 | .Case(S: "fmin" , Value: Intrinsic::vector_reduce_fmin) |
1261 | .Default(Value: Intrinsic::not_intrinsic); |
1262 | |
1263 | bool V2 = false; |
1264 | if (ID == Intrinsic::not_intrinsic) { |
1265 | static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+" ); |
1266 | Groups.clear(); |
1267 | V2 = true; |
1268 | if (R2.match(String: Name, Matches: &Groups)) |
1269 | ID = StringSwitch<Intrinsic::ID>(Groups[1]) |
1270 | .Case(S: "fadd" , Value: Intrinsic::vector_reduce_fadd) |
1271 | .Case(S: "fmul" , Value: Intrinsic::vector_reduce_fmul) |
1272 | .Default(Value: Intrinsic::not_intrinsic); |
1273 | } |
1274 | if (ID != Intrinsic::not_intrinsic) { |
1275 | rename(GV: F); |
1276 | auto Args = F->getFunctionType()->params(); |
1277 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, |
1278 | Tys: {Args[V2 ? 1 : 0]}); |
1279 | return true; |
1280 | } |
break; // No other 'experimental.vector.reduce.*'.
1282 | } |
1283 | break; // No other 'experimental.vector.*'. |
1284 | } |
1285 | if (Name.consume_front(Prefix: "experimental.stepvector." )) { |
1286 | Intrinsic::ID ID = Intrinsic::stepvector; |
1287 | rename(GV: F); |
1288 | NewFn = Intrinsic::getOrInsertDeclaration( |
1289 | M: F->getParent(), id: ID, Tys: F->getFunctionType()->getReturnType()); |
1290 | return true; |
1291 | } |
1292 | break; // No other 'e*'. |
1293 | case 'f': |
1294 | if (Name.starts_with(Prefix: "flt.rounds" )) { |
1295 | rename(GV: F); |
1296 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), |
1297 | id: Intrinsic::get_rounding); |
1298 | return true; |
1299 | } |
1300 | break; |
1301 | case 'i': |
1302 | if (Name.starts_with(Prefix: "invariant.group.barrier" )) { |
1303 | // Rename invariant.group.barrier to launder.invariant.group |
1304 | auto Args = F->getFunctionType()->params(); |
1305 | Type* ObjectPtr[1] = {Args[0]}; |
1306 | rename(GV: F); |
1307 | NewFn = Intrinsic::getOrInsertDeclaration( |
1308 | M: F->getParent(), id: Intrinsic::launder_invariant_group, Tys: ObjectPtr); |
1309 | return true; |
1310 | } |
1311 | break; |
1312 | case 'm': { |
// Upgrade the memory intrinsics (memcpy/memmove/memset) that take an
// explicit alignment parameter to the forms that embed the alignment as
// an attribute of the pointer args.
1316 | if (unsigned ID = StringSwitch<unsigned>(Name) |
1317 | .StartsWith(S: "memcpy." , Value: Intrinsic::memcpy) |
1318 | .StartsWith(S: "memmove." , Value: Intrinsic::memmove) |
1319 | .Default(Value: 0)) { |
1320 | if (F->arg_size() == 5) { |
1321 | rename(GV: F); |
1322 | // Get the types of dest, src, and len |
1323 | ArrayRef<Type *> ParamTypes = |
1324 | F->getFunctionType()->params().slice(N: 0, M: 3); |
1325 | NewFn = |
1326 | Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys: ParamTypes); |
1327 | return true; |
1328 | } |
1329 | } |
1330 | if (Name.starts_with(Prefix: "memset." ) && F->arg_size() == 5) { |
1331 | rename(GV: F); |
// Get the types of dest and len
1333 | const auto *FT = F->getFunctionType(); |
1334 | Type *ParamTypes[2] = { |
1335 | FT->getParamType(i: 0), // Dest |
1336 | FT->getParamType(i: 2) // len |
1337 | }; |
1338 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), |
1339 | id: Intrinsic::memset, Tys: ParamTypes); |
1340 | return true; |
1341 | } |
1342 | break; |
1343 | } |
1344 | case 'n': { |
1345 | if (Name.consume_front(Prefix: "nvvm." )) { |
1346 | // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic. |
1347 | if (F->arg_size() == 1) { |
1348 | Intrinsic::ID IID = |
1349 | StringSwitch<Intrinsic::ID>(Name) |
1350 | .Cases(S0: "brev32" , S1: "brev64" , Value: Intrinsic::bitreverse) |
1351 | .Case(S: "clz.i" , Value: Intrinsic::ctlz) |
1352 | .Case(S: "popc.i" , Value: Intrinsic::ctpop) |
1353 | .Default(Value: Intrinsic::not_intrinsic); |
1354 | if (IID != Intrinsic::not_intrinsic) { |
1355 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID, |
1356 | Tys: {F->getReturnType()}); |
1357 | return true; |
1358 | } |
1359 | } |
1360 | |
1361 | // Check for nvvm intrinsics that need a return type adjustment. |
1362 | if (!F->getReturnType()->getScalarType()->isBFloatTy()) { |
1363 | Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); |
1364 | if (IID != Intrinsic::not_intrinsic) { |
1365 | NewFn = nullptr; |
1366 | return true; |
1367 | } |
1368 | } |
1369 | |
1370 | // Upgrade Distributed Shared Memory Intrinsics |
1371 | Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name); |
1372 | if (IID != Intrinsic::not_intrinsic) { |
1373 | rename(GV: F); |
1374 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID); |
1375 | return true; |
1376 | } |
1377 | |
1378 | // Upgrade TMA copy G2S Intrinsics |
1379 | IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name); |
1380 | if (IID != Intrinsic::not_intrinsic) { |
1381 | rename(GV: F); |
1382 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID); |
1383 | return true; |
1384 | } |
1385 | |
1386 | // The following nvvm intrinsics correspond exactly to an LLVM idiom, but |
1387 | // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall. |
1388 | // |
1389 | // TODO: We could add lohi.i2d. |
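// For example, nvvm.rotate.b32 has no single-intrinsic replacement; it is
// expanded to a call to llvm.fshl.i32 in upgradeNVVMIntrinsicCall below.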
1390 | bool Expand = false; |
1391 | if (Name.consume_front(Prefix: "abs." )) |
// nvvm.abs.{i,ll,bf16,bf16x2}
1393 | Expand = |
1394 | Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2" ; |
1395 | else if (Name.consume_front(Prefix: "fabs." )) |
1396 | // nvvm.fabs.{f,ftz.f,d} |
1397 | Expand = Name == "f" || Name == "ftz.f" || Name == "d" ; |
1398 | else if (Name.consume_front(Prefix: "max." ) || Name.consume_front(Prefix: "min." )) |
// nvvm.{min,max}.{s,i,ll,us,ui,ull}
1400 | Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" || |
1401 | Name == "ui" || Name == "ull" ; |
1402 | else if (Name.consume_front(Prefix: "atomic.load." )) |
1403 | // nvvm.atomic.load.add.{f32,f64}.p |
1404 | // nvvm.atomic.load.{inc,dec}.32.p |
1405 | Expand = StringSwitch<bool>(Name) |
1406 | .StartsWith(S: "add.f32.p" , Value: true) |
1407 | .StartsWith(S: "add.f64.p" , Value: true) |
1408 | .StartsWith(S: "inc.32.p" , Value: true) |
1409 | .StartsWith(S: "dec.32.p" , Value: true) |
1410 | .Default(Value: false); |
1411 | else if (Name.consume_front(Prefix: "bitcast." )) |
1412 | // nvvm.bitcast.{f2i,i2f,ll2d,d2ll} |
1413 | Expand = |
1414 | Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll" ; |
1415 | else if (Name.consume_front(Prefix: "rotate." )) |
1416 | // nvvm.rotate.{b32,b64,right.b64} |
1417 | Expand = Name == "b32" || Name == "b64" || Name == "right.b64" ; |
1418 | else if (Name.consume_front(Prefix: "ptr.gen.to." )) |
1419 | // nvvm.ptr.gen.to.{local,shared,global,constant,param} |
1420 | Expand = consumeNVVMPtrAddrSpace(Name); |
1421 | else if (Name.consume_front(Prefix: "ptr." )) |
1422 | // nvvm.ptr.{local,shared,global,constant,param}.to.gen |
1423 | Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(Prefix: ".to.gen" ); |
1424 | else if (Name.consume_front(Prefix: "ldg.global." )) |
1425 | // nvvm.ldg.global.{i,p,f} |
1426 | Expand = (Name.starts_with(Prefix: "i." ) || Name.starts_with(Prefix: "f." ) || |
1427 | Name.starts_with(Prefix: "p." )); |
1428 | else |
1429 | Expand = StringSwitch<bool>(Name) |
1430 | .Case(S: "barrier0" , Value: true) |
1431 | .Case(S: "barrier.n" , Value: true) |
1432 | .Case(S: "barrier.sync.cnt" , Value: true) |
1433 | .Case(S: "barrier.sync" , Value: true) |
1434 | .Case(S: "barrier" , Value: true) |
1435 | .Case(S: "bar.sync" , Value: true) |
1436 | .Case(S: "clz.ll" , Value: true) |
1437 | .Case(S: "popc.ll" , Value: true) |
1438 | .Case(S: "h2f" , Value: true) |
1439 | .Case(S: "swap.lo.hi.b64" , Value: true) |
1440 | .Default(Value: false); |
1441 | |
1442 | if (Expand) { |
1443 | NewFn = nullptr; |
1444 | return true; |
1445 | } |
1446 | break; // No other 'nvvm.*'. |
1447 | } |
1448 | break; |
1449 | } |
1450 | case 'o': |
1451 | if (Name.starts_with(Prefix: "objectsize." )) { |
1452 | Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; |
1453 | if (F->arg_size() == 2 || F->arg_size() == 3) { |
1454 | rename(GV: F); |
1455 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), |
1456 | id: Intrinsic::objectsize, Tys); |
1457 | return true; |
1458 | } |
1459 | } |
1460 | break; |
1461 | |
1462 | case 'p': |
1463 | if (Name.starts_with(Prefix: "ptr.annotation." ) && F->arg_size() == 4) { |
1464 | rename(GV: F); |
1465 | NewFn = Intrinsic::getOrInsertDeclaration( |
1466 | M: F->getParent(), id: Intrinsic::ptr_annotation, |
1467 | Tys: {F->arg_begin()->getType(), F->getArg(i: 1)->getType()}); |
1468 | return true; |
1469 | } |
1470 | break; |
1471 | |
1472 | case 'r': { |
1473 | if (Name.consume_front(Prefix: "riscv." )) { |
1474 | Intrinsic::ID ID; |
1475 | ID = StringSwitch<Intrinsic::ID>(Name) |
1476 | .Case(S: "aes32dsi" , Value: Intrinsic::riscv_aes32dsi) |
1477 | .Case(S: "aes32dsmi" , Value: Intrinsic::riscv_aes32dsmi) |
1478 | .Case(S: "aes32esi" , Value: Intrinsic::riscv_aes32esi) |
1479 | .Case(S: "aes32esmi" , Value: Intrinsic::riscv_aes32esmi) |
1480 | .Default(Value: Intrinsic::not_intrinsic); |
1481 | if (ID != Intrinsic::not_intrinsic) { |
1482 | if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32)) { |
1483 | rename(GV: F); |
1484 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID); |
1485 | return true; |
1486 | } |
1487 | break; // No other applicable upgrades. |
1488 | } |
1489 | |
1490 | ID = StringSwitch<Intrinsic::ID>(Name) |
1491 | .StartsWith(S: "sm4ks" , Value: Intrinsic::riscv_sm4ks) |
1492 | .StartsWith(S: "sm4ed" , Value: Intrinsic::riscv_sm4ed) |
1493 | .Default(Value: Intrinsic::not_intrinsic); |
1494 | if (ID != Intrinsic::not_intrinsic) { |
1495 | if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32) || |
1496 | F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) { |
1497 | rename(GV: F); |
1498 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID); |
1499 | return true; |
1500 | } |
1501 | break; // No other applicable upgrades. |
1502 | } |
1503 | |
1504 | ID = StringSwitch<Intrinsic::ID>(Name) |
1505 | .StartsWith(S: "sha256sig0" , Value: Intrinsic::riscv_sha256sig0) |
1506 | .StartsWith(S: "sha256sig1" , Value: Intrinsic::riscv_sha256sig1) |
1507 | .StartsWith(S: "sha256sum0" , Value: Intrinsic::riscv_sha256sum0) |
1508 | .StartsWith(S: "sha256sum1" , Value: Intrinsic::riscv_sha256sum1) |
1509 | .StartsWith(S: "sm3p0" , Value: Intrinsic::riscv_sm3p0) |
1510 | .StartsWith(S: "sm3p1" , Value: Intrinsic::riscv_sm3p1) |
1511 | .Default(Value: Intrinsic::not_intrinsic); |
1512 | if (ID != Intrinsic::not_intrinsic) { |
1513 | if (F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) { |
1514 | rename(GV: F); |
1515 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID); |
1516 | return true; |
1517 | } |
1518 | break; // No other applicable upgrades. |
1519 | } |
1520 | break; // No other 'riscv.*' intrinsics |
1521 | } |
1522 | } break; |
1523 | |
1524 | case 's': |
1525 | if (Name == "stackprotectorcheck" ) { |
1526 | NewFn = nullptr; |
1527 | return true; |
1528 | } |
1529 | break; |
1530 | |
1531 | case 't': |
1532 | if (Name == "thread.pointer" ) { |
1533 | NewFn = Intrinsic::getOrInsertDeclaration( |
1534 | M: F->getParent(), id: Intrinsic::thread_pointer, Tys: F->getReturnType()); |
1535 | return true; |
1536 | } |
1537 | break; |
1538 | |
1539 | case 'v': { |
1540 | if (Name == "var.annotation" && F->arg_size() == 4) { |
1541 | rename(GV: F); |
1542 | NewFn = Intrinsic::getOrInsertDeclaration( |
1543 | M: F->getParent(), id: Intrinsic::var_annotation, |
1544 | Tys: {{F->arg_begin()->getType(), F->getArg(i: 1)->getType()}}); |
1545 | return true; |
1546 | } |
1547 | break; |
1548 | } |
1549 | |
1550 | case 'w': |
1551 | if (Name.consume_front(Prefix: "wasm." )) { |
1552 | Intrinsic::ID ID = |
1553 | StringSwitch<Intrinsic::ID>(Name) |
1554 | .StartsWith(S: "fma." , Value: Intrinsic::wasm_relaxed_madd) |
1555 | .StartsWith(S: "fms." , Value: Intrinsic::wasm_relaxed_nmadd) |
1556 | .StartsWith(S: "laneselect." , Value: Intrinsic::wasm_relaxed_laneselect) |
1557 | .Default(Value: Intrinsic::not_intrinsic); |
1558 | if (ID != Intrinsic::not_intrinsic) { |
1559 | rename(GV: F); |
1560 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, |
1561 | Tys: F->getReturnType()); |
1562 | return true; |
1563 | } |
1564 | |
1565 | if (Name.consume_front(Prefix: "dot.i8x16.i7x16." )) { |
1566 | ID = StringSwitch<Intrinsic::ID>(Name) |
1567 | .Case(S: "signed" , Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed) |
1568 | .Case(S: "add.signed" , |
1569 | Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed) |
1570 | .Default(Value: Intrinsic::not_intrinsic); |
1571 | if (ID != Intrinsic::not_intrinsic) { |
1572 | rename(GV: F); |
1573 | NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID); |
1574 | return true; |
1575 | } |
1576 | break; // No other 'wasm.dot.i8x16.i7x16.*'. |
1577 | } |
1578 | break; // No other 'wasm.*'. |
1579 | } |
1580 | break; |
1581 | |
1582 | case 'x': |
1583 | if (upgradeX86IntrinsicFunction(F, Name, NewFn)) |
1584 | return true; |
1585 | } |
1586 | |
1587 | auto *ST = dyn_cast<StructType>(Val: F->getReturnType()); |
1588 | if (ST && (!ST->isLiteral() || ST->isPacked()) && |
1589 | F->getIntrinsicID() != Intrinsic::not_intrinsic) { |
1590 | // Replace return type with literal non-packed struct. Only do this for |
1591 | // intrinsics declared to return a struct, not for intrinsics with |
1592 | // overloaded return type, in which case the exact struct type will be |
1593 | // mangled into the name. |
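// For example, a declaration returning the packed struct <{ i32, i8 }> is
// recreated to return the literal, non-packed struct { i32, i8 } with the
// same element types.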
1594 | SmallVector<Intrinsic::IITDescriptor> Desc; |
1595 | Intrinsic::getIntrinsicInfoTableEntries(id: F->getIntrinsicID(), T&: Desc); |
1596 | if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) { |
1597 | auto *FT = F->getFunctionType(); |
1598 | auto *NewST = StructType::get(Context&: ST->getContext(), Elements: ST->elements()); |
1599 | auto *NewFT = FunctionType::get(Result: NewST, Params: FT->params(), isVarArg: FT->isVarArg()); |
1600 | std::string Name = F->getName().str(); |
1601 | rename(GV: F); |
1602 | NewFn = Function::Create(Ty: NewFT, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(), |
1603 | N: Name, M: F->getParent()); |
1604 | |
1605 | // The new function may also need remangling. |
1606 | if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F: NewFn)) |
1607 | NewFn = *Result; |
1608 | return true; |
1609 | } |
1610 | } |
1611 | |
1612 | // Remangle our intrinsic since we upgrade the mangling |
1613 | auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); |
1614 | if (Result != std::nullopt) { |
1615 | NewFn = *Result; |
1616 | return true; |
1617 | } |
1618 | |
1619 | // This may not belong here. This function is effectively being overloaded |
1620 | // to both detect an intrinsic which needs upgrading, and to provide the |
1621 | // upgraded form of the intrinsic. We should perhaps have two separate |
1622 | // functions for this. |
1623 | return false; |
1624 | } |
1625 | |
1626 | bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn, |
1627 | bool CanUpgradeDebugIntrinsicsToRecords) { |
1628 | NewFn = nullptr; |
1629 | bool Upgraded = |
1630 | upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords); |
1631 | assert(F != NewFn && "Intrinsic function upgraded to the same function" ); |
1632 | |
1633 | // Upgrade intrinsic attributes. This does not change the function. |
1634 | if (NewFn) |
1635 | F = NewFn; |
1636 | if (Intrinsic::ID id = F->getIntrinsicID()) { |
1637 | // Only do this if the intrinsic signature is valid. |
1638 | SmallVector<Type *> OverloadTys; |
1639 | if (Intrinsic::getIntrinsicSignature(id, FT: F->getFunctionType(), ArgTys&: OverloadTys)) |
1640 | F->setAttributes( |
1641 | Intrinsic::getAttributes(C&: F->getContext(), id, FT: F->getFunctionType())); |
1642 | } |
1643 | return Upgraded; |
1644 | } |
1645 | |
1646 | GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) { |
1647 | if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" || |
1648 | GV->getName() == "llvm.global_dtors" )) || |
1649 | !GV->hasInitializer()) |
1650 | return nullptr; |
1651 | ArrayType *ATy = dyn_cast<ArrayType>(Val: GV->getValueType()); |
1652 | if (!ATy) |
1653 | return nullptr; |
1654 | StructType *STy = dyn_cast<StructType>(Val: ATy->getElementType()); |
1655 | if (!STy || STy->getNumElements() != 2) |
1656 | return nullptr; |
1657 | |
1658 | LLVMContext &C = GV->getContext(); |
1659 | IRBuilder<> IRB(C); |
1660 | auto EltTy = StructType::get(elt1: STy->getElementType(N: 0), elts: STy->getElementType(N: 1), |
1661 | elts: IRB.getPtrTy()); |
1662 | Constant *Init = GV->getInitializer(); |
1663 | unsigned N = Init->getNumOperands(); |
1664 | std::vector<Constant *> NewCtors(N); |
1665 | for (unsigned i = 0; i != N; ++i) { |
1666 | auto Ctor = cast<Constant>(Val: Init->getOperand(i)); |
1667 | NewCtors[i] = ConstantStruct::get(T: EltTy, Vs: Ctor->getAggregateElement(Elt: 0u), |
1668 | Vs: Ctor->getAggregateElement(Elt: 1), |
1669 | Vs: ConstantPointerNull::get(T: IRB.getPtrTy())); |
1670 | } |
1671 | Constant *NewInit = ConstantArray::get(T: ArrayType::get(ElementType: EltTy, NumElements: N), V: NewCtors); |
1672 | |
1673 | return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(), |
1674 | NewInit, GV->getName()); |
1675 | } |
1676 | |
1677 | // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them |
1678 | // to byte shuffles. |
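// For example, with a v2i64 operand and Shift == 4, the emitted shuffle makes
// result byte i zero for i < 4 and source byte i - 4 otherwise, i.e. the lane
// is shifted towards its most-significant end by 4 bytes.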
1679 | static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, |
1680 | unsigned Shift) { |
1681 | auto *ResultTy = cast<FixedVectorType>(Val: Op->getType()); |
1682 | unsigned NumElts = ResultTy->getNumElements() * 8; |
1683 | |
1684 | // Bitcast from a 64-bit element type to a byte element type. |
1685 | Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts); |
1686 | Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast" ); |
1687 | |
1688 | // We'll be shuffling in zeroes. |
1689 | Value *Res = Constant::getNullValue(Ty: VecTy); |
1690 | |
1691 | // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, |
1692 | // we'll just return the zero vector. |
1693 | if (Shift < 16) { |
1694 | int Idxs[64]; |
1695 | // 256/512-bit version is split into 2/4 16-byte lanes. |
1696 | for (unsigned l = 0; l != NumElts; l += 16) |
1697 | for (unsigned i = 0; i != 16; ++i) { |
1698 | unsigned Idx = NumElts + i - Shift; |
1699 | if (Idx < NumElts) |
1700 | Idx -= NumElts - 16; // end of lane, switch operand. |
1701 | Idxs[l + i] = Idx + l; |
1702 | } |
1703 | |
1704 | Res = Builder.CreateShuffleVector(V1: Res, V2: Op, Mask: ArrayRef(Idxs, NumElts)); |
1705 | } |
1706 | |
1707 | // Bitcast back to a 64-bit element type. |
1708 | return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast" ); |
1709 | } |
1710 | |
1711 | // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them |
1712 | // to byte shuffles. |
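// For example, with a v2i64 operand and Shift == 4, result byte i becomes
// source byte i + 4 for i < 12, and the top 4 bytes of the lane become zero.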
1713 | static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, |
1714 | unsigned Shift) { |
1715 | auto *ResultTy = cast<FixedVectorType>(Val: Op->getType()); |
1716 | unsigned NumElts = ResultTy->getNumElements() * 8; |
1717 | |
1718 | // Bitcast from a 64-bit element type to a byte element type. |
1719 | Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts); |
1720 | Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast" ); |
1721 | |
1722 | // We'll be shuffling in zeroes. |
1723 | Value *Res = Constant::getNullValue(Ty: VecTy); |
1724 | |
1725 | // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, |
1726 | // we'll just return the zero vector. |
1727 | if (Shift < 16) { |
1728 | int Idxs[64]; |
1729 | // 256/512-bit version is split into 2/4 16-byte lanes. |
1730 | for (unsigned l = 0; l != NumElts; l += 16) |
1731 | for (unsigned i = 0; i != 16; ++i) { |
1732 | unsigned Idx = i + Shift; |
1733 | if (Idx >= 16) |
1734 | Idx += NumElts - 16; // end of lane, switch operand. |
1735 | Idxs[l + i] = Idx + l; |
1736 | } |
1737 | |
1738 | Res = Builder.CreateShuffleVector(V1: Op, V2: Res, Mask: ArrayRef(Idxs, NumElts)); |
1739 | } |
1740 | |
1741 | // Bitcast back to a 64-bit element type. |
1742 | return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast" ); |
1743 | } |
1744 | |
1745 | static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, |
1746 | unsigned NumElts) { |
1747 | assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements" ); |
1748 | llvm::VectorType *MaskTy = FixedVectorType::get( |
1749 | ElementType: Builder.getInt1Ty(), NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth()); |
1750 | Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy); |
1751 | |
// If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
// i8 and we need to extract down to the right number of elements.
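// For example, with NumElts == 4 the i8 mask is bitcast to <8 x i1> above and
// the shuffle below keeps only elements 0..3.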
1754 | if (NumElts <= 4) { |
1755 | int Indices[4]; |
1756 | for (unsigned i = 0; i != NumElts; ++i) |
1757 | Indices[i] = i; |
1758 | Mask = Builder.CreateShuffleVector(V1: Mask, V2: Mask, Mask: ArrayRef(Indices, NumElts), |
1759 | Name: "extract" ); |
1760 | } |
1761 | |
1762 | return Mask; |
1763 | } |
1764 | |
1765 | static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, |
1766 | Value *Op1) { |
1767 | // If the mask is all ones just emit the first operation. |
1768 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1769 | if (C->isAllOnesValue()) |
1770 | return Op0; |
1771 | |
1772 | Mask = getX86MaskVec(Builder, Mask, |
1773 | NumElts: cast<FixedVectorType>(Val: Op0->getType())->getNumElements()); |
1774 | return Builder.CreateSelect(C: Mask, True: Op0, False: Op1); |
1775 | } |
1776 | |
1777 | static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, |
1778 | Value *Op1) { |
1779 | // If the mask is all ones just emit the first operation. |
1780 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1781 | if (C->isAllOnesValue()) |
1782 | return Op0; |
1783 | |
1784 | auto *MaskTy = FixedVectorType::get(ElementType: Builder.getInt1Ty(), |
1785 | NumElts: Mask->getType()->getIntegerBitWidth()); |
1786 | Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy); |
1787 | Mask = Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0); |
1788 | return Builder.CreateSelect(C: Mask, True: Op0, False: Op1); |
1789 | } |
1790 | |
1791 | // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics. |
// PALIGNR handles large immediates by shifting, while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
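// For example, a 128-bit PALIGNR with an immediate of 20 is rewritten below
// as a 4-byte shift that pulls in zeroes, while a VALIGND with NumElts == 8
// masks that same immediate down to 4.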
1794 | static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, |
1795 | Value *Op1, Value *Shift, |
1796 | Value *Passthru, Value *Mask, |
1797 | bool IsVALIGN) { |
1798 | unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Shift)->getZExtValue(); |
1799 | |
1800 | unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements(); |
1801 | assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!" ); |
1802 | assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!" ); |
1803 | assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!" ); |
1804 | |
1805 | // Mask the immediate for VALIGN. |
1806 | if (IsVALIGN) |
1807 | ShiftVal &= (NumElts - 1); |
1808 | |
1809 | // If palignr is shifting the pair of vectors more than the size of two |
1810 | // lanes, emit zero. |
1811 | if (ShiftVal >= 32) |
1812 | return llvm::Constant::getNullValue(Ty: Op0->getType()); |
1813 | |
1814 | // If palignr is shifting the pair of input vectors more than one lane, |
1815 | // but less than two lanes, convert to shifting in zeroes. |
1816 | if (ShiftVal > 16) { |
1817 | ShiftVal -= 16; |
1818 | Op1 = Op0; |
1819 | Op0 = llvm::Constant::getNullValue(Ty: Op0->getType()); |
1820 | } |
1821 | |
1822 | int Indices[64]; |
1823 | // 256-bit palignr operates on 128-bit lanes so we need to handle that |
1824 | for (unsigned l = 0; l < NumElts; l += 16) { |
1825 | for (unsigned i = 0; i != 16; ++i) { |
1826 | unsigned Idx = ShiftVal + i; |
1827 | if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN. |
1828 | Idx += NumElts - 16; // End of lane, switch operand. |
1829 | Indices[l + i] = Idx + l; |
1830 | } |
1831 | } |
1832 | |
1833 | Value *Align = Builder.CreateShuffleVector( |
1834 | V1: Op1, V2: Op0, Mask: ArrayRef(Indices, NumElts), Name: "palignr" ); |
1835 | |
1836 | return emitX86Select(Builder, Mask, Op0: Align, Op1: Passthru); |
1837 | } |
1838 | |
1839 | static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, |
1840 | bool ZeroMask, bool IndexForm) { |
1841 | Type *Ty = CI.getType(); |
1842 | unsigned VecWidth = Ty->getPrimitiveSizeInBits(); |
1843 | unsigned EltWidth = Ty->getScalarSizeInBits(); |
1844 | bool IsFloat = Ty->isFPOrFPVectorTy(); |
1845 | Intrinsic::ID IID; |
1846 | if (VecWidth == 128 && EltWidth == 32 && IsFloat) |
1847 | IID = Intrinsic::x86_avx512_vpermi2var_ps_128; |
1848 | else if (VecWidth == 128 && EltWidth == 32 && !IsFloat) |
1849 | IID = Intrinsic::x86_avx512_vpermi2var_d_128; |
1850 | else if (VecWidth == 128 && EltWidth == 64 && IsFloat) |
1851 | IID = Intrinsic::x86_avx512_vpermi2var_pd_128; |
1852 | else if (VecWidth == 128 && EltWidth == 64 && !IsFloat) |
1853 | IID = Intrinsic::x86_avx512_vpermi2var_q_128; |
1854 | else if (VecWidth == 256 && EltWidth == 32 && IsFloat) |
1855 | IID = Intrinsic::x86_avx512_vpermi2var_ps_256; |
1856 | else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) |
1857 | IID = Intrinsic::x86_avx512_vpermi2var_d_256; |
1858 | else if (VecWidth == 256 && EltWidth == 64 && IsFloat) |
1859 | IID = Intrinsic::x86_avx512_vpermi2var_pd_256; |
1860 | else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) |
1861 | IID = Intrinsic::x86_avx512_vpermi2var_q_256; |
1862 | else if (VecWidth == 512 && EltWidth == 32 && IsFloat) |
1863 | IID = Intrinsic::x86_avx512_vpermi2var_ps_512; |
1864 | else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) |
1865 | IID = Intrinsic::x86_avx512_vpermi2var_d_512; |
1866 | else if (VecWidth == 512 && EltWidth == 64 && IsFloat) |
1867 | IID = Intrinsic::x86_avx512_vpermi2var_pd_512; |
1868 | else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) |
1869 | IID = Intrinsic::x86_avx512_vpermi2var_q_512; |
1870 | else if (VecWidth == 128 && EltWidth == 16) |
1871 | IID = Intrinsic::x86_avx512_vpermi2var_hi_128; |
1872 | else if (VecWidth == 256 && EltWidth == 16) |
1873 | IID = Intrinsic::x86_avx512_vpermi2var_hi_256; |
1874 | else if (VecWidth == 512 && EltWidth == 16) |
1875 | IID = Intrinsic::x86_avx512_vpermi2var_hi_512; |
1876 | else if (VecWidth == 128 && EltWidth == 8) |
1877 | IID = Intrinsic::x86_avx512_vpermi2var_qi_128; |
1878 | else if (VecWidth == 256 && EltWidth == 8) |
1879 | IID = Intrinsic::x86_avx512_vpermi2var_qi_256; |
1880 | else if (VecWidth == 512 && EltWidth == 8) |
1881 | IID = Intrinsic::x86_avx512_vpermi2var_qi_512; |
1882 | else |
1883 | llvm_unreachable("Unexpected intrinsic" ); |
1884 | |
Value *Args[] = {CI.getArgOperand(i: 0), CI.getArgOperand(i: 1),
CI.getArgOperand(i: 2)};
1887 | |
1888 | // If this isn't index form we need to swap operand 0 and 1. |
1889 | if (!IndexForm) |
1890 | std::swap(a&: Args[0], b&: Args[1]); |
1891 | |
1892 | Value *V = Builder.CreateIntrinsic(ID: IID, Args); |
1893 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) |
1894 | : Builder.CreateBitCast(V: CI.getArgOperand(i: 1), |
1895 | DestTy: Ty); |
1896 | return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: V, Op1: PassThru); |
1897 | } |
1898 | |
1899 | static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, |
1900 | Intrinsic::ID IID) { |
1901 | Type *Ty = CI.getType(); |
1902 | Value *Op0 = CI.getOperand(i_nocapture: 0); |
1903 | Value *Op1 = CI.getOperand(i_nocapture: 1); |
1904 | Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1}); |
1905 | |
1906 | if (CI.arg_size() == 4) { // For masked intrinsics. |
1907 | Value *VecSrc = CI.getOperand(i_nocapture: 2); |
1908 | Value *Mask = CI.getOperand(i_nocapture: 3); |
1909 | Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc); |
1910 | } |
1911 | return Res; |
1912 | } |
1913 | |
1914 | static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, |
1915 | bool IsRotateRight) { |
1916 | Type *Ty = CI.getType(); |
1917 | Value *Src = CI.getArgOperand(i: 0); |
1918 | Value *Amt = CI.getArgOperand(i: 1); |
1919 | |
// The amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are taken modulo the element width, and the types are
// all power-of-2, so we only care about the lowest log2 bits anyway.
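// For example, rotating v4i32 lanes by a scalar amount of 35 behaves the same
// as rotating by 3 once the amount is splatted into the funnel shift.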
1923 | if (Amt->getType() != Ty) { |
1924 | unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements(); |
1925 | Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false); |
1926 | Amt = Builder.CreateVectorSplat(NumElts, V: Amt); |
1927 | } |
1928 | |
1929 | Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; |
1930 | Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Src, Src, Amt}); |
1931 | |
1932 | if (CI.arg_size() == 4) { // For masked intrinsics. |
1933 | Value *VecSrc = CI.getOperand(i_nocapture: 2); |
1934 | Value *Mask = CI.getOperand(i_nocapture: 3); |
1935 | Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc); |
1936 | } |
1937 | return Res; |
1938 | } |
1939 | |
1940 | static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, |
1941 | bool IsSigned) { |
1942 | Type *Ty = CI.getType(); |
1943 | Value *LHS = CI.getArgOperand(i: 0); |
1944 | Value *RHS = CI.getArgOperand(i: 1); |
1945 | |
1946 | CmpInst::Predicate Pred; |
1947 | switch (Imm) { |
1948 | case 0x0: |
1949 | Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; |
1950 | break; |
1951 | case 0x1: |
1952 | Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; |
1953 | break; |
1954 | case 0x2: |
1955 | Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; |
1956 | break; |
1957 | case 0x3: |
1958 | Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; |
1959 | break; |
1960 | case 0x4: |
1961 | Pred = ICmpInst::ICMP_EQ; |
1962 | break; |
1963 | case 0x5: |
1964 | Pred = ICmpInst::ICMP_NE; |
1965 | break; |
1966 | case 0x6: |
1967 | return Constant::getNullValue(Ty); // FALSE |
1968 | case 0x7: |
1969 | return Constant::getAllOnesValue(Ty); // TRUE |
1970 | default: |
1971 | llvm_unreachable("Unknown XOP vpcom/vpcomu predicate" ); |
1972 | } |
1973 | |
1974 | Value *Cmp = Builder.CreateICmp(P: Pred, LHS, RHS); |
1975 | Value *Ext = Builder.CreateSExt(V: Cmp, DestTy: Ty); |
1976 | return Ext; |
1977 | } |
1978 | |
1979 | static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, |
1980 | bool IsShiftRight, bool ZeroMask) { |
1981 | Type *Ty = CI.getType(); |
1982 | Value *Op0 = CI.getArgOperand(i: 0); |
1983 | Value *Op1 = CI.getArgOperand(i: 1); |
1984 | Value *Amt = CI.getArgOperand(i: 2); |
1985 | |
1986 | if (IsShiftRight) |
1987 | std::swap(a&: Op0, b&: Op1); |
1988 | |
// The amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are taken modulo the element width, and the types are
// all power-of-2, so we only care about the lowest log2 bits anyway.
1992 | if (Amt->getType() != Ty) { |
1993 | unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements(); |
1994 | Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false); |
1995 | Amt = Builder.CreateVectorSplat(NumElts, V: Amt); |
1996 | } |
1997 | |
1998 | Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl; |
1999 | Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1, Amt}); |
2000 | |
2001 | unsigned NumArgs = CI.arg_size(); |
2002 | if (NumArgs >= 4) { // For masked intrinsics. |
2003 | Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(i: 3) : |
2004 | ZeroMask ? ConstantAggregateZero::get(Ty: CI.getType()) : |
2005 | CI.getArgOperand(i: 0); |
2006 | Value *Mask = CI.getOperand(i_nocapture: NumArgs - 1); |
2007 | Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc); |
2008 | } |
2009 | return Res; |
2010 | } |
2011 | |
2012 | static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, |
2013 | Value *Mask, bool Aligned) { |
2014 | const Align Alignment = |
2015 | Aligned |
2016 | ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8) |
2017 | : Align(1); |
2018 | |
2019 | // If the mask is all ones just emit a regular store. |
2020 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
2021 | if (C->isAllOnesValue()) |
2022 | return Builder.CreateAlignedStore(Val: Data, Ptr, Align: Alignment); |
2023 | |
2024 | // Convert the mask from an integer type to a vector of i1. |
2025 | unsigned NumElts = cast<FixedVectorType>(Val: Data->getType())->getNumElements(); |
2026 | Mask = getX86MaskVec(Builder, Mask, NumElts); |
2027 | return Builder.CreateMaskedStore(Val: Data, Ptr, Alignment, Mask); |
2028 | } |
2029 | |
2030 | static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, |
2031 | Value *Passthru, Value *Mask, bool Aligned) { |
2032 | Type *ValTy = Passthru->getType(); |
2033 | const Align Alignment = |
2034 | Aligned |
2035 | ? Align( |
2036 | Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() / |
2037 | 8) |
2038 | : Align(1); |
2039 | |
// If the mask is all ones just emit a regular load.
2041 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
2042 | if (C->isAllOnesValue()) |
2043 | return Builder.CreateAlignedLoad(Ty: ValTy, Ptr, Align: Alignment); |
2044 | |
2045 | // Convert the mask from an integer type to a vector of i1. |
2046 | unsigned NumElts = cast<FixedVectorType>(Val: ValTy)->getNumElements(); |
2047 | Mask = getX86MaskVec(Builder, Mask, NumElts); |
2048 | return Builder.CreateMaskedLoad(Ty: ValTy, Ptr, Alignment, Mask, PassThru: Passthru); |
2049 | } |
2050 | |
2051 | static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) { |
2052 | Type *Ty = CI.getType(); |
2053 | Value *Op0 = CI.getArgOperand(i: 0); |
2054 | Value *Res = Builder.CreateIntrinsic(ID: Intrinsic::abs, Types: Ty, |
2055 | Args: {Op0, Builder.getInt1(V: false)}); |
2056 | if (CI.arg_size() == 3) |
2057 | Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 2), Op0: Res, Op1: CI.getArgOperand(i: 1)); |
2058 | return Res; |
2059 | } |
2060 | |
2061 | static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) { |
2062 | Type *Ty = CI.getType(); |
2063 | |
2064 | // Arguments have a vXi32 type so cast to vXi64. |
2065 | Value *LHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 0), DestTy: Ty); |
2066 | Value *RHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 1), DestTy: Ty); |
2067 | |
2068 | if (IsSigned) { |
2069 | // Shift left then arithmetic shift right. |
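// For example, a lane holding 0x00000001FFFFFFFF becomes
// 0xFFFFFFFF00000000 after the shl and 0xFFFFFFFFFFFFFFFF (-1) after the
// ashr, i.e. the low 32 bits are sign-extended in place.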
2070 | Constant *ShiftAmt = ConstantInt::get(Ty, V: 32); |
2071 | LHS = Builder.CreateShl(LHS, RHS: ShiftAmt); |
2072 | LHS = Builder.CreateAShr(LHS, RHS: ShiftAmt); |
2073 | RHS = Builder.CreateShl(LHS: RHS, RHS: ShiftAmt); |
2074 | RHS = Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt); |
2075 | } else { |
2076 | // Clear the upper bits. |
2077 | Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff); |
2078 | LHS = Builder.CreateAnd(LHS, RHS: Mask); |
2079 | RHS = Builder.CreateAnd(LHS: RHS, RHS: Mask); |
2080 | } |
2081 | |
2082 | Value *Res = Builder.CreateMul(LHS, RHS); |
2083 | |
2084 | if (CI.arg_size() == 4) |
2085 | Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Res, Op1: CI.getArgOperand(i: 2)); |
2086 | |
2087 | return Res; |
2088 | } |
2089 | |
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits
// wide.
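// For example, a <4 x i1> compare result is widened below to <8 x i1> by
// shuffling in elements from a zero vector before the final bitcast to i8.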
2091 | static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, |
2092 | Value *Mask) { |
2093 | unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements(); |
2094 | if (Mask) { |
2095 | const auto *C = dyn_cast<Constant>(Val: Mask); |
2096 | if (!C || !C->isAllOnesValue()) |
2097 | Vec = Builder.CreateAnd(LHS: Vec, RHS: getX86MaskVec(Builder, Mask, NumElts)); |
2098 | } |
2099 | |
2100 | if (NumElts < 8) { |
2101 | int Indices[8]; |
2102 | for (unsigned i = 0; i != NumElts; ++i) |
2103 | Indices[i] = i; |
2104 | for (unsigned i = NumElts; i != 8; ++i) |
2105 | Indices[i] = NumElts + i % NumElts; |
2106 | Vec = Builder.CreateShuffleVector(V1: Vec, |
2107 | V2: Constant::getNullValue(Ty: Vec->getType()), |
2108 | Mask: Indices); |
2109 | } |
2110 | return Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: std::max(a: NumElts, b: 8U))); |
2111 | } |
2112 | |
2113 | static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, |
2114 | unsigned CC, bool Signed) { |
2115 | Value *Op0 = CI.getArgOperand(i: 0); |
2116 | unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements(); |
2117 | |
2118 | Value *Cmp; |
2119 | if (CC == 3) { |
2120 | Cmp = Constant::getNullValue( |
2121 | Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts)); |
2122 | } else if (CC == 7) { |
2123 | Cmp = Constant::getAllOnesValue( |
2124 | Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts)); |
2125 | } else { |
2126 | ICmpInst::Predicate Pred; |
2127 | switch (CC) { |
2128 | default: llvm_unreachable("Unknown condition code" ); |
2129 | case 0: Pred = ICmpInst::ICMP_EQ; break; |
2130 | case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; |
2131 | case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; |
2132 | case 4: Pred = ICmpInst::ICMP_NE; break; |
2133 | case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; |
2134 | case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; |
2135 | } |
2136 | Cmp = Builder.CreateICmp(P: Pred, LHS: Op0, RHS: CI.getArgOperand(i: 1)); |
2137 | } |
2138 | |
2139 | Value *Mask = CI.getArgOperand(i: CI.arg_size() - 1); |
2140 | |
2141 | return applyX86MaskOn1BitsVec(Builder, Vec: Cmp, Mask); |
2142 | } |
2143 | |
2144 | // Replace a masked intrinsic with an older unmasked intrinsic. |
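// For example, an avx512 masked shift becomes the corresponding SSE2/AVX2
// shift intrinsic on the first two operands, followed by a select between the
// result and the passthru operand on the mask.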
2145 | static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, |
2146 | Intrinsic::ID IID) { |
2147 | Value *Rep = |
2148 | Builder.CreateIntrinsic(ID: IID, Args: {CI.getArgOperand(i: 0), CI.getArgOperand(i: 1)}); |
2149 | return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Rep, Op1: CI.getArgOperand(i: 2)); |
2150 | } |
2151 | |
2152 | static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) { |
2153 | Value* A = CI.getArgOperand(i: 0); |
2154 | Value* B = CI.getArgOperand(i: 1); |
2155 | Value* Src = CI.getArgOperand(i: 2); |
2156 | Value* Mask = CI.getArgOperand(i: 3); |
2157 | |
2158 | Value* AndNode = Builder.CreateAnd(LHS: Mask, RHS: APInt(8, 1)); |
2159 | Value* Cmp = Builder.CreateIsNotNull(Arg: AndNode); |
Value* Extract1 = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
Value* Extract2 = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0);
2162 | Value* Select = Builder.CreateSelect(C: Cmp, True: Extract1, False: Extract2); |
2163 | return Builder.CreateInsertElement(Vec: A, NewElt: Select, Idx: (uint64_t)0); |
2164 | } |
2165 | |
2166 | static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) { |
2167 | Value* Op = CI.getArgOperand(i: 0); |
2168 | Type* ReturnOp = CI.getType(); |
2169 | unsigned NumElts = cast<FixedVectorType>(Val: CI.getType())->getNumElements(); |
2170 | Value *Mask = getX86MaskVec(Builder, Mask: Op, NumElts); |
2171 | return Builder.CreateSExt(V: Mask, DestTy: ReturnOp, Name: "vpmovm2" ); |
2172 | } |
2173 | |
2174 | // Replace intrinsic with unmasked version and a select. |
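// For example, avx512.mask.max.ps.128 is mapped below to x86_sse_max_ps and
// the two trailing passthru/mask operands are folded into a select.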
2175 | static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, |
2176 | CallBase &CI, Value *&Rep) { |
2177 | Name = Name.substr(Start: 12); // Remove avx512.mask. |
2178 | |
2179 | unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits(); |
2180 | unsigned EltWidth = CI.getType()->getScalarSizeInBits(); |
2181 | Intrinsic::ID IID; |
2182 | if (Name.starts_with(Prefix: "max.p" )) { |
2183 | if (VecWidth == 128 && EltWidth == 32) |
2184 | IID = Intrinsic::x86_sse_max_ps; |
2185 | else if (VecWidth == 128 && EltWidth == 64) |
2186 | IID = Intrinsic::x86_sse2_max_pd; |
2187 | else if (VecWidth == 256 && EltWidth == 32) |
2188 | IID = Intrinsic::x86_avx_max_ps_256; |
2189 | else if (VecWidth == 256 && EltWidth == 64) |
2190 | IID = Intrinsic::x86_avx_max_pd_256; |
2191 | else |
2192 | llvm_unreachable("Unexpected intrinsic" ); |
2193 | } else if (Name.starts_with(Prefix: "min.p" )) { |
2194 | if (VecWidth == 128 && EltWidth == 32) |
2195 | IID = Intrinsic::x86_sse_min_ps; |
2196 | else if (VecWidth == 128 && EltWidth == 64) |
2197 | IID = Intrinsic::x86_sse2_min_pd; |
2198 | else if (VecWidth == 256 && EltWidth == 32) |
2199 | IID = Intrinsic::x86_avx_min_ps_256; |
2200 | else if (VecWidth == 256 && EltWidth == 64) |
2201 | IID = Intrinsic::x86_avx_min_pd_256; |
2202 | else |
2203 | llvm_unreachable("Unexpected intrinsic" ); |
2204 | } else if (Name.starts_with(Prefix: "pshuf.b." )) { |
2205 | if (VecWidth == 128) |
2206 | IID = Intrinsic::x86_ssse3_pshuf_b_128; |
2207 | else if (VecWidth == 256) |
2208 | IID = Intrinsic::x86_avx2_pshuf_b; |
2209 | else if (VecWidth == 512) |
2210 | IID = Intrinsic::x86_avx512_pshuf_b_512; |
2211 | else |
2212 | llvm_unreachable("Unexpected intrinsic" ); |
2213 | } else if (Name.starts_with(Prefix: "pmul.hr.sw." )) { |
2214 | if (VecWidth == 128) |
2215 | IID = Intrinsic::x86_ssse3_pmul_hr_sw_128; |
2216 | else if (VecWidth == 256) |
2217 | IID = Intrinsic::x86_avx2_pmul_hr_sw; |
2218 | else if (VecWidth == 512) |
2219 | IID = Intrinsic::x86_avx512_pmul_hr_sw_512; |
2220 | else |
2221 | llvm_unreachable("Unexpected intrinsic" ); |
2222 | } else if (Name.starts_with(Prefix: "pmulh.w." )) { |
2223 | if (VecWidth == 128) |
2224 | IID = Intrinsic::x86_sse2_pmulh_w; |
2225 | else if (VecWidth == 256) |
2226 | IID = Intrinsic::x86_avx2_pmulh_w; |
2227 | else if (VecWidth == 512) |
2228 | IID = Intrinsic::x86_avx512_pmulh_w_512; |
2229 | else |
2230 | llvm_unreachable("Unexpected intrinsic" ); |
2231 | } else if (Name.starts_with(Prefix: "pmulhu.w." )) { |
2232 | if (VecWidth == 128) |
2233 | IID = Intrinsic::x86_sse2_pmulhu_w; |
2234 | else if (VecWidth == 256) |
2235 | IID = Intrinsic::x86_avx2_pmulhu_w; |
2236 | else if (VecWidth == 512) |
2237 | IID = Intrinsic::x86_avx512_pmulhu_w_512; |
2238 | else |
2239 | llvm_unreachable("Unexpected intrinsic" ); |
2240 | } else if (Name.starts_with(Prefix: "pmaddw.d." )) { |
2241 | if (VecWidth == 128) |
2242 | IID = Intrinsic::x86_sse2_pmadd_wd; |
2243 | else if (VecWidth == 256) |
2244 | IID = Intrinsic::x86_avx2_pmadd_wd; |
2245 | else if (VecWidth == 512) |
2246 | IID = Intrinsic::x86_avx512_pmaddw_d_512; |
2247 | else |
2248 | llvm_unreachable("Unexpected intrinsic" ); |
2249 | } else if (Name.starts_with(Prefix: "pmaddubs.w." )) { |
2250 | if (VecWidth == 128) |
2251 | IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128; |
2252 | else if (VecWidth == 256) |
2253 | IID = Intrinsic::x86_avx2_pmadd_ub_sw; |
2254 | else if (VecWidth == 512) |
2255 | IID = Intrinsic::x86_avx512_pmaddubs_w_512; |
2256 | else |
2257 | llvm_unreachable("Unexpected intrinsic" ); |
2258 | } else if (Name.starts_with(Prefix: "packsswb." )) { |
2259 | if (VecWidth == 128) |
2260 | IID = Intrinsic::x86_sse2_packsswb_128; |
2261 | else if (VecWidth == 256) |
2262 | IID = Intrinsic::x86_avx2_packsswb; |
2263 | else if (VecWidth == 512) |
2264 | IID = Intrinsic::x86_avx512_packsswb_512; |
2265 | else |
2266 | llvm_unreachable("Unexpected intrinsic" ); |
2267 | } else if (Name.starts_with(Prefix: "packssdw." )) { |
2268 | if (VecWidth == 128) |
2269 | IID = Intrinsic::x86_sse2_packssdw_128; |
2270 | else if (VecWidth == 256) |
2271 | IID = Intrinsic::x86_avx2_packssdw; |
2272 | else if (VecWidth == 512) |
2273 | IID = Intrinsic::x86_avx512_packssdw_512; |
2274 | else |
2275 | llvm_unreachable("Unexpected intrinsic" ); |
2276 | } else if (Name.starts_with(Prefix: "packuswb." )) { |
2277 | if (VecWidth == 128) |
2278 | IID = Intrinsic::x86_sse2_packuswb_128; |
2279 | else if (VecWidth == 256) |
2280 | IID = Intrinsic::x86_avx2_packuswb; |
2281 | else if (VecWidth == 512) |
2282 | IID = Intrinsic::x86_avx512_packuswb_512; |
2283 | else |
2284 | llvm_unreachable("Unexpected intrinsic" ); |
2285 | } else if (Name.starts_with(Prefix: "packusdw." )) { |
2286 | if (VecWidth == 128) |
2287 | IID = Intrinsic::x86_sse41_packusdw; |
2288 | else if (VecWidth == 256) |
2289 | IID = Intrinsic::x86_avx2_packusdw; |
2290 | else if (VecWidth == 512) |
2291 | IID = Intrinsic::x86_avx512_packusdw_512; |
2292 | else |
2293 | llvm_unreachable("Unexpected intrinsic" ); |
2294 | } else if (Name.starts_with(Prefix: "vpermilvar." )) { |
2295 | if (VecWidth == 128 && EltWidth == 32) |
2296 | IID = Intrinsic::x86_avx_vpermilvar_ps; |
2297 | else if (VecWidth == 128 && EltWidth == 64) |
2298 | IID = Intrinsic::x86_avx_vpermilvar_pd; |
2299 | else if (VecWidth == 256 && EltWidth == 32) |
2300 | IID = Intrinsic::x86_avx_vpermilvar_ps_256; |
2301 | else if (VecWidth == 256 && EltWidth == 64) |
2302 | IID = Intrinsic::x86_avx_vpermilvar_pd_256; |
2303 | else if (VecWidth == 512 && EltWidth == 32) |
2304 | IID = Intrinsic::x86_avx512_vpermilvar_ps_512; |
2305 | else if (VecWidth == 512 && EltWidth == 64) |
2306 | IID = Intrinsic::x86_avx512_vpermilvar_pd_512; |
2307 | else |
2308 | llvm_unreachable("Unexpected intrinsic" ); |
2309 | } else if (Name == "cvtpd2dq.256" ) { |
2310 | IID = Intrinsic::x86_avx_cvt_pd2dq_256; |
2311 | } else if (Name == "cvtpd2ps.256" ) { |
2312 | IID = Intrinsic::x86_avx_cvt_pd2_ps_256; |
2313 | } else if (Name == "cvttpd2dq.256" ) { |
2314 | IID = Intrinsic::x86_avx_cvtt_pd2dq_256; |
2315 | } else if (Name == "cvttps2dq.128" ) { |
2316 | IID = Intrinsic::x86_sse2_cvttps2dq; |
2317 | } else if (Name == "cvttps2dq.256" ) { |
2318 | IID = Intrinsic::x86_avx_cvtt_ps2dq_256; |
2319 | } else if (Name.starts_with(Prefix: "permvar." )) { |
2320 | bool IsFloat = CI.getType()->isFPOrFPVectorTy(); |
2321 | if (VecWidth == 256 && EltWidth == 32 && IsFloat) |
2322 | IID = Intrinsic::x86_avx2_permps; |
2323 | else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) |
2324 | IID = Intrinsic::x86_avx2_permd; |
2325 | else if (VecWidth == 256 && EltWidth == 64 && IsFloat) |
2326 | IID = Intrinsic::x86_avx512_permvar_df_256; |
2327 | else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) |
2328 | IID = Intrinsic::x86_avx512_permvar_di_256; |
2329 | else if (VecWidth == 512 && EltWidth == 32 && IsFloat) |
2330 | IID = Intrinsic::x86_avx512_permvar_sf_512; |
2331 | else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) |
2332 | IID = Intrinsic::x86_avx512_permvar_si_512; |
2333 | else if (VecWidth == 512 && EltWidth == 64 && IsFloat) |
2334 | IID = Intrinsic::x86_avx512_permvar_df_512; |
2335 | else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) |
2336 | IID = Intrinsic::x86_avx512_permvar_di_512; |
2337 | else if (VecWidth == 128 && EltWidth == 16) |
2338 | IID = Intrinsic::x86_avx512_permvar_hi_128; |
2339 | else if (VecWidth == 256 && EltWidth == 16) |
2340 | IID = Intrinsic::x86_avx512_permvar_hi_256; |
2341 | else if (VecWidth == 512 && EltWidth == 16) |
2342 | IID = Intrinsic::x86_avx512_permvar_hi_512; |
2343 | else if (VecWidth == 128 && EltWidth == 8) |
2344 | IID = Intrinsic::x86_avx512_permvar_qi_128; |
2345 | else if (VecWidth == 256 && EltWidth == 8) |
2346 | IID = Intrinsic::x86_avx512_permvar_qi_256; |
2347 | else if (VecWidth == 512 && EltWidth == 8) |
2348 | IID = Intrinsic::x86_avx512_permvar_qi_512; |
2349 | else |
2350 | llvm_unreachable("Unexpected intrinsic" ); |
2351 | } else if (Name.starts_with(Prefix: "dbpsadbw." )) { |
2352 | if (VecWidth == 128) |
2353 | IID = Intrinsic::x86_avx512_dbpsadbw_128; |
2354 | else if (VecWidth == 256) |
2355 | IID = Intrinsic::x86_avx512_dbpsadbw_256; |
2356 | else if (VecWidth == 512) |
2357 | IID = Intrinsic::x86_avx512_dbpsadbw_512; |
2358 | else |
2359 | llvm_unreachable("Unexpected intrinsic" ); |
2360 | } else if (Name.starts_with(Prefix: "pmultishift.qb." )) { |
2361 | if (VecWidth == 128) |
2362 | IID = Intrinsic::x86_avx512_pmultishift_qb_128; |
2363 | else if (VecWidth == 256) |
2364 | IID = Intrinsic::x86_avx512_pmultishift_qb_256; |
2365 | else if (VecWidth == 512) |
2366 | IID = Intrinsic::x86_avx512_pmultishift_qb_512; |
2367 | else |
2368 | llvm_unreachable("Unexpected intrinsic" ); |
2369 | } else if (Name.starts_with(Prefix: "conflict." )) { |
2370 | if (Name[9] == 'd' && VecWidth == 128) |
2371 | IID = Intrinsic::x86_avx512_conflict_d_128; |
2372 | else if (Name[9] == 'd' && VecWidth == 256) |
2373 | IID = Intrinsic::x86_avx512_conflict_d_256; |
2374 | else if (Name[9] == 'd' && VecWidth == 512) |
2375 | IID = Intrinsic::x86_avx512_conflict_d_512; |
2376 | else if (Name[9] == 'q' && VecWidth == 128) |
2377 | IID = Intrinsic::x86_avx512_conflict_q_128; |
2378 | else if (Name[9] == 'q' && VecWidth == 256) |
2379 | IID = Intrinsic::x86_avx512_conflict_q_256; |
2380 | else if (Name[9] == 'q' && VecWidth == 512) |
2381 | IID = Intrinsic::x86_avx512_conflict_q_512; |
2382 | else |
2383 | llvm_unreachable("Unexpected intrinsic" ); |
2384 | } else if (Name.starts_with(Prefix: "pavg." )) { |
2385 | if (Name[5] == 'b' && VecWidth == 128) |
2386 | IID = Intrinsic::x86_sse2_pavg_b; |
2387 | else if (Name[5] == 'b' && VecWidth == 256) |
2388 | IID = Intrinsic::x86_avx2_pavg_b; |
2389 | else if (Name[5] == 'b' && VecWidth == 512) |
2390 | IID = Intrinsic::x86_avx512_pavg_b_512; |
2391 | else if (Name[5] == 'w' && VecWidth == 128) |
2392 | IID = Intrinsic::x86_sse2_pavg_w; |
2393 | else if (Name[5] == 'w' && VecWidth == 256) |
2394 | IID = Intrinsic::x86_avx2_pavg_w; |
2395 | else if (Name[5] == 'w' && VecWidth == 512) |
2396 | IID = Intrinsic::x86_avx512_pavg_w_512; |
2397 | else |
2398 | llvm_unreachable("Unexpected intrinsic" ); |
2399 | } else |
2400 | return false; |
2401 | |
2402 | SmallVector<Value *, 4> Args(CI.args()); |
2403 | Args.pop_back(); |
2404 | Args.pop_back(); |
2405 | Rep = Builder.CreateIntrinsic(ID: IID, Args); |
2406 | unsigned NumArgs = CI.arg_size(); |
2407 | Rep = emitX86Select(Builder, Mask: CI.getArgOperand(i: NumArgs - 1), Op0: Rep, |
2408 | Op1: CI.getArgOperand(i: NumArgs - 2)); |
2409 | return true; |
2410 | } |
2411 | |
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain/release marker.
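/// For example, a (hypothetical) asm string such as
///   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
/// has its '#' replaced with ';' so the marker survives as an asm comment.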
2414 | void llvm::UpgradeInlineAsmString(std::string *AsmStr) { |
2415 | size_t Pos; |
2416 | if (AsmStr->find(s: "mov\tfp" ) == 0 && |
2417 | AsmStr->find(s: "objc_retainAutoreleaseReturnValue" ) != std::string::npos && |
2418 | (Pos = AsmStr->find(s: "# marker" )) != std::string::npos) { |
2419 | AsmStr->replace(pos: Pos, n1: 1, s: ";" ); |
2420 | } |
2421 | } |
2422 | |
2423 | static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, |
2424 | Function *F, IRBuilder<> &Builder) { |
2425 | Value *Rep = nullptr; |
2426 | |
2427 | if (Name == "abs.i" || Name == "abs.ll" ) { |
2428 | Value *Arg = CI->getArgOperand(i: 0); |
2429 | Value *Neg = Builder.CreateNeg(V: Arg, Name: "neg" ); |
2430 | Value *Cmp = Builder.CreateICmpSGE( |
2431 | LHS: Arg, RHS: llvm::Constant::getNullValue(Ty: Arg->getType()), Name: "abs.cond" ); |
2432 | Rep = Builder.CreateSelect(C: Cmp, True: Arg, False: Neg, Name: "abs" ); |
2433 | } else if (Name == "abs.bf16" || Name == "abs.bf16x2" ) { |
2434 | Type *Ty = (Name == "abs.bf16" ) |
2435 | ? Builder.getBFloatTy() |
2436 | : FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 2); |
2437 | Value *Arg = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Ty); |
2438 | Value *Abs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::nvvm_fabs, V: Arg); |
2439 | Rep = Builder.CreateBitCast(V: Abs, DestTy: CI->getType()); |
2440 | } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d" ) { |
2441 | Intrinsic::ID IID = (Name == "fabs.ftz.f" ) ? Intrinsic::nvvm_fabs_ftz |
2442 | : Intrinsic::nvvm_fabs; |
2443 | Rep = Builder.CreateUnaryIntrinsic(ID: IID, V: CI->getArgOperand(i: 0)); |
2444 | } else if (Name.starts_with(Prefix: "atomic.load.add.f32.p" ) || |
2445 | Name.starts_with(Prefix: "atomic.load.add.f64.p" )) { |
2446 | Value *Ptr = CI->getArgOperand(i: 0); |
2447 | Value *Val = CI->getArgOperand(i: 1); |
2448 | Rep = Builder.CreateAtomicRMW(Op: AtomicRMWInst::FAdd, Ptr, Val, Align: MaybeAlign(), |
2449 | Ordering: AtomicOrdering::SequentiallyConsistent); |
2450 | } else if (Name.starts_with(Prefix: "atomic.load.inc.32.p" ) || |
2451 | Name.starts_with(Prefix: "atomic.load.dec.32.p" )) { |
2452 | Value *Ptr = CI->getArgOperand(i: 0); |
2453 | Value *Val = CI->getArgOperand(i: 1); |
2454 | auto Op = Name.starts_with(Prefix: "atomic.load.inc" ) ? AtomicRMWInst::UIncWrap |
2455 | : AtomicRMWInst::UDecWrap; |
2456 | Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, Align: MaybeAlign(), |
2457 | Ordering: AtomicOrdering::SequentiallyConsistent); |
2458 | } else if (Name.consume_front(Prefix: "max." ) && |
2459 | (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || |
2460 | Name == "ui" || Name == "ull" )) { |
2461 | Value *Arg0 = CI->getArgOperand(i: 0); |
2462 | Value *Arg1 = CI->getArgOperand(i: 1); |
2463 | Value *Cmp = Name.starts_with(Prefix: "u" ) |
2464 | ? Builder.CreateICmpUGE(LHS: Arg0, RHS: Arg1, Name: "max.cond" ) |
2465 | : Builder.CreateICmpSGE(LHS: Arg0, RHS: Arg1, Name: "max.cond" ); |
2466 | Rep = Builder.CreateSelect(C: Cmp, True: Arg0, False: Arg1, Name: "max" ); |
2467 | } else if (Name.consume_front(Prefix: "min." ) && |
2468 | (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || |
2469 | Name == "ui" || Name == "ull" )) { |
2470 | Value *Arg0 = CI->getArgOperand(i: 0); |
2471 | Value *Arg1 = CI->getArgOperand(i: 1); |
2472 | Value *Cmp = Name.starts_with(Prefix: "u" ) |
2473 | ? Builder.CreateICmpULE(LHS: Arg0, RHS: Arg1, Name: "min.cond" ) |
2474 | : Builder.CreateICmpSLE(LHS: Arg0, RHS: Arg1, Name: "min.cond" ); |
2475 | Rep = Builder.CreateSelect(C: Cmp, True: Arg0, False: Arg1, Name: "min" ); |
2476 | } else if (Name == "clz.ll" ) { |
2477 | // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64. |
2478 | Value *Arg = CI->getArgOperand(i: 0); |
2479 | Value *Ctlz = Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: {Arg->getType()}, |
2480 | Args: {Arg, Builder.getFalse()}, |
2481 | /*FMFSource=*/nullptr, Name: "ctlz" ); |
2482 | Rep = Builder.CreateTrunc(V: Ctlz, DestTy: Builder.getInt32Ty(), Name: "ctlz.trunc" ); |
2483 | } else if (Name == "popc.ll" ) { |
2484 | // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an |
2485 | // i64. |
2486 | Value *Arg = CI->getArgOperand(i: 0); |
2487 | Value *Popc = Builder.CreateIntrinsic(ID: Intrinsic::ctpop, Types: {Arg->getType()}, |
2488 | Args: Arg, /*FMFSource=*/nullptr, Name: "ctpop" ); |
2489 | Rep = Builder.CreateTrunc(V: Popc, DestTy: Builder.getInt32Ty(), Name: "ctpop.trunc" ); |
2490 | } else if (Name == "h2f" ) { |
2491 | Rep = Builder.CreateIntrinsic(ID: Intrinsic::convert_from_fp16, |
2492 | Types: {Builder.getFloatTy()}, Args: CI->getArgOperand(i: 0), |
2493 | /*FMFSource=*/nullptr, Name: "h2f" ); |
2494 | } else if (Name.consume_front(Prefix: "bitcast." ) && |
2495 | (Name == "f2i" || Name == "i2f" || Name == "ll2d" || |
2496 | Name == "d2ll" )) { |
2497 | Rep = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType()); |
2498 | } else if (Name == "rotate.b32" ) { |
2499 | Value *Arg = CI->getOperand(i_nocapture: 0); |
2500 | Value *ShiftAmt = CI->getOperand(i_nocapture: 1); |
2501 | Rep = Builder.CreateIntrinsic(RetTy: Builder.getInt32Ty(), ID: Intrinsic::fshl, |
2502 | Args: {Arg, Arg, ShiftAmt}); |
2503 | } else if (Name == "rotate.b64" ) { |
2504 | Type *Int64Ty = Builder.getInt64Ty(); |
2505 | Value *Arg = CI->getOperand(i_nocapture: 0); |
2506 | Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty); |
2507 | Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl, |
2508 | Args: {Arg, Arg, ZExtShiftAmt}); |
2509 | } else if (Name == "rotate.right.b64" ) { |
2510 | Type *Int64Ty = Builder.getInt64Ty(); |
2511 | Value *Arg = CI->getOperand(i_nocapture: 0); |
2512 | Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty); |
2513 | Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshr, |
2514 | Args: {Arg, Arg, ZExtShiftAmt}); |
2515 | } else if (Name == "swap.lo.hi.b64" ) { |
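     | // Swapping the two 32-bit halves of an i64 is the same as rotating it by |
     | // 32 bits, so reuse the funnel-shift lowering with a constant amount. |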
2516 | Type *Int64Ty = Builder.getInt64Ty(); |
2517 | Value *Arg = CI->getOperand(i_nocapture: 0); |
2518 | Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl, |
2519 | Args: {Arg, Arg, Builder.getInt64(C: 32)}); |
2520 | } else if ((Name.consume_front(Prefix: "ptr.gen.to." ) && |
2521 | consumeNVVMPtrAddrSpace(Name)) || |
2522 | (Name.consume_front(Prefix: "ptr." ) && consumeNVVMPtrAddrSpace(Name) && |
2523 | Name.starts_with(Prefix: ".to.gen" ))) { |
2524 | Rep = Builder.CreateAddrSpaceCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType()); |
2525 | } else if (Name.consume_front(Prefix: "ldg.global" )) { |
2526 | Value *Ptr = CI->getArgOperand(i: 0); |
2527 | Align PtrAlign = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getAlignValue(); |
2528 | // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL |
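     | // The load is rebuilt in that address space and tagged !invariant.load, |
     | // roughly (a sketch with hypothetical value names): |
     | //   %g = addrspacecast ptr %p to ptr addrspace(1) |
     | //   %v = load <ty>, ptr addrspace(1) %g, align <n>, !invariant.load !0 |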
2529 | Value *ASC = Builder.CreateAddrSpaceCast(V: Ptr, DestTy: Builder.getPtrTy(AddrSpace: 1)); |
2530 | Instruction *LD = Builder.CreateAlignedLoad(Ty: CI->getType(), Ptr: ASC, Align: PtrAlign); |
2531 | MDNode *MD = MDNode::get(Context&: Builder.getContext(), MDs: {}); |
2532 | LD->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD); |
2533 | return LD; |
2534 | } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync" ) { |
2535 | Value *Arg = |
2536 | Name.ends_with(Suffix: '0') ? Builder.getInt32(C: 0) : CI->getArgOperand(i: 0); |
2537 | Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_aligned_all, |
2538 | Types: {}, Args: {Arg}); |
2539 | } else if (Name == "barrier" ) { |
2540 | Rep = Builder.CreateIntrinsic( |
2541 | ID: Intrinsic::nvvm_barrier_cta_sync_aligned_count, Types: {}, |
2542 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)}); |
2543 | } else if (Name == "barrier.sync" ) { |
2544 | Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_all, Types: {}, |
2545 | Args: {CI->getArgOperand(i: 0)}); |
2546 | } else if (Name == "barrier.sync.cnt" ) { |
2547 | Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_count, Types: {}, |
2548 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)}); |
2549 | } else { |
2550 | Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); |
2551 | if (IID != Intrinsic::not_intrinsic && |
2552 | !F->getReturnType()->getScalarType()->isBFloatTy()) { |
2553 | rename(GV: F); |
2554 | Function *NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID); |
2555 | SmallVector<Value *, 2> Args; |
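     | // The legacy declarations took integer types where the new intrinsics |
     | // take bfloat; bitcast such arguments (and, below, the result) as needed. |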
2556 | for (size_t I = 0; I < NewFn->arg_size(); ++I) { |
2557 | Value *Arg = CI->getArgOperand(i: I); |
2558 | Type *OldType = Arg->getType(); |
2559 | Type *NewType = NewFn->getArg(i: I)->getType(); |
2560 | Args.push_back( |
2561 | Elt: (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy()) |
2562 | ? Builder.CreateBitCast(V: Arg, DestTy: NewType) |
2563 | : Arg); |
2564 | } |
2565 | Rep = Builder.CreateCall(Callee: NewFn, Args); |
2566 | if (F->getReturnType()->isIntegerTy()) |
2567 | Rep = Builder.CreateBitCast(V: Rep, DestTy: F->getReturnType()); |
2568 | } |
2569 | } |
2570 | |
2571 | return Rep; |
2572 | } |
2573 | |
2574 | static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, |
2575 | IRBuilder<> &Builder) { |
2576 | LLVMContext &C = F->getContext(); |
2577 | Value *Rep = nullptr; |
2578 | |
2579 | if (Name.starts_with(Prefix: "sse4a.movnt." )) { |
2580 | SmallVector<Metadata *, 1> Elts; |
2581 | Elts.push_back( |
2582 | Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1))); |
2583 | MDNode *Node = MDNode::get(Context&: C, MDs: Elts); |
2584 | |
2585 | Value *Arg0 = CI->getArgOperand(i: 0); |
2586 | Value *Arg1 = CI->getArgOperand(i: 1); |
2587 | |
2588 | // Nontemporal (unaligned) store of the 0'th element of the float/double |
2589 | // vector. |
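     | // A minimal sketch of the emitted IR (hypothetical value names): |
     | //   %e = extractelement <4 x float> %v, i64 0 |
     | //   store float %e, ptr %p, align 1, !nontemporal !0 |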
2590 | Value *Extract = |
2591 | Builder.CreateExtractElement(Vec: Arg1, Idx: (uint64_t)0, Name: "extractelement" ); |
2592 | |
2593 | StoreInst *SI = Builder.CreateAlignedStore(Val: Extract, Ptr: Arg0, Align: Align(1)); |
2594 | SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node); |
2595 | } else if (Name.starts_with(Prefix: "avx.movnt." ) || |
2596 | Name.starts_with(Prefix: "avx512.storent." )) { |
2597 | SmallVector<Metadata *, 1> Elts; |
2598 | Elts.push_back( |
2599 | Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1))); |
2600 | MDNode *Node = MDNode::get(Context&: C, MDs: Elts); |
2601 | |
2602 | Value *Arg0 = CI->getArgOperand(i: 0); |
2603 | Value *Arg1 = CI->getArgOperand(i: 1); |
2604 | |
2605 | StoreInst *SI = Builder.CreateAlignedStore( |
2606 | Val: Arg1, Ptr: Arg0, |
2607 | Align: Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)); |
2608 | SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node); |
2609 | } else if (Name == "sse2.storel.dq" ) { |
2610 | Value *Arg0 = CI->getArgOperand(i: 0); |
2611 | Value *Arg1 = CI->getArgOperand(i: 1); |
2612 | |
2613 | auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2); |
2614 | Value *BC0 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast" ); |
2615 | Value *Elt = Builder.CreateExtractElement(Vec: BC0, Idx: (uint64_t)0); |
2616 | Builder.CreateAlignedStore(Val: Elt, Ptr: Arg0, Align: Align(1)); |
2617 | } else if (Name.starts_with(Prefix: "sse.storeu." ) || |
2618 | Name.starts_with(Prefix: "sse2.storeu." ) || |
2619 | Name.starts_with(Prefix: "avx.storeu." )) { |
2620 | Value *Arg0 = CI->getArgOperand(i: 0); |
2621 | Value *Arg1 = CI->getArgOperand(i: 1); |
2622 | Builder.CreateAlignedStore(Val: Arg1, Ptr: Arg0, Align: Align(1)); |
2623 | } else if (Name == "avx512.mask.store.ss" ) { |
2624 | Value *Mask = Builder.CreateAnd(LHS: CI->getArgOperand(i: 2), RHS: Builder.getInt8(C: 1)); |
2625 | upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1), |
2626 | Mask, Aligned: false); |
2627 | } else if (Name.starts_with(Prefix: "avx512.mask.store" )) { |
2628 | // "avx512.mask.storeu." or "avx512.mask.store." |
2629 | bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu". |
2630 | upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1), |
2631 | Mask: CI->getArgOperand(i: 2), Aligned); |
2632 | } else if (Name.starts_with(Prefix: "sse2.pcmp" ) || Name.starts_with(Prefix: "avx2.pcmp" )) { |
2633 | // Upgrade packed integer vector compare intrinsics to compare instructions. |
2634 | // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt." |
2635 | bool CmpEq = Name[9] == 'e'; |
2636 | Rep = Builder.CreateICmp(P: CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, |
2637 | LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
2638 | Rep = Builder.CreateSExt(V: Rep, DestTy: CI->getType(), Name: "" ); |
2639 | } else if (Name.starts_with(Prefix: "avx512.broadcastm" )) { |
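     | // broadcastm{b2q,w2d}: zero-extend the k-mask to the element width and |
     | // splat it into every element of the destination vector. |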
2640 | Type *ExtTy = Type::getInt32Ty(C); |
2641 | if (CI->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 8)) |
2642 | ExtTy = Type::getInt64Ty(C); |
2643 | unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / |
2644 | ExtTy->getPrimitiveSizeInBits(); |
2645 | Rep = Builder.CreateZExt(V: CI->getArgOperand(i: 0), DestTy: ExtTy); |
2646 | Rep = Builder.CreateVectorSplat(NumElts, V: Rep); |
2647 | } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd" ) { |
2648 | Value *Vec = CI->getArgOperand(i: 0); |
2649 | Value *Elt0 = Builder.CreateExtractElement(Vec, Idx: (uint64_t)0); |
2650 | Elt0 = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: Elt0->getType(), Args: Elt0); |
2651 | Rep = Builder.CreateInsertElement(Vec, NewElt: Elt0, Idx: (uint64_t)0); |
2652 | } else if (Name.starts_with(Prefix: "avx.sqrt.p" ) || |
2653 | Name.starts_with(Prefix: "sse2.sqrt.p" ) || |
2654 | Name.starts_with(Prefix: "sse.sqrt.p" )) { |
2655 | Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(), |
2656 | Args: {CI->getArgOperand(i: 0)}); |
2657 | } else if (Name.starts_with(Prefix: "avx512.mask.sqrt.p" )) { |
2658 | if (CI->arg_size() == 4 && |
2659 | (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) || |
2660 | cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) { |
2661 | Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512 |
2662 | : Intrinsic::x86_avx512_sqrt_pd_512; |
2663 | |
2664 | Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 3)}; |
2665 | Rep = Builder.CreateIntrinsic(ID: IID, Args); |
2666 | } else { |
2667 | Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(), |
2668 | Args: {CI->getArgOperand(i: 0)}); |
2669 | } |
2670 | Rep = |
2671 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
2672 | } else if (Name.starts_with(Prefix: "avx512.ptestm" ) || |
2673 | Name.starts_with(Prefix: "avx512.ptestnm" )) { |
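     | // ptestm sets a mask bit when (Op0 & Op1) is nonzero in that element; |
     | // ptestnm sets it when the AND is zero. |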
2674 | Value *Op0 = CI->getArgOperand(i: 0); |
2675 | Value *Op1 = CI->getArgOperand(i: 1); |
2676 | Value *Mask = CI->getArgOperand(i: 2); |
2677 | Rep = Builder.CreateAnd(LHS: Op0, RHS: Op1); |
2678 | llvm::Type *Ty = Op0->getType(); |
2679 | Value *Zero = llvm::Constant::getNullValue(Ty); |
2680 | ICmpInst::Predicate Pred = Name.starts_with(Prefix: "avx512.ptestm" ) |
2681 | ? ICmpInst::ICMP_NE |
2682 | : ICmpInst::ICMP_EQ; |
2683 | Rep = Builder.CreateICmp(P: Pred, LHS: Rep, RHS: Zero); |
2684 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask); |
2685 | } else if (Name.starts_with(Prefix: "avx512.mask.pbroadcast" )) { |
2686 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 1)->getType()) |
2687 | ->getNumElements(); |
2688 | Rep = Builder.CreateVectorSplat(NumElts, V: CI->getArgOperand(i: 0)); |
2689 | Rep = |
2690 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
2691 | } else if (Name.starts_with(Prefix: "avx512.kunpck" )) { |
2692 | unsigned NumElts = CI->getType()->getScalarSizeInBits(); |
2693 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts); |
2694 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts); |
2695 | int Indices[64]; |
2696 | for (unsigned i = 0; i != NumElts; ++i) |
2697 | Indices[i] = i; |
2698 | |
2699 | // First extract half of each vector. This gives better codegen than |
2700 | // doing it in a single shuffle. |
2701 | LHS = Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2)); |
2702 | RHS = Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2)); |
2703 | // Concat the vectors. |
2704 | // NOTE: Operands have to be swapped to match intrinsic definition. |
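     | // For the 16-bit kunpck.bw this computes, roughly, |
     | //   result = (arg0 & 0xff) << 8 | (arg1 & 0xff). |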
2705 | Rep = Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts)); |
2706 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2707 | } else if (Name == "avx512.kand.w" ) { |
2708 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2709 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2710 | Rep = Builder.CreateAnd(LHS, RHS); |
2711 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2712 | } else if (Name == "avx512.kandn.w" ) { |
2713 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2714 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2715 | LHS = Builder.CreateNot(V: LHS); |
2716 | Rep = Builder.CreateAnd(LHS, RHS); |
2717 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2718 | } else if (Name == "avx512.kor.w" ) { |
2719 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2720 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2721 | Rep = Builder.CreateOr(LHS, RHS); |
2722 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2723 | } else if (Name == "avx512.kxor.w" ) { |
2724 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2725 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2726 | Rep = Builder.CreateXor(LHS, RHS); |
2727 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2728 | } else if (Name == "avx512.kxnor.w" ) { |
2729 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2730 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2731 | LHS = Builder.CreateNot(V: LHS); |
2732 | Rep = Builder.CreateXor(LHS, RHS); |
2733 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2734 | } else if (Name == "avx512.knot.w" ) { |
2735 | Rep = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2736 | Rep = Builder.CreateNot(V: Rep); |
2737 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2738 | } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w" ) { |
2739 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2740 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2741 | Rep = Builder.CreateOr(LHS, RHS); |
2742 | Rep = Builder.CreateBitCast(V: Rep, DestTy: Builder.getInt16Ty()); |
2743 | Value *C; |
2744 | if (Name[14] == 'c') |
2745 | C = ConstantInt::getAllOnesValue(Ty: Builder.getInt16Ty()); |
2746 | else |
2747 | C = ConstantInt::getNullValue(Ty: Builder.getInt16Ty()); |
2748 | Rep = Builder.CreateICmpEQ(LHS: Rep, RHS: C); |
2749 | Rep = Builder.CreateZExt(V: Rep, DestTy: Builder.getInt32Ty()); |
2750 | } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" || |
2751 | Name == "sse.sub.ss" || Name == "sse2.sub.sd" || |
2752 | Name == "sse.mul.ss" || Name == "sse2.mul.sd" || |
2753 | Name == "sse.div.ss" || Name == "sse2.div.sd" ) { |
2754 | Type *I32Ty = Type::getInt32Ty(C); |
2755 | Value *Elt0 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 0), |
2756 | Idx: ConstantInt::get(Ty: I32Ty, V: 0)); |
2757 | Value *Elt1 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), |
2758 | Idx: ConstantInt::get(Ty: I32Ty, V: 0)); |
2759 | Value *EltOp; |
2760 | if (Name.contains(Other: ".add." )) |
2761 | EltOp = Builder.CreateFAdd(L: Elt0, R: Elt1); |
2762 | else if (Name.contains(Other: ".sub." )) |
2763 | EltOp = Builder.CreateFSub(L: Elt0, R: Elt1); |
2764 | else if (Name.contains(Other: ".mul." )) |
2765 | EltOp = Builder.CreateFMul(L: Elt0, R: Elt1); |
2766 | else |
2767 | EltOp = Builder.CreateFDiv(L: Elt0, R: Elt1); |
2768 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: EltOp, |
2769 | Idx: ConstantInt::get(Ty: I32Ty, V: 0)); |
2770 | } else if (Name.starts_with(Prefix: "avx512.mask.pcmp" )) { |
2771 | // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." |
2772 | bool CmpEq = Name[16] == 'e'; |
2773 | Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: CmpEq ? 0 : 6, Signed: true); |
2774 | } else if (Name.starts_with(Prefix: "avx512.mask.vpshufbitqmb." )) { |
2775 | Type *OpTy = CI->getArgOperand(i: 0)->getType(); |
2776 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
2777 | Intrinsic::ID IID; |
2778 | switch (VecWidth) { |
2779 | default: |
2780 | llvm_unreachable("Unexpected intrinsic" ); |
2781 | case 128: |
2782 | IID = Intrinsic::x86_avx512_vpshufbitqmb_128; |
2783 | break; |
2784 | case 256: |
2785 | IID = Intrinsic::x86_avx512_vpshufbitqmb_256; |
2786 | break; |
2787 | case 512: |
2788 | IID = Intrinsic::x86_avx512_vpshufbitqmb_512; |
2789 | break; |
2790 | } |
2791 | |
2792 | Rep = |
2793 | Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)}); |
2794 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2)); |
2795 | } else if (Name.starts_with(Prefix: "avx512.mask.fpclass.p" )) { |
2796 | Type *OpTy = CI->getArgOperand(i: 0)->getType(); |
2797 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
2798 | unsigned EltWidth = OpTy->getScalarSizeInBits(); |
2799 | Intrinsic::ID IID; |
2800 | if (VecWidth == 128 && EltWidth == 32) |
2801 | IID = Intrinsic::x86_avx512_fpclass_ps_128; |
2802 | else if (VecWidth == 256 && EltWidth == 32) |
2803 | IID = Intrinsic::x86_avx512_fpclass_ps_256; |
2804 | else if (VecWidth == 512 && EltWidth == 32) |
2805 | IID = Intrinsic::x86_avx512_fpclass_ps_512; |
2806 | else if (VecWidth == 128 && EltWidth == 64) |
2807 | IID = Intrinsic::x86_avx512_fpclass_pd_128; |
2808 | else if (VecWidth == 256 && EltWidth == 64) |
2809 | IID = Intrinsic::x86_avx512_fpclass_pd_256; |
2810 | else if (VecWidth == 512 && EltWidth == 64) |
2811 | IID = Intrinsic::x86_avx512_fpclass_pd_512; |
2812 | else |
2813 | llvm_unreachable("Unexpected intrinsic" ); |
2814 | |
2815 | Rep = |
2816 | Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)}); |
2817 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2)); |
2818 | } else if (Name.starts_with(Prefix: "avx512.cmp.p" )) { |
2819 | SmallVector<Value *, 4> Args(CI->args()); |
2820 | Type *OpTy = Args[0]->getType(); |
2821 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
2822 | unsigned EltWidth = OpTy->getScalarSizeInBits(); |
2823 | Intrinsic::ID IID; |
2824 | if (VecWidth == 128 && EltWidth == 32) |
2825 | IID = Intrinsic::x86_avx512_mask_cmp_ps_128; |
2826 | else if (VecWidth == 256 && EltWidth == 32) |
2827 | IID = Intrinsic::x86_avx512_mask_cmp_ps_256; |
2828 | else if (VecWidth == 512 && EltWidth == 32) |
2829 | IID = Intrinsic::x86_avx512_mask_cmp_ps_512; |
2830 | else if (VecWidth == 128 && EltWidth == 64) |
2831 | IID = Intrinsic::x86_avx512_mask_cmp_pd_128; |
2832 | else if (VecWidth == 256 && EltWidth == 64) |
2833 | IID = Intrinsic::x86_avx512_mask_cmp_pd_256; |
2834 | else if (VecWidth == 512 && EltWidth == 64) |
2835 | IID = Intrinsic::x86_avx512_mask_cmp_pd_512; |
2836 | else |
2837 | llvm_unreachable("Unexpected intrinsic" ); |
2838 | |
2839 | Value *Mask = Constant::getAllOnesValue(Ty: CI->getType()); |
2840 | if (VecWidth == 512) |
2841 | std::swap(a&: Mask, b&: Args.back()); |
2842 | Args.push_back(Elt: Mask); |
2843 | |
2844 | Rep = Builder.CreateIntrinsic(ID: IID, Args); |
2845 | } else if (Name.starts_with(Prefix: "avx512.mask.cmp." )) { |
2846 | // Integer compare intrinsics. |
2847 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2848 | Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: true); |
2849 | } else if (Name.starts_with(Prefix: "avx512.mask.ucmp." )) { |
2850 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2851 | Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: false); |
2852 | } else if (Name.starts_with(Prefix: "avx512.cvtb2mask." ) || |
2853 | Name.starts_with(Prefix: "avx512.cvtw2mask." ) || |
2854 | Name.starts_with(Prefix: "avx512.cvtd2mask." ) || |
2855 | Name.starts_with(Prefix: "avx512.cvtq2mask." )) { |
2856 | Value *Op = CI->getArgOperand(i: 0); |
2857 | Value *Zero = llvm::Constant::getNullValue(Ty: Op->getType()); |
2858 | Rep = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: Op, RHS: Zero); |
2859 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: nullptr); |
2860 | } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" || |
2861 | Name == "ssse3.pabs.d.128" || Name.starts_with(Prefix: "avx2.pabs" ) || |
2862 | Name.starts_with(Prefix: "avx512.mask.pabs" )) { |
2863 | Rep = upgradeAbs(Builder, CI&: *CI); |
2864 | } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" || |
2865 | Name == "sse41.pmaxsd" || Name.starts_with(Prefix: "avx2.pmaxs" ) || |
2866 | Name.starts_with(Prefix: "avx512.mask.pmaxs" )) { |
2867 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smax); |
2868 | } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" || |
2869 | Name == "sse41.pmaxud" || Name.starts_with(Prefix: "avx2.pmaxu" ) || |
2870 | Name.starts_with(Prefix: "avx512.mask.pmaxu" )) { |
2871 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umax); |
2872 | } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" || |
2873 | Name == "sse41.pminsd" || Name.starts_with(Prefix: "avx2.pmins" ) || |
2874 | Name.starts_with(Prefix: "avx512.mask.pmins" )) { |
2875 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smin); |
2876 | } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" || |
2877 | Name == "sse41.pminud" || Name.starts_with(Prefix: "avx2.pminu" ) || |
2878 | Name.starts_with(Prefix: "avx512.mask.pminu" )) { |
2879 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umin); |
2880 | } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" || |
2881 | Name == "avx512.pmulu.dq.512" || |
2882 | Name.starts_with(Prefix: "avx512.mask.pmulu.dq." )) { |
2883 | Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: false); |
2884 | } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" || |
2885 | Name == "avx512.pmul.dq.512" || |
2886 | Name.starts_with(Prefix: "avx512.mask.pmul.dq." )) { |
2887 | Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: true); |
2888 | } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" || |
2889 | Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd" ) { |
2890 | Rep = |
2891 | Builder.CreateSIToFP(V: CI->getArgOperand(i: 1), |
2892 | DestTy: cast<VectorType>(Val: CI->getType())->getElementType()); |
2893 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
2894 | } else if (Name == "avx512.cvtusi2sd" ) { |
2895 | Rep = |
2896 | Builder.CreateUIToFP(V: CI->getArgOperand(i: 1), |
2897 | DestTy: cast<VectorType>(Val: CI->getType())->getElementType()); |
2898 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
2899 | } else if (Name == "sse2.cvtss2sd" ) { |
2900 | Rep = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), Idx: (uint64_t)0); |
2901 | Rep = Builder.CreateFPExt( |
2902 | V: Rep, DestTy: cast<VectorType>(Val: CI->getType())->getElementType()); |
2903 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
2904 | } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" || |
2905 | Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" || |
2906 | Name.starts_with(Prefix: "avx512.mask.cvtdq2pd." ) || |
2907 | Name.starts_with(Prefix: "avx512.mask.cvtudq2pd." ) || |
2908 | Name.starts_with(Prefix: "avx512.mask.cvtdq2ps." ) || |
2909 | Name.starts_with(Prefix: "avx512.mask.cvtudq2ps." ) || |
2910 | Name.starts_with(Prefix: "avx512.mask.cvtqq2pd." ) || |
2911 | Name.starts_with(Prefix: "avx512.mask.cvtuqq2pd." ) || |
2912 | Name == "avx512.mask.cvtqq2ps.256" || |
2913 | Name == "avx512.mask.cvtqq2ps.512" || |
2914 | Name == "avx512.mask.cvtuqq2ps.256" || |
2915 | Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" || |
2916 | Name == "avx.cvt.ps2.pd.256" || |
2917 | Name == "avx512.mask.cvtps2pd.128" || |
2918 | Name == "avx512.mask.cvtps2pd.256" ) { |
2919 | auto *DstTy = cast<FixedVectorType>(Val: CI->getType()); |
2920 | Rep = CI->getArgOperand(i: 0); |
2921 | auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType()); |
2922 | |
2923 | unsigned NumDstElts = DstTy->getNumElements(); |
2924 | if (NumDstElts < SrcTy->getNumElements()) { |
2925 | assert(NumDstElts == 2 && "Unexpected vector size" ); |
2926 | Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1}); |
2927 | } |
2928 | |
2929 | bool IsPS2PD = SrcTy->getElementType()->isFloatTy(); |
2930 | bool IsUnsigned = Name.contains(Other: "cvtu" ); |
2931 | if (IsPS2PD) |
2932 | Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtps2pd" ); |
2933 | else if (CI->arg_size() == 4 && |
2934 | (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) || |
2935 | cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) { |
2936 | Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round |
2937 | : Intrinsic::x86_avx512_sitofp_round; |
2938 | Rep = Builder.CreateIntrinsic(ID: IID, Types: {DstTy, SrcTy}, |
2939 | Args: {Rep, CI->getArgOperand(i: 3)}); |
2940 | } else { |
2941 | Rep = IsUnsigned ? Builder.CreateUIToFP(V: Rep, DestTy: DstTy, Name: "cvt" ) |
2942 | : Builder.CreateSIToFP(V: Rep, DestTy: DstTy, Name: "cvt" ); |
2943 | } |
2944 | |
2945 | if (CI->arg_size() >= 3) |
2946 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2947 | Op1: CI->getArgOperand(i: 1)); |
2948 | } else if (Name.starts_with(Prefix: "avx512.mask.vcvtph2ps." ) || |
2949 | Name.starts_with(Prefix: "vcvtph2ps." )) { |
2950 | auto *DstTy = cast<FixedVectorType>(Val: CI->getType()); |
2951 | Rep = CI->getArgOperand(i: 0); |
2952 | auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType()); |
2953 | unsigned NumDstElts = DstTy->getNumElements(); |
2954 | if (NumDstElts != SrcTy->getNumElements()) { |
2955 | assert(NumDstElts == 4 && "Unexpected vector size" ); |
2956 | Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1, 2, 3}); |
2957 | } |
2958 | Rep = Builder.CreateBitCast( |
2959 | V: Rep, DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C), NumElts: NumDstElts)); |
2960 | Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtph2ps" ); |
2961 | if (CI->arg_size() >= 3) |
2962 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2963 | Op1: CI->getArgOperand(i: 1)); |
2964 | } else if (Name.starts_with(Prefix: "avx512.mask.load" )) { |
2965 | // "avx512.mask.loadu." or "avx512.mask.load." |
2966 | bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu". |
2967 | Rep = upgradeMaskedLoad(Builder, Ptr: CI->getArgOperand(i: 0), Passthru: CI->getArgOperand(i: 1), |
2968 | Mask: CI->getArgOperand(i: 2), Aligned); |
2969 | } else if (Name.starts_with(Prefix: "avx512.mask.expand.load." )) { |
2970 | auto *ResultTy = cast<FixedVectorType>(Val: CI->getType()); |
2971 | Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2), |
2972 | NumElts: ResultTy->getNumElements()); |
2973 | |
2974 | Rep = Builder.CreateIntrinsic( |
2975 | ID: Intrinsic::masked_expandload, Types: ResultTy, |
2976 | Args: {CI->getOperand(i_nocapture: 0), MaskVec, CI->getOperand(i_nocapture: 1)}); |
2977 | } else if (Name.starts_with(Prefix: "avx512.mask.compress.store." )) { |
2978 | auto *ResultTy = cast<VectorType>(Val: CI->getArgOperand(i: 1)->getType()); |
2979 | Value *MaskVec = |
2980 | getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2), |
2981 | NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements()); |
2982 | |
2983 | Rep = Builder.CreateIntrinsic( |
2984 | ID: Intrinsic::masked_compressstore, Types: ResultTy, |
2985 | Args: {CI->getArgOperand(i: 1), CI->getArgOperand(i: 0), MaskVec}); |
2986 | } else if (Name.starts_with(Prefix: "avx512.mask.compress." ) || |
2987 | Name.starts_with(Prefix: "avx512.mask.expand." )) { |
2988 | auto *ResultTy = cast<FixedVectorType>(Val: CI->getType()); |
2989 | |
2990 | Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2), |
2991 | NumElts: ResultTy->getNumElements()); |
2992 | |
2993 | bool IsCompress = Name[12] == 'c'; |
2994 | Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress |
2995 | : Intrinsic::x86_avx512_mask_expand; |
2996 | Rep = Builder.CreateIntrinsic( |
2997 | ID: IID, Types: ResultTy, Args: {CI->getOperand(i_nocapture: 0), CI->getOperand(i_nocapture: 1), MaskVec}); |
2998 | } else if (Name.starts_with(Prefix: "xop.vpcom" )) { |
2999 | bool IsSigned; |
3000 | if (Name.ends_with(Suffix: "ub" ) || Name.ends_with(Suffix: "uw" ) || Name.ends_with(Suffix: "ud" ) || |
3001 | Name.ends_with(Suffix: "uq" )) |
3002 | IsSigned = false; |
3003 | else if (Name.ends_with(Suffix: "b" ) || Name.ends_with(Suffix: "w" ) || |
3004 | Name.ends_with(Suffix: "d" ) || Name.ends_with(Suffix: "q" )) |
3005 | IsSigned = true; |
3006 | else |
3007 | llvm_unreachable("Unknown suffix" ); |
3008 | |
3009 | unsigned Imm; |
3010 | if (CI->arg_size() == 3) { |
3011 | Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3012 | } else { |
3013 | Name = Name.substr(Start: 9); // strip off "xop.vpcom" |
3014 | if (Name.starts_with(Prefix: "lt" )) |
3015 | Imm = 0; |
3016 | else if (Name.starts_with(Prefix: "le" )) |
3017 | Imm = 1; |
3018 | else if (Name.starts_with(Prefix: "gt" )) |
3019 | Imm = 2; |
3020 | else if (Name.starts_with(Prefix: "ge" )) |
3021 | Imm = 3; |
3022 | else if (Name.starts_with(Prefix: "eq" )) |
3023 | Imm = 4; |
3024 | else if (Name.starts_with(Prefix: "ne" )) |
3025 | Imm = 5; |
3026 | else if (Name.starts_with(Prefix: "false" )) |
3027 | Imm = 6; |
3028 | else if (Name.starts_with(Prefix: "true" )) |
3029 | Imm = 7; |
3030 | else |
3031 | llvm_unreachable("Unknown condition" ); |
3032 | } |
3033 | |
3034 | Rep = upgradeX86vpcom(Builder, CI&: *CI, Imm, IsSigned); |
3035 | } else if (Name.starts_with(Prefix: "xop.vpcmov" )) { |
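     | // vpcmov is a bitwise select: (Op0 & Sel) | (Op1 & ~Sel). |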
3036 | Value *Sel = CI->getArgOperand(i: 2); |
3037 | Value *NotSel = Builder.CreateNot(V: Sel); |
3038 | Value *Sel0 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 0), RHS: Sel); |
3039 | Value *Sel1 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 1), RHS: NotSel); |
3040 | Rep = Builder.CreateOr(LHS: Sel0, RHS: Sel1); |
3041 | } else if (Name.starts_with(Prefix: "xop.vprot" ) || Name.starts_with(Prefix: "avx512.prol" ) || |
3042 | Name.starts_with(Prefix: "avx512.mask.prol" )) { |
3043 | Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: false); |
3044 | } else if (Name.starts_with(Prefix: "avx512.pror" ) || |
3045 | Name.starts_with(Prefix: "avx512.mask.pror" )) { |
3046 | Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: true); |
3047 | } else if (Name.starts_with(Prefix: "avx512.vpshld." ) || |
3048 | Name.starts_with(Prefix: "avx512.mask.vpshld" ) || |
3049 | Name.starts_with(Prefix: "avx512.maskz.vpshld" )) { |
3050 | bool ZeroMask = Name[11] == 'z'; |
3051 | Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: false, ZeroMask); |
3052 | } else if (Name.starts_with(Prefix: "avx512.vpshrd." ) || |
3053 | Name.starts_with(Prefix: "avx512.mask.vpshrd" ) || |
3054 | Name.starts_with(Prefix: "avx512.maskz.vpshrd" )) { |
3055 | bool ZeroMask = Name[11] == 'z'; |
3056 | Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: true, ZeroMask); |
3057 | } else if (Name == "sse42.crc32.64.8" ) { |
3058 | Value *Trunc0 = |
3059 | Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Type::getInt32Ty(C)); |
3060 | Rep = Builder.CreateIntrinsic(ID: Intrinsic::x86_sse42_crc32_32_8, |
3061 | Args: {Trunc0, CI->getArgOperand(i: 1)}); |
3062 | Rep = Builder.CreateZExt(V: Rep, DestTy: CI->getType(), Name: "" ); |
3063 | } else if (Name.starts_with(Prefix: "avx.vbroadcast.s" ) || |
3064 | Name.starts_with(Prefix: "avx512.vbroadcast.s" )) { |
3065 | // Replace broadcasts with a series of insertelements. |
3066 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3067 | Type *EltTy = VecTy->getElementType(); |
3068 | unsigned EltNum = VecTy->getNumElements(); |
3069 | Value *Load = Builder.CreateLoad(Ty: EltTy, Ptr: CI->getArgOperand(i: 0)); |
3070 | Type *I32Ty = Type::getInt32Ty(C); |
3071 | Rep = PoisonValue::get(T: VecTy); |
3072 | for (unsigned I = 0; I < EltNum; ++I) |
3073 | Rep = Builder.CreateInsertElement(Vec: Rep, NewElt: Load, Idx: ConstantInt::get(Ty: I32Ty, V: I)); |
3074 | } else if (Name.starts_with(Prefix: "sse41.pmovsx" ) || |
3075 | Name.starts_with(Prefix: "sse41.pmovzx" ) || |
3076 | Name.starts_with(Prefix: "avx2.pmovsx" ) || |
3077 | Name.starts_with(Prefix: "avx2.pmovzx" ) || |
3078 | Name.starts_with(Prefix: "avx512.mask.pmovsx" ) || |
3079 | Name.starts_with(Prefix: "avx512.mask.pmovzx" )) { |
3080 | auto *DstTy = cast<FixedVectorType>(Val: CI->getType()); |
3081 | unsigned NumDstElts = DstTy->getNumElements(); |
3082 | |
3083 | // Extract a subvector of the first NumDstElts lanes and sign/zero extend. |
3084 | SmallVector<int, 8> ShuffleMask(NumDstElts); |
3085 | for (unsigned i = 0; i != NumDstElts; ++i) |
3086 | ShuffleMask[i] = i; |
3087 | |
3088 | Value *SV = Builder.CreateShuffleVector(V: CI->getArgOperand(i: 0), Mask: ShuffleMask); |
3089 | |
3090 | bool DoSext = Name.contains(Other: "pmovsx" ); |
3091 | Rep = |
3092 | DoSext ? Builder.CreateSExt(V: SV, DestTy: DstTy) : Builder.CreateZExt(V: SV, DestTy: DstTy); |
3093 | // If there are 3 arguments, it's a masked intrinsic so we need a select. |
3094 | if (CI->arg_size() == 3) |
3095 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
3096 | Op1: CI->getArgOperand(i: 1)); |
3097 | } else if (Name == "avx512.mask.pmov.qd.256" || |
3098 | Name == "avx512.mask.pmov.qd.512" || |
3099 | Name == "avx512.mask.pmov.wb.256" || |
3100 | Name == "avx512.mask.pmov.wb.512" ) { |
3101 | Type *Ty = CI->getArgOperand(i: 1)->getType(); |
3102 | Rep = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Ty); |
3103 | Rep = |
3104 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
3105 | } else if (Name.starts_with(Prefix: "avx.vbroadcastf128" ) || |
3106 | Name == "avx2.vbroadcasti128" ) { |
3107 | // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. |
3108 | Type *EltTy = cast<VectorType>(Val: CI->getType())->getElementType(); |
3109 | unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); |
3110 | auto *VT = FixedVectorType::get(ElementType: EltTy, NumElts: NumSrcElts); |
3111 | Value *Load = Builder.CreateAlignedLoad(Ty: VT, Ptr: CI->getArgOperand(i: 0), Align: Align(1)); |
3112 | if (NumSrcElts == 2) |
3113 | Rep = Builder.CreateShuffleVector(V: Load, Mask: ArrayRef<int>{0, 1, 0, 1}); |
3114 | else |
3115 | Rep = Builder.CreateShuffleVector(V: Load, |
3116 | Mask: ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3}); |
3117 | } else if (Name.starts_with(Prefix: "avx512.mask.shuf.i" ) || |
3118 | Name.starts_with(Prefix: "avx512.mask.shuf.f" )) { |
3119 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3120 | Type *VT = CI->getType(); |
3121 | unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; |
3122 | unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); |
3123 | unsigned ControlBitsMask = NumLanes - 1; |
3124 | unsigned NumControlBits = NumLanes / 2; |
3125 | SmallVector<int, 8> ShuffleMask; |
3126 | |
3127 | for (unsigned l = 0; l != NumLanes; ++l) { |
3128 | unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; |
3129 | // We actually need the other source. |
3130 | if (l >= NumLanes / 2) |
3131 | LaneMask += NumLanes; |
3132 | for (unsigned i = 0; i != NumElementsInLane; ++i) |
3133 | ShuffleMask.push_back(Elt: LaneMask * NumElementsInLane + i); |
3134 | } |
3135 | Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0), |
3136 | V2: CI->getArgOperand(i: 1), Mask: ShuffleMask); |
3137 | Rep = |
3138 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3)); |
3139 | } else if (Name.starts_with(Prefix: "avx512.mask.broadcastf" ) || |
3140 | Name.starts_with(Prefix: "avx512.mask.broadcasti" )) { |
3141 | unsigned NumSrcElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 0)->getType()) |
3142 | ->getNumElements(); |
3143 | unsigned NumDstElts = |
3144 | cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3145 | |
3146 | SmallVector<int, 8> ShuffleMask(NumDstElts); |
3147 | for (unsigned i = 0; i != NumDstElts; ++i) |
3148 | ShuffleMask[i] = i % NumSrcElts; |
3149 | |
3150 | Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0), |
3151 | V2: CI->getArgOperand(i: 0), Mask: ShuffleMask); |
3152 | Rep = |
3153 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
3154 | } else if (Name.starts_with(Prefix: "avx2.pbroadcast" ) || |
3155 | Name.starts_with(Prefix: "avx2.vbroadcast" ) || |
3156 | Name.starts_with(Prefix: "avx512.pbroadcast" ) || |
3157 | Name.starts_with(Prefix: "avx512.mask.broadcast.s" )) { |
3158 | // Replace vp?broadcasts with a vector shuffle. |
3159 | Value *Op = CI->getArgOperand(i: 0); |
3160 | ElementCount EC = cast<VectorType>(Val: CI->getType())->getElementCount(); |
3161 | Type *MaskTy = VectorType::get(ElementType: Type::getInt32Ty(C), EC); |
3162 | SmallVector<int, 8> M; |
3163 | ShuffleVectorInst::getShuffleMask(Mask: Constant::getNullValue(Ty: MaskTy), Result&: M); |
3164 | Rep = Builder.CreateShuffleVector(V: Op, Mask: M); |
3165 | |
3166 | if (CI->arg_size() == 3) |
3167 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
3168 | Op1: CI->getArgOperand(i: 1)); |
3169 | } else if (Name.starts_with(Prefix: "sse2.padds." ) || |
3170 | Name.starts_with(Prefix: "avx2.padds." ) || |
3171 | Name.starts_with(Prefix: "avx512.padds." ) || |
3172 | Name.starts_with(Prefix: "avx512.mask.padds." )) { |
3173 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::sadd_sat); |
3174 | } else if (Name.starts_with(Prefix: "sse2.psubs." ) || |
3175 | Name.starts_with(Prefix: "avx2.psubs." ) || |
3176 | Name.starts_with(Prefix: "avx512.psubs." ) || |
3177 | Name.starts_with(Prefix: "avx512.mask.psubs." )) { |
3178 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::ssub_sat); |
3179 | } else if (Name.starts_with(Prefix: "sse2.paddus." ) || |
3180 | Name.starts_with(Prefix: "avx2.paddus." ) || |
3181 | Name.starts_with(Prefix: "avx512.mask.paddus." )) { |
3182 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::uadd_sat); |
3183 | } else if (Name.starts_with(Prefix: "sse2.psubus." ) || |
3184 | Name.starts_with(Prefix: "avx2.psubus." ) || |
3185 | Name.starts_with(Prefix: "avx512.mask.psubus." )) { |
3186 | Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::usub_sat); |
3187 | } else if (Name.starts_with(Prefix: "avx512.mask.palignr." )) { |
3188 | Rep = upgradeX86ALIGNIntrinsics(Builder, Op0: CI->getArgOperand(i: 0), |
3189 | Op1: CI->getArgOperand(i: 1), Shift: CI->getArgOperand(i: 2), |
3190 | Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), |
3191 | IsVALIGN: false); |
3192 | } else if (Name.starts_with(Prefix: "avx512.mask.valign." )) { |
3193 | Rep = upgradeX86ALIGNIntrinsics( |
3194 | Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1), |
3195 | Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), IsVALIGN: true); |
3196 | } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq" ) { |
3197 | // 128/256-bit shift left specified in bits. |
3198 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3199 | Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), |
3200 | Shift: Shift / 8); // Shift is in bits. |
3201 | } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq" ) { |
3202 | // 128/256-bit shift right specified in bits. |
3203 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3204 | Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), |
3205 | Shift: Shift / 8); // Shift is in bits. |
3206 | } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" || |
3207 | Name == "avx512.psll.dq.512" ) { |
3208 | // 128/256/512-bit shift left specified in bytes. |
3209 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3210 | Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift); |
3211 | } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" || |
3212 | Name == "avx512.psrl.dq.512" ) { |
3213 | // 128/256/512-bit shift right specified in bytes. |
3214 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3215 | Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift); |
3216 | } else if (Name == "sse41.pblendw" || Name.starts_with(Prefix: "sse41.blendp" ) || |
3217 | Name.starts_with(Prefix: "avx.blend.p" ) || Name == "avx2.pblendw" || |
3218 | Name.starts_with(Prefix: "avx2.pblendd." )) { |
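     | // Replace the blend with a shuffle whose mask selects from Op1 where the |
     | // matching immediate bit (repeating every 8 elements) is set, else Op0. |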
3219 | Value *Op0 = CI->getArgOperand(i: 0); |
3220 | Value *Op1 = CI->getArgOperand(i: 1); |
3221 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3222 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3223 | unsigned NumElts = VecTy->getNumElements(); |
3224 | |
3225 | SmallVector<int, 16> Idxs(NumElts); |
3226 | for (unsigned i = 0; i != NumElts; ++i) |
3227 | Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i; |
3228 | |
3229 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3230 | } else if (Name.starts_with(Prefix: "avx.vinsertf128." ) || |
3231 | Name == "avx2.vinserti128" || |
3232 | Name.starts_with(Prefix: "avx512.mask.insert" )) { |
3233 | Value *Op0 = CI->getArgOperand(i: 0); |
3234 | Value *Op1 = CI->getArgOperand(i: 1); |
3235 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3236 | unsigned DstNumElts = |
3237 | cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3238 | unsigned SrcNumElts = |
3239 | cast<FixedVectorType>(Val: Op1->getType())->getNumElements(); |
3240 | unsigned Scale = DstNumElts / SrcNumElts; |
3241 | |
3242 | // Mask off the high bits of the immediate value; hardware ignores those. |
3243 | Imm = Imm % Scale; |
3244 | |
3245 | // Extend the second operand into a vector the size of the destination. |
3246 | SmallVector<int, 8> Idxs(DstNumElts); |
3247 | for (unsigned i = 0; i != SrcNumElts; ++i) |
3248 | Idxs[i] = i; |
3249 | for (unsigned i = SrcNumElts; i != DstNumElts; ++i) |
3250 | Idxs[i] = SrcNumElts; |
3251 | Rep = Builder.CreateShuffleVector(V: Op1, Mask: Idxs); |
3252 | |
3253 | // Insert the second operand into the first operand. |
3254 | |
3255 | // Note that there is no guarantee that instruction lowering will actually |
3256 | // produce a vinsertf128 instruction for the created shuffles. In |
3257 | // particular, the 0 immediate case involves no lane changes, so it can |
3258 | // be handled as a blend. |
3259 | |
3260 | // Example of shuffle mask for 32-bit elements: |
3261 | // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> |
3262 | // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7> |
3263 | |
3264 | // First fill with identity mask. |
3265 | for (unsigned i = 0; i != DstNumElts; ++i) |
3266 | Idxs[i] = i; |
3267 | // Then replace the elements where we need to insert. |
3268 | for (unsigned i = 0; i != SrcNumElts; ++i) |
3269 | Idxs[i + Imm * SrcNumElts] = i + DstNumElts; |
3270 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Rep, Mask: Idxs); |
3271 | |
3272 | // If the intrinsic has a mask operand, handle that. |
3273 | if (CI->arg_size() == 5) |
3274 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, |
3275 | Op1: CI->getArgOperand(i: 3)); |
3276 | } else if (Name.starts_with(Prefix: "avx.vextractf128." ) || |
3277 | Name == "avx2.vextracti128" || |
3278 | Name.starts_with(Prefix: "avx512.mask.vextract" )) { |
3279 | Value *Op0 = CI->getArgOperand(i: 0); |
3280 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3281 | unsigned DstNumElts = |
3282 | cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3283 | unsigned SrcNumElts = |
3284 | cast<FixedVectorType>(Val: Op0->getType())->getNumElements(); |
3285 | unsigned Scale = SrcNumElts / DstNumElts; |
3286 | |
3287 | // Mask off the high bits of the immediate value; hardware ignores those. |
3288 | Imm = Imm % Scale; |
3289 | |
3290 | // Get indexes for the subvector of the input vector. |
3291 | SmallVector<int, 8> Idxs(DstNumElts); |
3292 | for (unsigned i = 0; i != DstNumElts; ++i) { |
3293 | Idxs[i] = i + (Imm * DstNumElts); |
3294 | } |
3295 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3296 | |
3297 | // If the intrinsic has a mask operand, handle that. |
3298 | if (CI->arg_size() == 4) |
3299 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3300 | Op1: CI->getArgOperand(i: 2)); |
3301 | } else if (Name.starts_with(Prefix: "avx512.mask.perm.df." ) || |
3302 | Name.starts_with(Prefix: "avx512.mask.perm.di." )) { |
3303 | Value *Op0 = CI->getArgOperand(i: 0); |
3304 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3305 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3306 | unsigned NumElts = VecTy->getNumElements(); |
3307 | |
3308 | SmallVector<int, 8> Idxs(NumElts); |
3309 | for (unsigned i = 0; i != NumElts; ++i) |
3310 | Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); |
3311 | |
3312 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3313 | |
3314 | if (CI->arg_size() == 4) |
3315 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3316 | Op1: CI->getArgOperand(i: 2)); |
3317 | } else if (Name.starts_with(Prefix: "avx.vperm2f128." ) || Name == "avx2.vperm2i128" ) { |
3318 | // The immediate permute control byte looks like this: |
3319 | // [1:0] - select 128 bits from sources for low half of destination |
3320 | // [2] - ignore |
3321 | // [3] - zero low half of destination |
3322 | // [5:4] - select 128 bits from sources for high half of destination |
3323 | // [6] - ignore |
3324 | // [7] - zero high half of destination |
3325 | |
3326 | uint8_t Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3327 | |
3328 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3329 | unsigned HalfSize = NumElts / 2; |
3330 | SmallVector<int, 8> ShuffleMask(NumElts); |
3331 | |
3332 | // Determine which operand(s) are actually in use for this instruction. |
3333 | Value *V0 = (Imm & 0x02) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0); |
3334 | Value *V1 = (Imm & 0x20) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0); |
3335 | |
3336 | // If needed, replace operands based on zero mask. |
3337 | V0 = (Imm & 0x08) ? ConstantAggregateZero::get(Ty: CI->getType()) : V0; |
3338 | V1 = (Imm & 0x80) ? ConstantAggregateZero::get(Ty: CI->getType()) : V1; |
3339 | |
3340 | // Permute low half of result. |
3341 | unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0; |
3342 | for (unsigned i = 0; i < HalfSize; ++i) |
3343 | ShuffleMask[i] = StartIndex + i; |
3344 | |
3345 | // Permute high half of result. |
3346 | StartIndex = (Imm & 0x10) ? HalfSize : 0; |
3347 | for (unsigned i = 0; i < HalfSize; ++i) |
3348 | ShuffleMask[i + HalfSize] = NumElts + StartIndex + i; |
3349 | |
3350 | Rep = Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: ShuffleMask); |
3351 | |
3352 | } else if (Name.starts_with(Prefix: "avx.vpermil." ) || Name == "sse2.pshuf.d" || |
3353 | Name.starts_with(Prefix: "avx512.mask.vpermil.p" ) || |
3354 | Name.starts_with(Prefix: "avx512.mask.pshuf.d." )) { |
3355 | Value *Op0 = CI->getArgOperand(i: 0); |
3356 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3357 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3358 | unsigned NumElts = VecTy->getNumElements(); |
3359 | // Calculate the size of each index in the immediate. |
3360 | unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); |
3361 | unsigned IdxMask = ((1 << IdxSize) - 1); |
3362 | |
3363 | SmallVector<int, 8> Idxs(NumElts); |
3364 | // Look up the bits for this element, wrapping around the immediate every |
3365 | // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need |
3366 | // to offset by the first index of each group. |
3367 | for (unsigned i = 0; i != NumElts; ++i) |
3368 | Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); |
3369 | |
3370 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3371 | |
3372 | if (CI->arg_size() == 4) |
3373 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3374 | Op1: CI->getArgOperand(i: 2)); |
3375 | } else if (Name == "sse2.pshufl.w" || |
3376 | Name.starts_with(Prefix: "avx512.mask.pshufl.w." )) { |
3377 | Value *Op0 = CI->getArgOperand(i: 0); |
3378 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3379 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3380 | |
3381 | SmallVector<int, 16> Idxs(NumElts); |
3382 | for (unsigned l = 0; l != NumElts; l += 8) { |
3383 | for (unsigned i = 0; i != 4; ++i) |
3384 | Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; |
3385 | for (unsigned i = 4; i != 8; ++i) |
3386 | Idxs[i + l] = i + l; |
3387 | } |
3388 | |
3389 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3390 | |
3391 | if (CI->arg_size() == 4) |
3392 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3393 | Op1: CI->getArgOperand(i: 2)); |
3394 | } else if (Name == "sse2.pshufh.w" || |
3395 | Name.starts_with(Prefix: "avx512.mask.pshufh.w." )) { |
3396 | Value *Op0 = CI->getArgOperand(i: 0); |
3397 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3398 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3399 | |
3400 | SmallVector<int, 16> Idxs(NumElts); |
3401 | for (unsigned l = 0; l != NumElts; l += 8) { |
3402 | for (unsigned i = 0; i != 4; ++i) |
3403 | Idxs[i + l] = i + l; |
3404 | for (unsigned i = 0; i != 4; ++i) |
3405 | Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; |
3406 | } |
3407 | |
3408 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3409 | |
3410 | if (CI->arg_size() == 4) |
3411 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3412 | Op1: CI->getArgOperand(i: 2)); |
3413 | } else if (Name.starts_with(Prefix: "avx512.mask.shuf.p" )) { |
3414 | Value *Op0 = CI->getArgOperand(i: 0); |
3415 | Value *Op1 = CI->getArgOperand(i: 1); |
3416 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3417 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3418 | |
3419 | unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); |
3420 | unsigned HalfLaneElts = NumLaneElts / 2; |
3421 | |
3422 | SmallVector<int, 16> Idxs(NumElts); |
3423 | for (unsigned i = 0; i != NumElts; ++i) { |
3424 | // Base index is the starting element of the lane. |
3425 | Idxs[i] = i - (i % NumLaneElts); |
3426 | // If we are halfway through the lane, switch to the other source. |
3427 | if ((i % NumLaneElts) >= HalfLaneElts) |
3428 | Idxs[i] += NumElts; |
3429 | // Now select the specific element by adding HalfLaneElts bits from |
3430 | // the immediate, wrapping around the immediate every 8 bits. |
3431 | Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1); |
3432 | } |
3433 | |
3434 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3435 | |
3436 | Rep = |
3437 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3)); |
3438 | } else if (Name.starts_with(Prefix: "avx512.mask.movddup" ) || |
3439 | Name.starts_with(Prefix: "avx512.mask.movshdup" ) || |
3440 | Name.starts_with(Prefix: "avx512.mask.movsldup" )) { |
3441 | Value *Op0 = CI->getArgOperand(i: 0); |
3442 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3443 | unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); |
3444 | |
3445 | unsigned Offset = 0; |
3446 | if (Name.starts_with(Prefix: "avx512.mask.movshdup." )) |
3447 | Offset = 1; |
3448 | |
3449 | SmallVector<int, 16> Idxs(NumElts); |
3450 | for (unsigned l = 0; l != NumElts; l += NumLaneElts) |
3451 | for (unsigned i = 0; i != NumLaneElts; i += 2) { |
3452 | Idxs[i + l + 0] = i + l + Offset; |
3453 | Idxs[i + l + 1] = i + l + Offset; |
3454 | } |
3455 | |
3456 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3457 | |
3458 | Rep = |
3459 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
3460 | } else if (Name.starts_with(Prefix: "avx512.mask.punpckl" ) || |
3461 | Name.starts_with(Prefix: "avx512.mask.unpckl." )) { |
3462 | Value *Op0 = CI->getArgOperand(i: 0); |
3463 | Value *Op1 = CI->getArgOperand(i: 1); |
3464 | int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3465 | int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); |
3466 | |
3467 | SmallVector<int, 64> Idxs(NumElts); |
3468 | for (int l = 0; l != NumElts; l += NumLaneElts) |
3469 | for (int i = 0; i != NumLaneElts; ++i) |
3470 | Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); |
3471 | |
3472 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3473 | |
3474 | Rep = |
3475 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3476 | } else if (Name.starts_with(Prefix: "avx512.mask.punpckh" ) || |
3477 | Name.starts_with(Prefix: "avx512.mask.unpckh." )) { |
3478 | Value *Op0 = CI->getArgOperand(i: 0); |
3479 | Value *Op1 = CI->getArgOperand(i: 1); |
3480 | int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3481 | int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); |
3482 | |
3483 | SmallVector<int, 64> Idxs(NumElts); |
3484 | for (int l = 0; l != NumElts; l += NumLaneElts) |
3485 | for (int i = 0; i != NumLaneElts; ++i) |
3486 | Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); |
3487 | |
3488 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3489 | |
3490 | Rep = |
3491 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3492 | } else if (Name.starts_with(Prefix: "avx512.mask.and." ) || |
3493 | Name.starts_with(Prefix: "avx512.mask.pand." )) { |
3494 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3495 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3496 | Rep = Builder.CreateAnd(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy), |
3497 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3498 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3499 | Rep = |
3500 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3501 | } else if (Name.starts_with(Prefix: "avx512.mask.andn." ) || |
3502 | Name.starts_with(Prefix: "avx512.mask.pandn." )) { |
3503 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3504 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3505 | Rep = Builder.CreateNot(V: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy)); |
3506 | Rep = Builder.CreateAnd(LHS: Rep, |
3507 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3508 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3509 | Rep = |
3510 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3511 | } else if (Name.starts_with(Prefix: "avx512.mask.or." ) || |
3512 | Name.starts_with(Prefix: "avx512.mask.por." )) { |
3513 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3514 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3515 | Rep = Builder.CreateOr(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy), |
3516 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3517 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3518 | Rep = |
3519 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3520 | } else if (Name.starts_with(Prefix: "avx512.mask.xor." ) || |
3521 | Name.starts_with(Prefix: "avx512.mask.pxor." )) { |
3522 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3523 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3524 | Rep = Builder.CreateXor(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy), |
3525 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3526 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3527 | Rep = |
3528 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3529 | } else if (Name.starts_with(Prefix: "avx512.mask.padd." )) { |
3530 | Rep = Builder.CreateAdd(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
3531 | Rep = |
3532 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3533 | } else if (Name.starts_with(Prefix: "avx512.mask.psub." )) { |
3534 | Rep = Builder.CreateSub(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
3535 | Rep = |
3536 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3537 | } else if (Name.starts_with(Prefix: "avx512.mask.pmull." )) { |
3538 | Rep = Builder.CreateMul(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
3539 | Rep = |
3540 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3541 | } else if (Name.starts_with(Prefix: "avx512.mask.add.p" )) { |
3542 | if (Name.ends_with(Suffix: ".512" )) { |
3543 | Intrinsic::ID IID; |
3544 | if (Name[17] == 's') |
3545 | IID = Intrinsic::x86_avx512_add_ps_512; |
3546 | else |
3547 | IID = Intrinsic::x86_avx512_add_pd_512; |
3548 | |
3549 | Rep = Builder.CreateIntrinsic( |
3550 | ID: IID, |
3551 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3552 | } else { |
3553 | Rep = Builder.CreateFAdd(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3554 | } |
3555 | Rep = |
3556 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3557 | } else if (Name.starts_with(Prefix: "avx512.mask.div.p" )) { |
3558 | if (Name.ends_with(Suffix: ".512" )) { |
3559 | Intrinsic::ID IID; |
3560 | if (Name[17] == 's') |
3561 | IID = Intrinsic::x86_avx512_div_ps_512; |
3562 | else |
3563 | IID = Intrinsic::x86_avx512_div_pd_512; |
3564 | |
3565 | Rep = Builder.CreateIntrinsic( |
3566 | ID: IID, |
3567 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3568 | } else { |
3569 | Rep = Builder.CreateFDiv(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3570 | } |
3571 | Rep = |
3572 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3573 | } else if (Name.starts_with(Prefix: "avx512.mask.mul.p" )) { |
3574 | if (Name.ends_with(Suffix: ".512" )) { |
3575 | Intrinsic::ID IID; |
3576 | if (Name[17] == 's') |
3577 | IID = Intrinsic::x86_avx512_mul_ps_512; |
3578 | else |
3579 | IID = Intrinsic::x86_avx512_mul_pd_512; |
3580 | |
3581 | Rep = Builder.CreateIntrinsic( |
3582 | ID: IID, |
3583 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3584 | } else { |
3585 | Rep = Builder.CreateFMul(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3586 | } |
3587 | Rep = |
3588 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3589 | } else if (Name.starts_with(Prefix: "avx512.mask.sub.p" )) { |
3590 | if (Name.ends_with(Suffix: ".512" )) { |
3591 | Intrinsic::ID IID; |
3592 | if (Name[17] == 's') |
3593 | IID = Intrinsic::x86_avx512_sub_ps_512; |
3594 | else |
3595 | IID = Intrinsic::x86_avx512_sub_pd_512; |
3596 | |
3597 | Rep = Builder.CreateIntrinsic( |
3598 | ID: IID, |
3599 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3600 | } else { |
3601 | Rep = Builder.CreateFSub(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3602 | } |
3603 | Rep = |
3604 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3605 | } else if ((Name.starts_with(Prefix: "avx512.mask.max.p" ) || |
3606 | Name.starts_with(Prefix: "avx512.mask.min.p" )) && |
3607 | Name.drop_front(N: 18) == ".512" ) { |
3608 | bool IsDouble = Name[17] == 'd'; |
3609 | bool IsMin = Name[13] == 'i'; |
3610 | static const Intrinsic::ID MinMaxTbl[2][2] = { |
3611 | {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512}, |
3612 | {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}}; |
3613 | Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble]; |
3614 | |
3615 | Rep = Builder.CreateIntrinsic( |
3616 | ID: IID, |
3617 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)}); |
3618 | Rep = |
3619 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2)); |
3620 | } else if (Name.starts_with(Prefix: "avx512.mask.lzcnt." )) { |
3621 | Rep = |
3622 | Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: CI->getType(), |
3623 | Args: {CI->getArgOperand(i: 0), Builder.getInt1(V: false)}); |
3624 | Rep = |
3625 | emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1)); |
3626 | } else if (Name.starts_with(Prefix: "avx512.mask.psll" )) { |
3627 | bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); |
3628 | bool IsVariable = Name[16] == 'v'; |
3629 | char Size = Name[16] == '.' ? Name[17] |
3630 | : Name[17] == '.' ? Name[18] |
3631 | : Name[18] == '.' ? Name[19] |
3632 | : Name[20]; |
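    // Size is the element-type letter that follows the base name, after any
    // 'i' (immediate), 'v' (variable) or element-count digits, e.g.
    // "psll.d.128" -> 'd', "pslli.q" -> 'q', "psllv8.hi" -> 'h'.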
3633 | |
3634 | Intrinsic::ID IID; |
3635 | if (IsVariable && Name[17] != '.') { |
3636 | if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di |
3637 | IID = Intrinsic::x86_avx2_psllv_q; |
3638 | else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di |
3639 | IID = Intrinsic::x86_avx2_psllv_q_256; |
3640 | else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si |
3641 | IID = Intrinsic::x86_avx2_psllv_d; |
3642 | else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si |
3643 | IID = Intrinsic::x86_avx2_psllv_d_256; |
3644 | else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi |
3645 | IID = Intrinsic::x86_avx512_psllv_w_128; |
3646 | else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi |
3647 | IID = Intrinsic::x86_avx512_psllv_w_256; |
3648 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi |
3649 | IID = Intrinsic::x86_avx512_psllv_w_512; |
3650 | else |
3651 | llvm_unreachable("Unexpected size" ); |
3652 | } else if (Name.ends_with(Suffix: ".128" )) { |
3653 | if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 |
3654 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d |
3655 | : Intrinsic::x86_sse2_psll_d; |
3656 | else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128 |
3657 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q |
3658 | : Intrinsic::x86_sse2_psll_q; |
3659 | else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128 |
3660 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w |
3661 | : Intrinsic::x86_sse2_psll_w; |
3662 | else |
3663 | llvm_unreachable("Unexpected size" ); |
3664 | } else if (Name.ends_with(Suffix: ".256" )) { |
3665 | if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 |
3666 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d |
3667 | : Intrinsic::x86_avx2_psll_d; |
3668 | else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256 |
3669 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q |
3670 | : Intrinsic::x86_avx2_psll_q; |
3671 | else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256 |
3672 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w |
3673 | : Intrinsic::x86_avx2_psll_w; |
3674 | else |
3675 | llvm_unreachable("Unexpected size" ); |
3676 | } else { |
3677 | if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512 |
3678 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 |
3679 | : IsVariable ? Intrinsic::x86_avx512_psllv_d_512 |
3680 | : Intrinsic::x86_avx512_psll_d_512; |
3681 | else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512 |
3682 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 |
3683 | : IsVariable ? Intrinsic::x86_avx512_psllv_q_512 |
3684 | : Intrinsic::x86_avx512_psll_q_512; |
3685 | else if (Size == 'w') // psll.wi.512, pslli.w, psll.w |
3686 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512 |
3687 | : Intrinsic::x86_avx512_psll_w_512; |
3688 | else |
3689 | llvm_unreachable("Unexpected size" ); |
3690 | } |
3691 | |
3692 | Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID); |
3693 | } else if (Name.starts_with(Prefix: "avx512.mask.psrl" )) { |
3694 | bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); |
3695 | bool IsVariable = Name[16] == 'v'; |
3696 | char Size = Name[16] == '.' ? Name[17] |
3697 | : Name[17] == '.' ? Name[18] |
3698 | : Name[18] == '.' ? Name[19] |
3699 | : Name[20]; |
3700 | |
3701 | Intrinsic::ID IID; |
3702 | if (IsVariable && Name[17] != '.') { |
3703 | if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di |
3704 | IID = Intrinsic::x86_avx2_psrlv_q; |
3705 | else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di |
3706 | IID = Intrinsic::x86_avx2_psrlv_q_256; |
3707 | else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si |
3708 | IID = Intrinsic::x86_avx2_psrlv_d; |
3709 | else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si |
3710 | IID = Intrinsic::x86_avx2_psrlv_d_256; |
3711 | else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi |
3712 | IID = Intrinsic::x86_avx512_psrlv_w_128; |
3713 | else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi |
3714 | IID = Intrinsic::x86_avx512_psrlv_w_256; |
3715 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi |
3716 | IID = Intrinsic::x86_avx512_psrlv_w_512; |
3717 | else |
3718 | llvm_unreachable("Unexpected size" ); |
3719 | } else if (Name.ends_with(Suffix: ".128" )) { |
3720 | if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 |
3721 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d |
3722 | : Intrinsic::x86_sse2_psrl_d; |
3723 | else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128 |
3724 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q |
3725 | : Intrinsic::x86_sse2_psrl_q; |
3726 | else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128 |
3727 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w |
3728 | : Intrinsic::x86_sse2_psrl_w; |
3729 | else |
3730 | llvm_unreachable("Unexpected size" ); |
3731 | } else if (Name.ends_with(Suffix: ".256" )) { |
3732 | if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 |
3733 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d |
3734 | : Intrinsic::x86_avx2_psrl_d; |
3735 | else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256 |
3736 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q |
3737 | : Intrinsic::x86_avx2_psrl_q; |
3738 | else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256 |
3739 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w |
3740 | : Intrinsic::x86_avx2_psrl_w; |
3741 | else |
3742 | llvm_unreachable("Unexpected size" ); |
3743 | } else { |
      if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3745 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 |
3746 | : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 |
3747 | : Intrinsic::x86_avx512_psrl_d_512; |
      else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3749 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 |
3750 | : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 |
3751 | : Intrinsic::x86_avx512_psrl_q_512; |
      else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3753 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512 |
3754 | : Intrinsic::x86_avx512_psrl_w_512; |
3755 | else |
3756 | llvm_unreachable("Unexpected size" ); |
3757 | } |
3758 | |
3759 | Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID); |
3760 | } else if (Name.starts_with(Prefix: "avx512.mask.psra" )) { |
3761 | bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); |
3762 | bool IsVariable = Name[16] == 'v'; |
3763 | char Size = Name[16] == '.' ? Name[17] |
3764 | : Name[17] == '.' ? Name[18] |
3765 | : Name[18] == '.' ? Name[19] |
3766 | : Name[20]; |
3767 | |
3768 | Intrinsic::ID IID; |
3769 | if (IsVariable && Name[17] != '.') { |
3770 | if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si |
3771 | IID = Intrinsic::x86_avx2_psrav_d; |
3772 | else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si |
3773 | IID = Intrinsic::x86_avx2_psrav_d_256; |
3774 | else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi |
3775 | IID = Intrinsic::x86_avx512_psrav_w_128; |
3776 | else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi |
3777 | IID = Intrinsic::x86_avx512_psrav_w_256; |
3778 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi |
3779 | IID = Intrinsic::x86_avx512_psrav_w_512; |
3780 | else |
3781 | llvm_unreachable("Unexpected size" ); |
3782 | } else if (Name.ends_with(Suffix: ".128" )) { |
3783 | if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128 |
3784 | IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d |
3785 | : Intrinsic::x86_sse2_psra_d; |
3786 | else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128 |
3787 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 |
3788 | : IsVariable ? Intrinsic::x86_avx512_psrav_q_128 |
3789 | : Intrinsic::x86_avx512_psra_q_128; |
3790 | else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128 |
3791 | IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w |
3792 | : Intrinsic::x86_sse2_psra_w; |
3793 | else |
3794 | llvm_unreachable("Unexpected size" ); |
3795 | } else if (Name.ends_with(Suffix: ".256" )) { |
3796 | if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256 |
3797 | IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d |
3798 | : Intrinsic::x86_avx2_psra_d; |
3799 | else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256 |
3800 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 |
3801 | : IsVariable ? Intrinsic::x86_avx512_psrav_q_256 |
3802 | : Intrinsic::x86_avx512_psra_q_256; |
3803 | else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256 |
3804 | IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w |
3805 | : Intrinsic::x86_avx2_psra_w; |
3806 | else |
3807 | llvm_unreachable("Unexpected size" ); |
3808 | } else { |
3809 | if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512 |
3810 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 |
3811 | : IsVariable ? Intrinsic::x86_avx512_psrav_d_512 |
3812 | : Intrinsic::x86_avx512_psra_d_512; |
      else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
3814 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 |
3815 | : IsVariable ? Intrinsic::x86_avx512_psrav_q_512 |
3816 | : Intrinsic::x86_avx512_psra_q_512; |
3817 | else if (Size == 'w') // psra.wi.512, psrai.w, psra.w |
3818 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512 |
3819 | : Intrinsic::x86_avx512_psra_w_512; |
3820 | else |
3821 | llvm_unreachable("Unexpected size" ); |
3822 | } |
3823 | |
3824 | Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID); |
3825 | } else if (Name.starts_with(Prefix: "avx512.mask.move.s" )) { |
3826 | Rep = upgradeMaskedMove(Builder, CI&: *CI); |
3827 | } else if (Name.starts_with(Prefix: "avx512.cvtmask2" )) { |
3828 | Rep = upgradeMaskToInt(Builder, CI&: *CI); |
3829 | } else if (Name.ends_with(Suffix: ".movntdqa" )) { |
3830 | MDNode *Node = MDNode::get( |
3831 | Context&: C, MDs: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1))); |
3832 | |
3833 | LoadInst *LI = Builder.CreateAlignedLoad( |
3834 | Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0), |
3835 | Align: Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)); |
3836 | LI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node); |
3837 | Rep = LI; |
3838 | } else if (Name.starts_with(Prefix: "fma.vfmadd." ) || |
3839 | Name.starts_with(Prefix: "fma.vfmsub." ) || |
3840 | Name.starts_with(Prefix: "fma.vfnmadd." ) || |
3841 | Name.starts_with(Prefix: "fma.vfnmsub." )) { |
3842 | bool NegMul = Name[6] == 'n'; |
3843 | bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's'; |
3844 | bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's'; |
3845 | |
3846 | Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3847 | CI->getArgOperand(i: 2)}; |
3848 | |
3849 | if (IsScalar) { |
3850 | Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0); |
3851 | Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0); |
3852 | Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0); |
3853 | } |
3854 | |
3855 | if (NegMul && !IsScalar) |
3856 | Ops[0] = Builder.CreateFNeg(V: Ops[0]); |
3857 | if (NegMul && IsScalar) |
3858 | Ops[1] = Builder.CreateFNeg(V: Ops[1]); |
3859 | if (NegAcc) |
3860 | Ops[2] = Builder.CreateFNeg(V: Ops[2]); |
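    // The negations above fold every variant onto a plain llvm.fma:
    // vfnmadd is -(a*b)+c, vfmsub is (a*b)-c and vfnmsub is -(a*b)-c, so
    // negating one multiplicand and/or the addend is sufficient.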
3861 | |
3862 | Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops); |
3863 | |
3864 | if (IsScalar) |
3865 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
3866 | } else if (Name.starts_with(Prefix: "fma4.vfmadd.s" )) { |
3867 | Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3868 | CI->getArgOperand(i: 2)}; |
3869 | |
3870 | Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0); |
3871 | Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0); |
3872 | Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0); |
3873 | |
3874 | Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops); |
3875 | |
3876 | Rep = Builder.CreateInsertElement(Vec: Constant::getNullValue(Ty: CI->getType()), |
3877 | NewElt: Rep, Idx: (uint64_t)0); |
3878 | } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.s" ) || |
3879 | Name.starts_with(Prefix: "avx512.maskz.vfmadd.s" ) || |
3880 | Name.starts_with(Prefix: "avx512.mask3.vfmadd.s" ) || |
3881 | Name.starts_with(Prefix: "avx512.mask3.vfmsub.s" ) || |
3882 | Name.starts_with(Prefix: "avx512.mask3.vfnmsub.s" )) { |
3883 | bool IsMask3 = Name[11] == '3'; |
3884 | bool IsMaskZ = Name[11] == 'z'; |
3885 | // Drop the "avx512.mask." to make it easier. |
3886 | Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12); |
3887 | bool NegMul = Name[2] == 'n'; |
3888 | bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; |
3889 | |
3890 | Value *A = CI->getArgOperand(i: 0); |
3891 | Value *B = CI->getArgOperand(i: 1); |
3892 | Value *C = CI->getArgOperand(i: 2); |
3893 | |
3894 | if (NegMul && (IsMask3 || IsMaskZ)) |
3895 | A = Builder.CreateFNeg(V: A); |
3896 | if (NegMul && !(IsMask3 || IsMaskZ)) |
3897 | B = Builder.CreateFNeg(V: B); |
3898 | if (NegAcc) |
3899 | C = Builder.CreateFNeg(V: C); |
3900 | |
3901 | A = Builder.CreateExtractElement(Vec: A, Idx: (uint64_t)0); |
3902 | B = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0); |
3903 | C = Builder.CreateExtractElement(Vec: C, Idx: (uint64_t)0); |
3904 | |
3905 | if (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) || |
3906 | cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4) { |
3907 | Value *Ops[] = {A, B, C, CI->getArgOperand(i: 4)}; |
3908 | |
3909 | Intrinsic::ID IID; |
3910 | if (Name.back() == 'd') |
3911 | IID = Intrinsic::x86_avx512_vfmadd_f64; |
3912 | else |
3913 | IID = Intrinsic::x86_avx512_vfmadd_f32; |
3914 | Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops); |
3915 | } else { |
3916 | Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C); |
3917 | } |
3918 | |
3919 | Value *PassThru = IsMaskZ ? Constant::getNullValue(Ty: Rep->getType()) |
3920 | : IsMask3 ? C |
3921 | : A; |
3922 | |
3923 | // For Mask3 with NegAcc, we need to create a new extractelement that |
3924 | // avoids the negation above. |
3925 | if (NegAcc && IsMask3) |
3926 | PassThru = |
3927 | Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 2), Idx: (uint64_t)0); |
3928 | |
3929 | Rep = emitX86ScalarSelect(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
3930 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: IsMask3 ? 2 : 0), NewElt: Rep, |
3931 | Idx: (uint64_t)0); |
3932 | } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.p" ) || |
3933 | Name.starts_with(Prefix: "avx512.mask.vfnmadd.p" ) || |
3934 | Name.starts_with(Prefix: "avx512.mask.vfnmsub.p" ) || |
3935 | Name.starts_with(Prefix: "avx512.mask3.vfmadd.p" ) || |
3936 | Name.starts_with(Prefix: "avx512.mask3.vfmsub.p" ) || |
3937 | Name.starts_with(Prefix: "avx512.mask3.vfnmsub.p" ) || |
3938 | Name.starts_with(Prefix: "avx512.maskz.vfmadd.p" )) { |
3939 | bool IsMask3 = Name[11] == '3'; |
3940 | bool IsMaskZ = Name[11] == 'z'; |
3941 | // Drop the "avx512.mask." to make it easier. |
3942 | Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12); |
3943 | bool NegMul = Name[2] == 'n'; |
3944 | bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; |
3945 | |
3946 | Value *A = CI->getArgOperand(i: 0); |
3947 | Value *B = CI->getArgOperand(i: 1); |
3948 | Value *C = CI->getArgOperand(i: 2); |
3949 | |
3950 | if (NegMul && (IsMask3 || IsMaskZ)) |
3951 | A = Builder.CreateFNeg(V: A); |
3952 | if (NegMul && !(IsMask3 || IsMaskZ)) |
3953 | B = Builder.CreateFNeg(V: B); |
3954 | if (NegAcc) |
3955 | C = Builder.CreateFNeg(V: C); |
3956 | |
3957 | if (CI->arg_size() == 5 && |
3958 | (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) || |
3959 | cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4)) { |
3960 | Intrinsic::ID IID; |
      // Check the character before ".512" in the string.
3962 | if (Name[Name.size() - 5] == 's') |
3963 | IID = Intrinsic::x86_avx512_vfmadd_ps_512; |
3964 | else |
3965 | IID = Intrinsic::x86_avx512_vfmadd_pd_512; |
3966 | |
3967 | Rep = Builder.CreateIntrinsic(ID: IID, Args: {A, B, C, CI->getArgOperand(i: 4)}); |
3968 | } else { |
3969 | Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C); |
3970 | } |
3971 | |
3972 | Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType()) |
3973 | : IsMask3 ? CI->getArgOperand(i: 2) |
3974 | : CI->getArgOperand(i: 0); |
3975 | |
3976 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
3977 | } else if (Name.starts_with(Prefix: "fma.vfmsubadd.p" )) { |
3978 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
3979 | unsigned EltWidth = CI->getType()->getScalarSizeInBits(); |
3980 | Intrinsic::ID IID; |
3981 | if (VecWidth == 128 && EltWidth == 32) |
3982 | IID = Intrinsic::x86_fma_vfmaddsub_ps; |
3983 | else if (VecWidth == 256 && EltWidth == 32) |
3984 | IID = Intrinsic::x86_fma_vfmaddsub_ps_256; |
3985 | else if (VecWidth == 128 && EltWidth == 64) |
3986 | IID = Intrinsic::x86_fma_vfmaddsub_pd; |
3987 | else if (VecWidth == 256 && EltWidth == 64) |
3988 | IID = Intrinsic::x86_fma_vfmaddsub_pd_256; |
3989 | else |
3990 | llvm_unreachable("Unexpected intrinsic" ); |
3991 | |
3992 | Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3993 | CI->getArgOperand(i: 2)}; |
3994 | Ops[2] = Builder.CreateFNeg(V: Ops[2]); |
3995 | Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops); |
3996 | } else if (Name.starts_with(Prefix: "avx512.mask.vfmaddsub.p" ) || |
3997 | Name.starts_with(Prefix: "avx512.mask3.vfmaddsub.p" ) || |
3998 | Name.starts_with(Prefix: "avx512.maskz.vfmaddsub.p" ) || |
3999 | Name.starts_with(Prefix: "avx512.mask3.vfmsubadd.p" )) { |
4000 | bool IsMask3 = Name[11] == '3'; |
4001 | bool IsMaskZ = Name[11] == 'z'; |
4002 | // Drop the "avx512.mask." to make it easier. |
4003 | Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12); |
4004 | bool IsSubAdd = Name[3] == 's'; |
4005 | if (CI->arg_size() == 5) { |
4006 | Intrinsic::ID IID; |
      // Check the character before ".512" in the string.
4008 | if (Name[Name.size() - 5] == 's') |
4009 | IID = Intrinsic::x86_avx512_vfmaddsub_ps_512; |
4010 | else |
4011 | IID = Intrinsic::x86_avx512_vfmaddsub_pd_512; |
4012 | |
4013 | Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4014 | CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)}; |
4015 | if (IsSubAdd) |
4016 | Ops[2] = Builder.CreateFNeg(V: Ops[2]); |
4017 | |
4018 | Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops); |
4019 | } else { |
4020 | int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
4021 | |
4022 | Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4023 | CI->getArgOperand(i: 2)}; |
4024 | |
4025 | Function *FMA = Intrinsic::getOrInsertDeclaration( |
4026 | M: CI->getModule(), id: Intrinsic::fma, Tys: Ops[0]->getType()); |
4027 | Value *Odd = Builder.CreateCall(Callee: FMA, Args: Ops); |
4028 | Ops[2] = Builder.CreateFNeg(V: Ops[2]); |
4029 | Value *Even = Builder.CreateCall(Callee: FMA, Args: Ops); |
4030 | |
4031 | if (IsSubAdd) |
4032 | std::swap(a&: Even, b&: Odd); |
4033 | |
4034 | SmallVector<int, 32> Idxs(NumElts); |
4035 | for (int i = 0; i != NumElts; ++i) |
4036 | Idxs[i] = i + (i % 2) * NumElts; |
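      // e.g. with NumElts = 4 the mask is <0, 5, 2, 7>: even result elements
      // come from Even (a*b-c) and odd elements from Odd (a*b+c), matching
      // vfmaddsub semantics (the IsSubAdd swap above inverts this).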
4037 | |
4038 | Rep = Builder.CreateShuffleVector(V1: Even, V2: Odd, Mask: Idxs); |
4039 | } |
4040 | |
4041 | Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType()) |
4042 | : IsMask3 ? CI->getArgOperand(i: 2) |
4043 | : CI->getArgOperand(i: 0); |
4044 | |
4045 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
4046 | } else if (Name.starts_with(Prefix: "avx512.mask.pternlog." ) || |
4047 | Name.starts_with(Prefix: "avx512.maskz.pternlog." )) { |
4048 | bool ZeroMask = Name[11] == 'z'; |
4049 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
4050 | unsigned EltWidth = CI->getType()->getScalarSizeInBits(); |
4051 | Intrinsic::ID IID; |
4052 | if (VecWidth == 128 && EltWidth == 32) |
4053 | IID = Intrinsic::x86_avx512_pternlog_d_128; |
4054 | else if (VecWidth == 256 && EltWidth == 32) |
4055 | IID = Intrinsic::x86_avx512_pternlog_d_256; |
4056 | else if (VecWidth == 512 && EltWidth == 32) |
4057 | IID = Intrinsic::x86_avx512_pternlog_d_512; |
4058 | else if (VecWidth == 128 && EltWidth == 64) |
4059 | IID = Intrinsic::x86_avx512_pternlog_q_128; |
4060 | else if (VecWidth == 256 && EltWidth == 64) |
4061 | IID = Intrinsic::x86_avx512_pternlog_q_256; |
4062 | else if (VecWidth == 512 && EltWidth == 64) |
4063 | IID = Intrinsic::x86_avx512_pternlog_q_512; |
4064 | else |
4065 | llvm_unreachable("Unexpected intrinsic" ); |
4066 | |
4067 | Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4068 | CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)}; |
4069 | Rep = Builder.CreateIntrinsic(ID: IID, Args); |
4070 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType()) |
4071 | : CI->getArgOperand(i: 0); |
4072 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: PassThru); |
4073 | } else if (Name.starts_with(Prefix: "avx512.mask.vpmadd52" ) || |
4074 | Name.starts_with(Prefix: "avx512.maskz.vpmadd52" )) { |
4075 | bool ZeroMask = Name[11] == 'z'; |
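    // "avx512.mask.vpmadd52{l,h}..." has the l/h at index 20; the "maskz"
    // spelling shifts it to index 21.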
4076 | bool High = Name[20] == 'h' || Name[21] == 'h'; |
4077 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
4078 | Intrinsic::ID IID; |
4079 | if (VecWidth == 128 && !High) |
4080 | IID = Intrinsic::x86_avx512_vpmadd52l_uq_128; |
4081 | else if (VecWidth == 256 && !High) |
4082 | IID = Intrinsic::x86_avx512_vpmadd52l_uq_256; |
4083 | else if (VecWidth == 512 && !High) |
4084 | IID = Intrinsic::x86_avx512_vpmadd52l_uq_512; |
4085 | else if (VecWidth == 128 && High) |
4086 | IID = Intrinsic::x86_avx512_vpmadd52h_uq_128; |
4087 | else if (VecWidth == 256 && High) |
4088 | IID = Intrinsic::x86_avx512_vpmadd52h_uq_256; |
4089 | else if (VecWidth == 512 && High) |
4090 | IID = Intrinsic::x86_avx512_vpmadd52h_uq_512; |
4091 | else |
4092 | llvm_unreachable("Unexpected intrinsic" ); |
4093 | |
4094 | Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4095 | CI->getArgOperand(i: 2)}; |
4096 | Rep = Builder.CreateIntrinsic(ID: IID, Args); |
4097 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType()) |
4098 | : CI->getArgOperand(i: 0); |
4099 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
4100 | } else if (Name.starts_with(Prefix: "avx512.mask.vpermi2var." ) || |
4101 | Name.starts_with(Prefix: "avx512.mask.vpermt2var." ) || |
4102 | Name.starts_with(Prefix: "avx512.maskz.vpermt2var." )) { |
4103 | bool ZeroMask = Name[11] == 'z'; |
4104 | bool IndexForm = Name[17] == 'i'; |
4105 | Rep = upgradeX86VPERMT2Intrinsics(Builder, CI&: *CI, ZeroMask, IndexForm); |
4106 | } else if (Name.starts_with(Prefix: "avx512.mask.vpdpbusd." ) || |
4107 | Name.starts_with(Prefix: "avx512.maskz.vpdpbusd." ) || |
4108 | Name.starts_with(Prefix: "avx512.mask.vpdpbusds." ) || |
4109 | Name.starts_with(Prefix: "avx512.maskz.vpdpbusds." )) { |
4110 | bool ZeroMask = Name[11] == 'z'; |
4111 | bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; |
4112 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
4113 | Intrinsic::ID IID; |
4114 | if (VecWidth == 128 && !IsSaturating) |
4115 | IID = Intrinsic::x86_avx512_vpdpbusd_128; |
4116 | else if (VecWidth == 256 && !IsSaturating) |
4117 | IID = Intrinsic::x86_avx512_vpdpbusd_256; |
4118 | else if (VecWidth == 512 && !IsSaturating) |
4119 | IID = Intrinsic::x86_avx512_vpdpbusd_512; |
4120 | else if (VecWidth == 128 && IsSaturating) |
4121 | IID = Intrinsic::x86_avx512_vpdpbusds_128; |
4122 | else if (VecWidth == 256 && IsSaturating) |
4123 | IID = Intrinsic::x86_avx512_vpdpbusds_256; |
4124 | else if (VecWidth == 512 && IsSaturating) |
4125 | IID = Intrinsic::x86_avx512_vpdpbusds_512; |
4126 | else |
4127 | llvm_unreachable("Unexpected intrinsic" ); |
4128 | |
4129 | Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4130 | CI->getArgOperand(i: 2)}; |
4131 | Rep = Builder.CreateIntrinsic(ID: IID, Args); |
4132 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType()) |
4133 | : CI->getArgOperand(i: 0); |
4134 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
4135 | } else if (Name.starts_with(Prefix: "avx512.mask.vpdpwssd." ) || |
4136 | Name.starts_with(Prefix: "avx512.maskz.vpdpwssd." ) || |
4137 | Name.starts_with(Prefix: "avx512.mask.vpdpwssds." ) || |
4138 | Name.starts_with(Prefix: "avx512.maskz.vpdpwssds." )) { |
4139 | bool ZeroMask = Name[11] == 'z'; |
4140 | bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; |
4141 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
4142 | Intrinsic::ID IID; |
4143 | if (VecWidth == 128 && !IsSaturating) |
4144 | IID = Intrinsic::x86_avx512_vpdpwssd_128; |
4145 | else if (VecWidth == 256 && !IsSaturating) |
4146 | IID = Intrinsic::x86_avx512_vpdpwssd_256; |
4147 | else if (VecWidth == 512 && !IsSaturating) |
4148 | IID = Intrinsic::x86_avx512_vpdpwssd_512; |
4149 | else if (VecWidth == 128 && IsSaturating) |
4150 | IID = Intrinsic::x86_avx512_vpdpwssds_128; |
4151 | else if (VecWidth == 256 && IsSaturating) |
4152 | IID = Intrinsic::x86_avx512_vpdpwssds_256; |
4153 | else if (VecWidth == 512 && IsSaturating) |
4154 | IID = Intrinsic::x86_avx512_vpdpwssds_512; |
4155 | else |
4156 | llvm_unreachable("Unexpected intrinsic" ); |
4157 | |
4158 | Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4159 | CI->getArgOperand(i: 2)}; |
4160 | Rep = Builder.CreateIntrinsic(ID: IID, Args); |
4161 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType()) |
4162 | : CI->getArgOperand(i: 0); |
4163 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
4164 | } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" || |
4165 | Name == "addcarry.u32" || Name == "addcarry.u64" || |
4166 | Name == "subborrow.u32" || Name == "subborrow.u64" ) { |
4167 | Intrinsic::ID IID; |
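    // Distinguish add/sub by the first character and the width by the last
    // ("u32" ends in '2', "u64" ends in '4').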
4168 | if (Name[0] == 'a' && Name.back() == '2') |
4169 | IID = Intrinsic::x86_addcarry_32; |
4170 | else if (Name[0] == 'a' && Name.back() == '4') |
4171 | IID = Intrinsic::x86_addcarry_64; |
4172 | else if (Name[0] == 's' && Name.back() == '2') |
4173 | IID = Intrinsic::x86_subborrow_32; |
4174 | else if (Name[0] == 's' && Name.back() == '4') |
4175 | IID = Intrinsic::x86_subborrow_64; |
4176 | else |
4177 | llvm_unreachable("Unexpected intrinsic" ); |
4178 | |
4179 | // Make a call with 3 operands. |
4180 | Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4181 | CI->getArgOperand(i: 2)}; |
4182 | Value *NewCall = Builder.CreateIntrinsic(ID: IID, Args); |
4183 | |
4184 | // Extract the second result and store it. |
4185 | Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1); |
4186 | Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 3), Align: Align(1)); |
4187 | // Replace the original call result with the first result of the new call. |
4188 | Value *CF = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0); |
4189 | |
4190 | CI->replaceAllUsesWith(V: CF); |
4191 | Rep = nullptr; |
4192 | } else if (Name.starts_with(Prefix: "avx512.mask." ) && |
4193 | upgradeAVX512MaskToSelect(Name, Builder, CI&: *CI, Rep)) { |
4194 | // Rep will be updated by the call in the condition. |
4195 | } |
4196 | |
4197 | return Rep; |
4198 | } |
4199 | |
4200 | static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, |
4201 | Function *F, IRBuilder<> &Builder) { |
4202 | if (Name.starts_with(Prefix: "neon.bfcvt" )) { |
4203 | if (Name.starts_with(Prefix: "neon.bfcvtn2" )) { |
4204 | SmallVector<int, 32> LoMask(4); |
4205 | std::iota(first: LoMask.begin(), last: LoMask.end(), value: 0); |
4206 | SmallVector<int, 32> ConcatMask(8); |
4207 | std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0); |
4208 | Value *Inactive = Builder.CreateShuffleVector(V: CI->getOperand(i_nocapture: 0), Mask: LoMask); |
4209 | Value *Trunc = |
4210 | Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 1), DestTy: Inactive->getType()); |
4211 | return Builder.CreateShuffleVector(V1: Inactive, V2: Trunc, Mask: ConcatMask); |
4212 | } else if (Name.starts_with(Prefix: "neon.bfcvtn" )) { |
4213 | SmallVector<int, 32> ConcatMask(8); |
4214 | std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0); |
4215 | Type *V4BF16 = |
4216 | FixedVectorType::get(ElementType: Type::getBFloatTy(C&: F->getContext()), NumElts: 4); |
4217 | Value *Trunc = Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0), DestTy: V4BF16); |
4218 | dbgs() << "Trunc: " << *Trunc << "\n" ; |
4219 | return Builder.CreateShuffleVector( |
4220 | V1: Trunc, V2: ConstantAggregateZero::get(Ty: V4BF16), Mask: ConcatMask); |
4221 | } else { |
4222 | return Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0), |
4223 | DestTy: Type::getBFloatTy(C&: F->getContext())); |
4224 | } |
4225 | } else if (Name.starts_with(Prefix: "sve.fcvt" )) { |
4226 | Intrinsic::ID NewID = |
4227 | StringSwitch<Intrinsic::ID>(Name) |
4228 | .Case(S: "sve.fcvt.bf16f32" , Value: Intrinsic::aarch64_sve_fcvt_bf16f32_v2) |
4229 | .Case(S: "sve.fcvtnt.bf16f32" , |
4230 | Value: Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2) |
4231 | .Default(Value: Intrinsic::not_intrinsic); |
4232 | if (NewID == Intrinsic::not_intrinsic) |
4233 | llvm_unreachable("Unhandled Intrinsic!" ); |
4234 | |
4235 | SmallVector<Value *, 3> Args(CI->args()); |
4236 | |
4237 | // The original intrinsics incorrectly used a predicate based on the |
4238 | // smallest element type rather than the largest. |
4239 | Type *BadPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8); |
4240 | Type *GoodPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4); |
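    // e.g. for fcvt.bf16f32 the old predicate was nxv8i1 (based on the bf16
    // elements); the replacement expects nxv4i1 to match the f32 source
    // elements.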
4241 | |
4242 | if (Args[1]->getType() != BadPredTy) |
4243 | llvm_unreachable("Unexpected predicate type!" ); |
4244 | |
4245 | Args[1] = Builder.CreateIntrinsic(ID: Intrinsic::aarch64_sve_convert_to_svbool, |
4246 | Types: BadPredTy, Args: Args[1]); |
4247 | Args[1] = Builder.CreateIntrinsic( |
4248 | ID: Intrinsic::aarch64_sve_convert_from_svbool, Types: GoodPredTy, Args: Args[1]); |
4249 | |
4250 | return Builder.CreateIntrinsic(ID: NewID, Args, /*FMFSource=*/nullptr, |
4251 | Name: CI->getName()); |
4252 | } |
4253 | |
4254 | llvm_unreachable("Unhandled Intrinsic!" ); |
4255 | } |
4256 | |
4257 | static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, |
4258 | IRBuilder<> &Builder) { |
4259 | if (Name == "mve.vctp64.old" ) { |
4260 | // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the |
4261 | // correct type. |
4262 | Value *VCTP = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_vctp64, Types: {}, |
4263 | Args: CI->getArgOperand(i: 0), |
4264 | /*FMFSource=*/nullptr, Name: CI->getName()); |
4265 | Value *C1 = Builder.CreateIntrinsic( |
4266 | ID: Intrinsic::arm_mve_pred_v2i, |
4267 | Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 2, Scalable: false)}, Args: VCTP); |
4268 | return Builder.CreateIntrinsic( |
4269 | ID: Intrinsic::arm_mve_pred_i2v, |
4270 | Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: C1); |
4271 | } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" || |
4272 | Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" || |
4273 | Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" || |
4274 | Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" || |
4275 | Name == |
4276 | "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" || |
4277 | Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" || |
4278 | Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" || |
4279 | Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" || |
4280 | Name == |
4281 | "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" || |
4282 | Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" || |
4283 | Name == "cde.vcx1q.predicated.v2i64.v4i1" || |
4284 | Name == "cde.vcx1qa.predicated.v2i64.v4i1" || |
4285 | Name == "cde.vcx2q.predicated.v2i64.v4i1" || |
4286 | Name == "cde.vcx2qa.predicated.v2i64.v4i1" || |
4287 | Name == "cde.vcx3q.predicated.v2i64.v4i1" || |
4288 | Name == "cde.vcx3qa.predicated.v2i64.v4i1" ) { |
4289 | std::vector<Type *> Tys; |
4290 | unsigned ID = CI->getIntrinsicID(); |
4291 | Type *V2I1Ty = FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts: 2); |
4292 | switch (ID) { |
4293 | case Intrinsic::arm_mve_mull_int_predicated: |
4294 | case Intrinsic::arm_mve_vqdmull_predicated: |
4295 | case Intrinsic::arm_mve_vldr_gather_base_predicated: |
4296 | Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), V2I1Ty}; |
4297 | break; |
4298 | case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: |
4299 | case Intrinsic::arm_mve_vstr_scatter_base_predicated: |
4300 | case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: |
4301 | Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 0)->getType(), |
4302 | V2I1Ty}; |
4303 | break; |
4304 | case Intrinsic::arm_mve_vldr_gather_offset_predicated: |
4305 | Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), |
4306 | CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty}; |
4307 | break; |
4308 | case Intrinsic::arm_mve_vstr_scatter_offset_predicated: |
4309 | Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 1)->getType(), |
4310 | CI->getOperand(i_nocapture: 2)->getType(), V2I1Ty}; |
4311 | break; |
4312 | case Intrinsic::arm_cde_vcx1q_predicated: |
4313 | case Intrinsic::arm_cde_vcx1qa_predicated: |
4314 | case Intrinsic::arm_cde_vcx2q_predicated: |
4315 | case Intrinsic::arm_cde_vcx2qa_predicated: |
4316 | case Intrinsic::arm_cde_vcx3q_predicated: |
4317 | case Intrinsic::arm_cde_vcx3qa_predicated: |
4318 | Tys = {CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty}; |
4319 | break; |
4320 | default: |
4321 | llvm_unreachable("Unhandled Intrinsic!" ); |
4322 | } |
4323 | |
4324 | std::vector<Value *> Ops; |
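    // Any v4i1 predicate operand must be reinterpreted as the v2i1 the new
    // intrinsic expects; round-trip it through the integer predicate form.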
4325 | for (Value *Op : CI->args()) { |
4326 | Type *Ty = Op->getType(); |
4327 | if (Ty->getScalarSizeInBits() == 1) { |
4328 | Value *C1 = Builder.CreateIntrinsic( |
4329 | ID: Intrinsic::arm_mve_pred_v2i, |
4330 | Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: Op); |
4331 | Op = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_pred_i2v, Types: {V2I1Ty}, Args: C1); |
4332 | } |
4333 | Ops.push_back(x: Op); |
4334 | } |
4335 | |
4336 | return Builder.CreateIntrinsic(ID, Types: Tys, Args: Ops, /*FMFSource=*/nullptr, |
4337 | Name: CI->getName()); |
4338 | } |
4339 | llvm_unreachable("Unknown function for ARM CallBase upgrade." ); |
4340 | } |
4341 | |
4342 | // These are expected to have the arguments: |
4343 | // atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile) |
4344 | // |
// Except for int_amdgcn_ds_fadd_v2bf16, which only has (ptr, rmw_value).
4346 | // |
4347 | static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, |
4348 | Function *F, IRBuilder<> &Builder) { |
4349 | AtomicRMWInst::BinOp RMWOp = |
4350 | StringSwitch<AtomicRMWInst::BinOp>(Name) |
4351 | .StartsWith(S: "ds.fadd" , Value: AtomicRMWInst::FAdd) |
4352 | .StartsWith(S: "ds.fmin" , Value: AtomicRMWInst::FMin) |
4353 | .StartsWith(S: "ds.fmax" , Value: AtomicRMWInst::FMax) |
4354 | .StartsWith(S: "atomic.inc." , Value: AtomicRMWInst::UIncWrap) |
4355 | .StartsWith(S: "atomic.dec." , Value: AtomicRMWInst::UDecWrap) |
4356 | .StartsWith(S: "global.atomic.fadd" , Value: AtomicRMWInst::FAdd) |
4357 | .StartsWith(S: "flat.atomic.fadd" , Value: AtomicRMWInst::FAdd) |
4358 | .StartsWith(S: "global.atomic.fmin" , Value: AtomicRMWInst::FMin) |
4359 | .StartsWith(S: "flat.atomic.fmin" , Value: AtomicRMWInst::FMin) |
4360 | .StartsWith(S: "global.atomic.fmax" , Value: AtomicRMWInst::FMax) |
4361 | .StartsWith(S: "flat.atomic.fmax" , Value: AtomicRMWInst::FMax); |
4362 | |
4363 | unsigned NumOperands = CI->getNumOperands(); |
4364 | if (NumOperands < 3) // Malformed bitcode. |
4365 | return nullptr; |
4366 | |
4367 | Value *Ptr = CI->getArgOperand(i: 0); |
4368 | PointerType *PtrTy = dyn_cast<PointerType>(Val: Ptr->getType()); |
4369 | if (!PtrTy) // Malformed. |
4370 | return nullptr; |
4371 | |
4372 | Value *Val = CI->getArgOperand(i: 1); |
4373 | if (Val->getType() != CI->getType()) // Malformed. |
4374 | return nullptr; |
4375 | |
4376 | ConstantInt *OrderArg = nullptr; |
4377 | bool IsVolatile = false; |
4378 | |
4379 | // These should have 5 arguments (plus the callee). A separate version of the |
  // ds_fadd intrinsic was defined for bf16, which omitted the trailing
  // arguments.
4381 | if (NumOperands > 3) |
4382 | OrderArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2)); |
4383 | |
  // Ignore the scope argument at index 3.
4385 | |
4386 | if (NumOperands > 5) { |
4387 | ConstantInt *VolatileArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 4)); |
4388 | IsVolatile = !VolatileArg || !VolatileArg->isZero(); |
4389 | } |
4390 | |
4391 | AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent; |
4392 | if (OrderArg && isValidAtomicOrdering(I: OrderArg->getZExtValue())) |
4393 | Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue()); |
4394 | if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered) |
4395 | Order = AtomicOrdering::SequentiallyConsistent; |
4396 | |
4397 | LLVMContext &Ctx = F->getContext(); |
4398 | |
4399 | // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat> |
4400 | Type *RetTy = CI->getType(); |
4401 | if (VectorType *VT = dyn_cast<VectorType>(Val: RetTy)) { |
4402 | if (VT->getElementType()->isIntegerTy(Bitwidth: 16)) { |
4403 | VectorType *AsBF16 = |
4404 | VectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), EC: VT->getElementCount()); |
4405 | Val = Builder.CreateBitCast(V: Val, DestTy: AsBF16); |
4406 | } |
4407 | } |
4408 | |
  // The scope argument never really worked correctly. Use agent as the most
  // conservative option, which should still always produce the instruction.
4411 | SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID(SSN: "agent" ); |
4412 | AtomicRMWInst *RMW = |
4413 | Builder.CreateAtomicRMW(Op: RMWOp, Ptr, Val, Align: std::nullopt, Ordering: Order, SSID); |
4414 | |
4415 | unsigned AddrSpace = PtrTy->getAddressSpace(); |
4416 | if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { |
4417 | MDNode *EmptyMD = MDNode::get(Context&: F->getContext(), MDs: {}); |
4418 | RMW->setMetadata(Kind: "amdgpu.no.fine.grained.memory" , Node: EmptyMD); |
4419 | if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) |
4420 | RMW->setMetadata(Kind: "amdgpu.ignore.denormal.mode" , Node: EmptyMD); |
4421 | } |
4422 | |
4423 | if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { |
4424 | MDBuilder MDB(F->getContext()); |
4425 | MDNode *RangeNotPrivate = |
4426 | MDB.createRange(Lo: APInt(32, AMDGPUAS::PRIVATE_ADDRESS), |
4427 | Hi: APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); |
4428 | RMW->setMetadata(KindID: LLVMContext::MD_noalias_addrspace, Node: RangeNotPrivate); |
4429 | } |
4430 | |
4431 | if (IsVolatile) |
4432 | RMW->setVolatile(true); |
4433 | |
4434 | return Builder.CreateBitCast(V: RMW, DestTy: RetTy); |
4435 | } |
4436 | |
4437 | /// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a |
4438 | /// plain MDNode, as it's the verifier's job to check these are the correct |
4439 | /// types later. |
4440 | static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) { |
4441 | if (Op < CI->arg_size()) { |
4442 | if (MetadataAsValue *MAV = |
4443 | dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op))) { |
4444 | Metadata *MD = MAV->getMetadata(); |
4445 | return dyn_cast_if_present<MDNode>(Val: MD); |
4446 | } |
4447 | } |
4448 | return nullptr; |
4449 | } |
4450 | |
/// Helper to unwrap MetadataAsValue operands into the plain Metadata they
/// wrap, such as the Value field.
4452 | static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) { |
4453 | if (Op < CI->arg_size()) |
4454 | if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op))) |
4455 | return MAV->getMetadata(); |
4456 | return nullptr; |
4457 | } |
4458 | |
4459 | static MDNode *getDebugLocSafe(const Instruction *I) { |
4460 | // The MDNode attached to this instruction might not be the correct type, |
  // as the verifier has not yet been run. Fetch it as a bare MDNode.
4462 | return I->getDebugLoc().getAsMDNode(); |
4463 | } |
4464 | |
4465 | /// Convert debug intrinsic calls to non-instruction debug records. |
4466 | /// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value. |
4467 | /// \p CI - The debug intrinsic call. |
4468 | static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) { |
4469 | DbgRecord *DR = nullptr; |
4470 | if (Name == "label" ) { |
4471 | DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(Label: unwrapMAVOp(CI, Op: 0), |
4472 | DL: CI->getDebugLoc()); |
4473 | } else if (Name == "assign" ) { |
4474 | DR = DbgVariableRecord::createUnresolvedDbgVariableRecord( |
4475 | Type: DbgVariableRecord::LocationType::Assign, Val: unwrapMAVMetadataOp(CI, Op: 0), |
4476 | Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: unwrapMAVOp(CI, Op: 3), |
4477 | Address: unwrapMAVMetadataOp(CI, Op: 4), |
        /* The address is a Value ref; it will be stored as Metadata. */
4479 | AddressExpression: unwrapMAVOp(CI, Op: 5), DI: getDebugLocSafe(I: CI)); |
4480 | } else if (Name == "declare" ) { |
4481 | DR = DbgVariableRecord::createUnresolvedDbgVariableRecord( |
4482 | Type: DbgVariableRecord::LocationType::Declare, Val: unwrapMAVMetadataOp(CI, Op: 0), |
4483 | Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: nullptr, Address: nullptr, AddressExpression: nullptr, |
4484 | DI: getDebugLocSafe(I: CI)); |
4485 | } else if (Name == "addr" ) { |
4486 | // Upgrade dbg.addr to dbg.value with DW_OP_deref. |
4487 | MDNode *ExprNode = unwrapMAVOp(CI, Op: 2); |
4488 | // Don't try to add something to the expression if it's not an expression. |
4489 | // Instead, allow the verifier to fail later. |
4490 | if (DIExpression *Expr = dyn_cast<DIExpression>(Val: ExprNode)) { |
4491 | ExprNode = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref); |
4492 | } |
4493 | DR = DbgVariableRecord::createUnresolvedDbgVariableRecord( |
4494 | Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0), |
4495 | Variable: unwrapMAVOp(CI, Op: 1), Expression: ExprNode, AssignID: nullptr, Address: nullptr, AddressExpression: nullptr, |
4496 | DI: getDebugLocSafe(I: CI)); |
4497 | } else if (Name == "value" ) { |
4498 | // An old version of dbg.value had an extra offset argument. |
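    // i.e. (metadata <value>, i64 <offset>, metadata <variable>,
    // metadata <expression>) rather than the three-operand form.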
4499 | unsigned VarOp = 1; |
4500 | unsigned ExprOp = 2; |
4501 | if (CI->arg_size() == 4) { |
4502 | auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1)); |
4503 | // Nonzero offset dbg.values get dropped without a replacement. |
4504 | if (!Offset || !Offset->isZeroValue()) |
4505 | return; |
4506 | VarOp = 2; |
4507 | ExprOp = 3; |
4508 | } |
4509 | DR = DbgVariableRecord::createUnresolvedDbgVariableRecord( |
4510 | Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0), |
4511 | Variable: unwrapMAVOp(CI, Op: VarOp), Expression: unwrapMAVOp(CI, Op: ExprOp), AssignID: nullptr, Address: nullptr, |
4512 | AddressExpression: nullptr, DI: getDebugLocSafe(I: CI)); |
4513 | } |
4514 | assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord" ); |
4515 | CI->getParent()->insertDbgRecordBefore(DR, Here: CI->getIterator()); |
4516 | } |
4517 | |
4518 | /// Upgrade a call to an old intrinsic. All argument and return casting must be |
4519 | /// provided to seamlessly integrate with existing context. |
4520 | void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { |
  // Note that dyn_cast to Function is not quite the same as getCalledFunction,
  // which checks that the callee's function type matches. It's likely we need
  // to handle type changes here.
4524 | Function *F = dyn_cast<Function>(Val: CI->getCalledOperand()); |
4525 | if (!F) |
4526 | return; |
4527 | |
4528 | LLVMContext &C = CI->getContext(); |
4529 | IRBuilder<> Builder(C); |
4530 | Builder.SetInsertPoint(TheBB: CI->getParent(), IP: CI->getIterator()); |
4531 | |
4532 | if (!NewFn) { |
4533 | // Get the Function's name. |
4534 | StringRef Name = F->getName(); |
4535 | |
4536 | assert(Name.starts_with("llvm." ) && "Intrinsic doesn't start with 'llvm.'" ); |
4537 | Name = Name.substr(Start: 5); |
4538 | |
4539 | bool IsX86 = Name.consume_front(Prefix: "x86." ); |
4540 | bool IsNVVM = Name.consume_front(Prefix: "nvvm." ); |
4541 | bool IsAArch64 = Name.consume_front(Prefix: "aarch64." ); |
4542 | bool IsARM = Name.consume_front(Prefix: "arm." ); |
4543 | bool IsAMDGCN = Name.consume_front(Prefix: "amdgcn." ); |
4544 | bool IsDbg = Name.consume_front(Prefix: "dbg." ); |
4545 | Value *Rep = nullptr; |
4546 | |
4547 | if (!IsX86 && Name == "stackprotectorcheck" ) { |
4548 | Rep = nullptr; |
4549 | } else if (IsNVVM) { |
4550 | Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder); |
4551 | } else if (IsX86) { |
4552 | Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder); |
4553 | } else if (IsAArch64) { |
4554 | Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder); |
4555 | } else if (IsARM) { |
4556 | Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder); |
4557 | } else if (IsAMDGCN) { |
4558 | Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder); |
4559 | } else if (IsDbg) { |
4560 | upgradeDbgIntrinsicToDbgRecord(Name, CI); |
4561 | } else { |
4562 | llvm_unreachable("Unknown function for CallBase upgrade." ); |
4563 | } |
4564 | |
4565 | if (Rep) |
4566 | CI->replaceAllUsesWith(V: Rep); |
4567 | CI->eraseFromParent(); |
4568 | return; |
4569 | } |
4570 | |
4571 | const auto &DefaultCase = [&]() -> void { |
4572 | if (CI->getFunctionType() == NewFn->getFunctionType()) { |
4573 | // Handle generic mangling change. |
4574 | assert( |
4575 | (CI->getCalledFunction()->getName() != NewFn->getName()) && |
4576 | "Unknown function for CallBase upgrade and isn't just a name change" ); |
4577 | CI->setCalledFunction(NewFn); |
4578 | return; |
4579 | } |
4580 | |
4581 | // This must be an upgrade from a named to a literal struct. |
4582 | if (auto *OldST = dyn_cast<StructType>(Val: CI->getType())) { |
4583 | assert(OldST != NewFn->getReturnType() && |
4584 | "Return type must have changed" ); |
4585 | assert(OldST->getNumElements() == |
4586 | cast<StructType>(NewFn->getReturnType())->getNumElements() && |
4587 | "Must have same number of elements" ); |
4588 | |
4589 | SmallVector<Value *> Args(CI->args()); |
4590 | CallInst *NewCI = Builder.CreateCall(Callee: NewFn, Args); |
4591 | NewCI->setAttributes(CI->getAttributes()); |
4592 | Value *Res = PoisonValue::get(T: OldST); |
4593 | for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) { |
4594 | Value *Elem = Builder.CreateExtractValue(Agg: NewCI, Idxs: Idx); |
4595 | Res = Builder.CreateInsertValue(Agg: Res, Val: Elem, Idxs: Idx); |
4596 | } |
4597 | CI->replaceAllUsesWith(V: Res); |
4598 | CI->eraseFromParent(); |
4599 | return; |
4600 | } |
4601 | |
4602 | // We're probably about to produce something invalid. Let the verifier catch |
4603 | // it instead of dying here. |
4604 | CI->setCalledOperand( |
4605 | ConstantExpr::getPointerCast(C: NewFn, Ty: CI->getCalledOperand()->getType())); |
4606 | return; |
4607 | }; |
4608 | CallInst *NewCall = nullptr; |
4609 | switch (NewFn->getIntrinsicID()) { |
4610 | default: { |
4611 | DefaultCase(); |
4612 | return; |
4613 | } |
4614 | case Intrinsic::arm_neon_vst1: |
4615 | case Intrinsic::arm_neon_vst2: |
4616 | case Intrinsic::arm_neon_vst3: |
4617 | case Intrinsic::arm_neon_vst4: |
4618 | case Intrinsic::arm_neon_vst2lane: |
4619 | case Intrinsic::arm_neon_vst3lane: |
4620 | case Intrinsic::arm_neon_vst4lane: { |
4621 | SmallVector<Value *, 4> Args(CI->args()); |
4622 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4623 | break; |
4624 | } |
4625 | case Intrinsic::aarch64_sve_bfmlalb_lane_v2: |
4626 | case Intrinsic::aarch64_sve_bfmlalt_lane_v2: |
4627 | case Intrinsic::aarch64_sve_bfdot_lane_v2: { |
4628 | LLVMContext &Ctx = F->getParent()->getContext(); |
4629 | SmallVector<Value *, 4> Args(CI->args()); |
4630 | Args[3] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), |
4631 | V: cast<ConstantInt>(Val: Args[3])->getZExtValue()); |
4632 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4633 | break; |
4634 | } |
4635 | case Intrinsic::aarch64_sve_ld3_sret: |
4636 | case Intrinsic::aarch64_sve_ld4_sret: |
4637 | case Intrinsic::aarch64_sve_ld2_sret: { |
4638 | StringRef Name = F->getName(); |
4639 | Name = Name.substr(Start: 5); |
4640 | unsigned N = StringSwitch<unsigned>(Name) |
4641 | .StartsWith(S: "aarch64.sve.ld2" , Value: 2) |
4642 | .StartsWith(S: "aarch64.sve.ld3" , Value: 3) |
4643 | .StartsWith(S: "aarch64.sve.ld4" , Value: 4) |
4644 | .Default(Value: 0); |
4645 | auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType()); |
4646 | unsigned MinElts = RetTy->getMinNumElements() / N; |
4647 | SmallVector<Value *, 2> Args(CI->args()); |
4648 | Value *NewLdCall = Builder.CreateCall(Callee: NewFn, Args); |
4649 | Value *Ret = llvm::PoisonValue::get(T: RetTy); |
4650 | for (unsigned I = 0; I < N; I++) { |
4651 | Value *SRet = Builder.CreateExtractValue(Agg: NewLdCall, Idxs: I); |
4652 | Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: SRet, Idx: I * MinElts); |
4653 | } |
4654 | NewCall = dyn_cast<CallInst>(Val: Ret); |
4655 | break; |
4656 | } |
4657 | |
4658 | case Intrinsic::coro_end: { |
4659 | SmallVector<Value *, 3> Args(CI->args()); |
4660 | Args.push_back(Elt: ConstantTokenNone::get(Context&: CI->getContext())); |
4661 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4662 | break; |
4663 | } |
4664 | |
4665 | case Intrinsic::vector_extract: { |
4666 | StringRef Name = F->getName(); |
Name = Name.substr(Start: 5); // Strip llvm.
4668 | if (!Name.starts_with(Prefix: "aarch64.sve.tuple.get" )) { |
4669 | DefaultCase(); |
4670 | return; |
4671 | } |
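// The old aarch64.sve.tuple.get is mapped onto vector.extract by scaling
// the tuple index to an element offset, e.g. (illustrative IR):
//   %v = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get(... %tuple, i32 1)
// becomes
//   %v = call <vscale x 4 x i32> @llvm.vector.extract(... %tuple, i64 4)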
4672 | auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType()); |
4673 | unsigned MinElts = RetTy->getMinNumElements(); |
4674 | unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
4675 | Value *NewIdx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts); |
4676 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), NewIdx}); |
4677 | break; |
4678 | } |
4679 | |
4680 | case Intrinsic::vector_insert: { |
4681 | StringRef Name = F->getName(); |
4682 | Name = Name.substr(Start: 5); |
4683 | if (!Name.starts_with(Prefix: "aarch64.sve.tuple" )) { |
4684 | DefaultCase(); |
4685 | return; |
4686 | } |
4687 | if (Name.starts_with(Prefix: "aarch64.sve.tuple.set" )) { |
4688 | unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
4689 | auto *Ty = cast<ScalableVectorType>(Val: CI->getArgOperand(i: 2)->getType()); |
4690 | Value *NewIdx = |
4691 | ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * Ty->getMinNumElements()); |
4692 | NewCall = Builder.CreateCall( |
4693 | Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), NewIdx}); |
4694 | break; |
4695 | } |
4696 | if (Name.starts_with(Prefix: "aarch64.sve.tuple.create" )) { |
4697 | unsigned N = StringSwitch<unsigned>(Name) |
4698 | .StartsWith(S: "aarch64.sve.tuple.create2" , Value: 2) |
4699 | .StartsWith(S: "aarch64.sve.tuple.create3" , Value: 3) |
4700 | .StartsWith(S: "aarch64.sve.tuple.create4" , Value: 4) |
4701 | .Default(Value: 0); |
4702 | assert(N > 1 && "Create is expected to be between 2-4" ); |
4703 | auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType()); |
4704 | Value *Ret = llvm::PoisonValue::get(T: RetTy); |
4705 | unsigned MinElts = RetTy->getMinNumElements() / N; |
4706 | for (unsigned I = 0; I < N; I++) { |
4707 | Value *V = CI->getArgOperand(i: I); |
4708 | Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: V, Idx: I * MinElts); |
4709 | } |
4710 | NewCall = dyn_cast<CallInst>(Val: Ret); |
4711 | } |
4712 | break; |
4713 | } |
4714 | |
4715 | case Intrinsic::arm_neon_bfdot: |
4716 | case Intrinsic::arm_neon_bfmmla: |
4717 | case Intrinsic::arm_neon_bfmlalb: |
4718 | case Intrinsic::arm_neon_bfmlalt: |
4719 | case Intrinsic::aarch64_neon_bfdot: |
4720 | case Intrinsic::aarch64_neon_bfmmla: |
4721 | case Intrinsic::aarch64_neon_bfmlalb: |
4722 | case Intrinsic::aarch64_neon_bfmlalt: { |
4723 | SmallVector<Value *, 3> Args; |
4724 | assert(CI->arg_size() == 3 && |
4725 | "Mismatch between function args and call args" ); |
4726 | size_t OperandWidth = |
4727 | CI->getArgOperand(i: 1)->getType()->getPrimitiveSizeInBits(); |
4728 | assert((OperandWidth == 64 || OperandWidth == 128) && |
4729 | "Unexpected operand width" ); |
4730 | Type *NewTy = FixedVectorType::get(ElementType: Type::getBFloatTy(C), NumElts: OperandWidth / 16); |
4731 | auto Iter = CI->args().begin(); |
4732 | Args.push_back(Elt: *Iter++); |
4733 | Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy)); |
4734 | Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy)); |
4735 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4736 | break; |
4737 | } |
4738 | |
4739 | case Intrinsic::bitreverse: |
4740 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)}); |
4741 | break; |
4742 | |
4743 | case Intrinsic::ctlz: |
4744 | case Intrinsic::cttz: |
4745 | assert(CI->arg_size() == 1 && |
4746 | "Mismatch between function args and call args" ); |
4747 | NewCall = |
4748 | Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), Builder.getFalse()}); |
4749 | break; |
4750 | |
4751 | case Intrinsic::objectsize: { |
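// Upgrade from the older 2- or 3-argument forms by filling in "false" for
// the missing nullunknown/dynamic operands.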
4752 | Value *NullIsUnknownSize = |
4753 | CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(i: 2); |
4754 | Value *Dynamic = |
4755 | CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(i: 3); |
4756 | NewCall = Builder.CreateCall( |
4757 | Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), NullIsUnknownSize, Dynamic}); |
4758 | break; |
4759 | } |
4760 | |
4761 | case Intrinsic::ctpop: |
4762 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)}); |
4763 | break; |
4764 | |
4765 | case Intrinsic::convert_from_fp16: |
4766 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)}); |
4767 | break; |
4768 | |
4769 | case Intrinsic::dbg_value: { |
4770 | StringRef Name = F->getName(); |
4771 | Name = Name.substr(Start: 5); // Strip llvm. |
4772 | // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`. |
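// For illustration, assuming an empty incoming expression:
//   call void @llvm.dbg.addr(metadata ptr %p, metadata !var,
//                            metadata !DIExpression())
// becomes
//   call void @llvm.dbg.value(metadata ptr %p, metadata !var,
//                             metadata !DIExpression(DW_OP_deref))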
4773 | if (Name.starts_with(Prefix: "dbg.addr" )) { |
4774 | DIExpression *Expr = cast<DIExpression>( |
4775 | Val: cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2))->getMetadata()); |
4776 | Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref); |
4777 | NewCall = |
4778 | Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4779 | MetadataAsValue::get(Context&: C, MD: Expr)}); |
4780 | break; |
4781 | } |
4782 | |
4783 | // Upgrade from the old version that had an extra offset argument. |
4784 | assert(CI->arg_size() == 4); |
4785 | // Drop nonzero offsets instead of attempting to upgrade them. |
4786 | if (auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1))) |
4787 | if (Offset->isZeroValue()) { |
4788 | NewCall = Builder.CreateCall( |
4789 | Callee: NewFn, |
4790 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)}); |
4791 | break; |
4792 | } |
4793 | CI->eraseFromParent(); |
4794 | return; |
4795 | } |
4796 | |
4797 | case Intrinsic::ptr_annotation: |
4798 | // Upgrade from versions that lacked the annotation attribute argument. |
4799 | if (CI->arg_size() != 4) { |
4800 | DefaultCase(); |
4801 | return; |
4802 | } |
4803 | |
4804 | // Create a new call with an added null annotation attribute argument. |
4805 | NewCall = Builder.CreateCall( |
4806 | Callee: NewFn, |
4807 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2), |
4808 | CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())}); |
4809 | NewCall->takeName(V: CI); |
4810 | CI->replaceAllUsesWith(V: NewCall); |
4811 | CI->eraseFromParent(); |
4812 | return; |
4813 | |
4814 | case Intrinsic::var_annotation: |
4815 | // Upgrade from versions that lacked the annotation attribute argument. |
4816 | if (CI->arg_size() != 4) { |
4817 | DefaultCase(); |
4818 | return; |
4819 | } |
4820 | // Create a new call with an added null annotation attribute argument. |
4821 | NewCall = Builder.CreateCall( |
4822 | Callee: NewFn, |
4823 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2), |
4824 | CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())}); |
4825 | NewCall->takeName(V: CI); |
4826 | CI->replaceAllUsesWith(V: NewCall); |
4827 | CI->eraseFromParent(); |
4828 | return; |
4829 | |
4830 | case Intrinsic::riscv_aes32dsi: |
4831 | case Intrinsic::riscv_aes32dsmi: |
4832 | case Intrinsic::riscv_aes32esi: |
4833 | case Intrinsic::riscv_aes32esmi: |
4834 | case Intrinsic::riscv_sm4ks: |
4835 | case Intrinsic::riscv_sm4ed: { |
4836 | // The last argument to these intrinsics used to be i8 and changed to i32. |
4837 | // The type overload for sm4ks and sm4ed was removed. |
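// For example, an old i64-overload use (illustrative IR)
//   %r = call i64 @llvm.riscv.sm4ks.i64(i64 %a, i64 %b, i8 2)
// is rewritten below as
//   %a32 = trunc i64 %a to i32
//   %b32 = trunc i64 %b to i32
//   %r32 = call i32 @llvm.riscv.sm4ks(i32 %a32, i32 %b32, i32 2)
//   %r = sext i32 %r32 to i64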
4838 | Value *Arg2 = CI->getArgOperand(i: 2); |
4839 | if (Arg2->getType()->isIntegerTy(Bitwidth: 32) && !CI->getType()->isIntegerTy(Bitwidth: 64)) |
4840 | return; |
4841 | |
4842 | Value *Arg0 = CI->getArgOperand(i: 0); |
4843 | Value *Arg1 = CI->getArgOperand(i: 1); |
4844 | if (CI->getType()->isIntegerTy(Bitwidth: 64)) { |
4845 | Arg0 = Builder.CreateTrunc(V: Arg0, DestTy: Builder.getInt32Ty()); |
4846 | Arg1 = Builder.CreateTrunc(V: Arg1, DestTy: Builder.getInt32Ty()); |
4847 | } |
4848 | |
4849 | Arg2 = ConstantInt::get(Ty: Type::getInt32Ty(C), |
4850 | V: cast<ConstantInt>(Val: Arg2)->getZExtValue()); |
4851 | |
4852 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {Arg0, Arg1, Arg2}); |
4853 | Value *Res = NewCall; |
4854 | if (Res->getType() != CI->getType()) |
4855 | Res = Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true); |
4856 | NewCall->takeName(V: CI); |
4857 | CI->replaceAllUsesWith(V: Res); |
4858 | CI->eraseFromParent(); |
4859 | return; |
4860 | } |
4861 | case Intrinsic::nvvm_mapa_shared_cluster: { |
4862 | // Create a new call with the correct address space. |
4863 | NewCall = |
4864 | Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)}); |
4865 | Value *Res = NewCall; |
4866 | Res = Builder.CreateAddrSpaceCast( |
4867 | V: Res, DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED)); |
4868 | NewCall->takeName(V: CI); |
4869 | CI->replaceAllUsesWith(V: Res); |
4870 | CI->eraseFromParent(); |
4871 | return; |
4872 | } |
4873 | case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster: |
4874 | case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: { |
4875 | // Create a new call with the correct address space. |
4876 | SmallVector<Value *, 4> Args(CI->args()); |
4877 | Args[0] = Builder.CreateAddrSpaceCast( |
4878 | V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)); |
4879 | |
4880 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4881 | NewCall->takeName(V: CI); |
4882 | CI->replaceAllUsesWith(V: NewCall); |
4883 | CI->eraseFromParent(); |
4884 | return; |
4885 | } |
4886 | case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d: |
4887 | case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d: |
4888 | case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d: |
4889 | case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d: |
4890 | case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d: |
4891 | case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d: |
4892 | case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d: |
4893 | case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: { |
4894 | SmallVector<Value *, 16> Args(CI->args()); |
4895 | |
4896 | // Create AddrSpaceCast to shared_cluster if needed. |
4897 | // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics(). |
4898 | unsigned AS = CI->getArgOperand(i: 0)->getType()->getPointerAddressSpace(); |
4899 | if (AS == NVPTXAS::ADDRESS_SPACE_SHARED) |
4900 | Args[0] = Builder.CreateAddrSpaceCast( |
4901 | V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)); |
4902 | |
4903 | // Attach the flag argument for cta_group, with a |
4904 | // default value of 0. This handles case (2) in |
4905 | // shouldUpgradeNVPTXTMAG2SIntrinsics(). |
4906 | size_t NumArgs = CI->arg_size(); |
4907 | Value *FlagArg = CI->getArgOperand(i: NumArgs - 3); |
4908 | if (!FlagArg->getType()->isIntegerTy(Bitwidth: 1)) |
4909 | Args.push_back(Elt: ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0)); |
4910 | |
4911 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4912 | NewCall->takeName(V: CI); |
4913 | CI->replaceAllUsesWith(V: NewCall); |
4914 | CI->eraseFromParent(); |
4915 | return; |
4916 | } |
4917 | case Intrinsic::riscv_sha256sig0: |
4918 | case Intrinsic::riscv_sha256sig1: |
4919 | case Intrinsic::riscv_sha256sum0: |
4920 | case Intrinsic::riscv_sha256sum1: |
4921 | case Intrinsic::riscv_sm3p0: |
4922 | case Intrinsic::riscv_sm3p1: { |
// These intrinsics used to have an i32/i64 type overload, which was removed;
// they now always operate on i32. Upgrade old i64 uses by truncating the
// argument and sign-extending the result.
4925 | if (!CI->getType()->isIntegerTy(Bitwidth: 64)) |
4926 | return; |
4927 | |
4928 | Value *Arg = |
4929 | Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getInt32Ty()); |
4930 | |
4931 | NewCall = Builder.CreateCall(Callee: NewFn, Args: Arg); |
4932 | Value *Res = |
4933 | Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true); |
4934 | NewCall->takeName(V: CI); |
4935 | CI->replaceAllUsesWith(V: Res); |
4936 | CI->eraseFromParent(); |
4937 | return; |
4938 | } |
4939 | |
4940 | case Intrinsic::x86_xop_vfrcz_ss: |
4941 | case Intrinsic::x86_xop_vfrcz_sd: |
4942 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 1)}); |
4943 | break; |
4944 | |
4945 | case Intrinsic::x86_xop_vpermil2pd: |
4946 | case Intrinsic::x86_xop_vpermil2ps: |
4947 | case Intrinsic::x86_xop_vpermil2pd_256: |
4948 | case Intrinsic::x86_xop_vpermil2ps_256: { |
4949 | SmallVector<Value *, 4> Args(CI->args()); |
4950 | VectorType *FltIdxTy = cast<VectorType>(Val: Args[2]->getType()); |
4951 | VectorType *IntIdxTy = VectorType::getInteger(VTy: FltIdxTy); |
4952 | Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: IntIdxTy); |
4953 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4954 | break; |
4955 | } |
4956 | |
4957 | case Intrinsic::x86_sse41_ptestc: |
4958 | case Intrinsic::x86_sse41_ptestz: |
4959 | case Intrinsic::x86_sse41_ptestnzc: { |
4960 | // The arguments for these intrinsics used to be v4f32, and changed |
4961 | // to v2i64. This is purely a nop, since those are bitwise intrinsics. |
4962 | // So, the only thing required is a bitcast for both arguments. |
4963 | // First, check the arguments have the old type. |
4964 | Value *Arg0 = CI->getArgOperand(i: 0); |
4965 | if (Arg0->getType() != FixedVectorType::get(ElementType: Type::getFloatTy(C), NumElts: 4)) |
4966 | return; |
4967 | |
4968 | // Old intrinsic, add bitcasts |
4969 | Value *Arg1 = CI->getArgOperand(i: 1); |
4970 | |
4971 | auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2); |
4972 | |
4973 | Value *BC0 = Builder.CreateBitCast(V: Arg0, DestTy: NewVecTy, Name: "cast" ); |
4974 | Value *BC1 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast" ); |
4975 | |
4976 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {BC0, BC1}); |
4977 | break; |
4978 | } |
4979 | |
4980 | case Intrinsic::x86_rdtscp: { |
// This used to take 1 argument. If we have no arguments, it is already
// upgraded.
4983 | if (CI->getNumOperands() == 0) |
4984 | return; |
4985 | |
4986 | NewCall = Builder.CreateCall(Callee: NewFn); |
4987 | // Extract the second result and store it. |
4988 | Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1); |
4989 | Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 0), Align: Align(1)); |
4990 | // Replace the original call result with the first result of the new call. |
4991 | Value *TSC = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0); |
4992 | |
4993 | NewCall->takeName(V: CI); |
4994 | CI->replaceAllUsesWith(V: TSC); |
4995 | CI->eraseFromParent(); |
4996 | return; |
4997 | } |
4998 | |
4999 | case Intrinsic::x86_sse41_insertps: |
5000 | case Intrinsic::x86_sse41_dppd: |
5001 | case Intrinsic::x86_sse41_dpps: |
5002 | case Intrinsic::x86_sse41_mpsadbw: |
5003 | case Intrinsic::x86_avx_dp_ps_256: |
5004 | case Intrinsic::x86_avx2_mpsadbw: { |
5005 | // Need to truncate the last argument from i32 to i8 -- this argument models |
5006 | // an inherently 8-bit immediate operand to these x86 instructions. |
5007 | SmallVector<Value *, 4> Args(CI->args()); |
5008 | |
5009 | // Replace the last argument with a trunc. |
5010 | Args.back() = Builder.CreateTrunc(V: Args.back(), DestTy: Type::getInt8Ty(C), Name: "trunc" ); |
5011 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
5012 | break; |
5013 | } |
5014 | |
5015 | case Intrinsic::x86_avx512_mask_cmp_pd_128: |
5016 | case Intrinsic::x86_avx512_mask_cmp_pd_256: |
5017 | case Intrinsic::x86_avx512_mask_cmp_pd_512: |
5018 | case Intrinsic::x86_avx512_mask_cmp_ps_128: |
5019 | case Intrinsic::x86_avx512_mask_cmp_ps_256: |
5020 | case Intrinsic::x86_avx512_mask_cmp_ps_512: { |
5021 | SmallVector<Value *, 4> Args(CI->args()); |
5022 | unsigned NumElts = |
5023 | cast<FixedVectorType>(Val: Args[0]->getType())->getNumElements(); |
5024 | Args[3] = getX86MaskVec(Builder, Mask: Args[3], NumElts); |
5025 | |
5026 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
5027 | Value *Res = applyX86MaskOn1BitsVec(Builder, Vec: NewCall, Mask: nullptr); |
5028 | |
5029 | NewCall->takeName(V: CI); |
5030 | CI->replaceAllUsesWith(V: Res); |
5031 | CI->eraseFromParent(); |
5032 | return; |
5033 | } |
5034 | |
5035 | case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128: |
5036 | case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256: |
5037 | case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512: |
5038 | case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128: |
5039 | case Intrinsic::x86_avx512bf16_cvtneps2bf16_256: |
5040 | case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: { |
5041 | SmallVector<Value *, 4> Args(CI->args()); |
5042 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
5043 | if (NewFn->getIntrinsicID() == |
5044 | Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128) |
5045 | Args[1] = Builder.CreateBitCast( |
5046 | V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts)); |
5047 | |
5048 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
5049 | Value *Res = Builder.CreateBitCast( |
5050 | V: NewCall, DestTy: FixedVectorType::get(ElementType: Builder.getInt16Ty(), NumElts)); |
5051 | |
5052 | NewCall->takeName(V: CI); |
5053 | CI->replaceAllUsesWith(V: Res); |
5054 | CI->eraseFromParent(); |
5055 | return; |
5056 | } |
5057 | case Intrinsic::x86_avx512bf16_dpbf16ps_128: |
5058 | case Intrinsic::x86_avx512bf16_dpbf16ps_256: |
5059 | case Intrinsic::x86_avx512bf16_dpbf16ps_512:{ |
5060 | SmallVector<Value *, 4> Args(CI->args()); |
5061 | unsigned NumElts = |
5062 | cast<FixedVectorType>(Val: CI->getType())->getNumElements() * 2; |
5063 | Args[1] = Builder.CreateBitCast( |
5064 | V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts)); |
5065 | Args[2] = Builder.CreateBitCast( |
5066 | V: Args[2], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts)); |
5067 | |
5068 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
5069 | break; |
5070 | } |
5071 | |
5072 | case Intrinsic::thread_pointer: { |
5073 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {}); |
5074 | break; |
5075 | } |
5076 | |
5077 | case Intrinsic::memcpy: |
5078 | case Intrinsic::memmove: |
5079 | case Intrinsic::memset: { |
5080 | // We have to make sure that the call signature is what we're expecting. |
5081 | // We only want to change the old signatures by removing the alignment arg: |
5082 | // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) |
5083 | // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) |
5084 | // @llvm.memset...(i8*, i8, i[32|64], i32, i1) |
5085 | // -> @llvm.memset...(i8*, i8, i[32|64], i1) |
5086 | // Note: i8*'s in the above can be any pointer type |
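// For example (illustrative IR), the old
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 8, i1 false)
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s,
//                                    i64 %n, i1 false)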
5087 | if (CI->arg_size() != 5) { |
5088 | DefaultCase(); |
5089 | return; |
5090 | } |
5091 | // Remove alignment argument (3), and add alignment attributes to the |
5092 | // dest/src pointers. |
5093 | Value *Args[4] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
5094 | CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)}; |
5095 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
5096 | AttributeList OldAttrs = CI->getAttributes(); |
5097 | AttributeList NewAttrs = AttributeList::get( |
5098 | C, FnAttrs: OldAttrs.getFnAttrs(), RetAttrs: OldAttrs.getRetAttrs(), |
5099 | ArgAttrs: {OldAttrs.getParamAttrs(ArgNo: 0), OldAttrs.getParamAttrs(ArgNo: 1), |
5100 | OldAttrs.getParamAttrs(ArgNo: 2), OldAttrs.getParamAttrs(ArgNo: 4)}); |
5101 | NewCall->setAttributes(NewAttrs); |
5102 | auto *MemCI = cast<MemIntrinsic>(Val: NewCall); |
5103 | // All mem intrinsics support dest alignment. |
5104 | const ConstantInt *Align = cast<ConstantInt>(Val: CI->getArgOperand(i: 3)); |
5105 | MemCI->setDestAlignment(Align->getMaybeAlignValue()); |
5106 | // Memcpy/Memmove also support source alignment. |
5107 | if (auto *MTI = dyn_cast<MemTransferInst>(Val: MemCI)) |
5108 | MTI->setSourceAlignment(Align->getMaybeAlignValue()); |
5109 | break; |
5110 | } |
5111 | } |
5112 | assert(NewCall && "Should have either set this variable or returned through " |
5113 | "the default case" ); |
5114 | NewCall->takeName(V: CI); |
5115 | CI->replaceAllUsesWith(V: NewCall); |
5116 | CI->eraseFromParent(); |
5117 | } |
5118 | |
5119 | void llvm::UpgradeCallsToIntrinsic(Function *F) { |
5120 | assert(F && "Illegal attempt to upgrade a non-existent intrinsic." ); |
5121 | |
5122 | // Check if this function should be upgraded and get the replacement function |
5123 | // if there is one. |
5124 | Function *NewFn; |
5125 | if (UpgradeIntrinsicFunction(F, NewFn)) { |
5126 | // Replace all users of the old function with the new function or new |
5127 | // instructions. This is not a range loop because the call is deleted. |
5128 | for (User *U : make_early_inc_range(Range: F->users())) |
5129 | if (CallBase *CB = dyn_cast<CallBase>(Val: U)) |
5130 | UpgradeIntrinsicCall(CI: CB, NewFn); |
5131 | |
5132 | // Remove old function, no longer used, from the module. |
5133 | F->eraseFromParent(); |
5134 | } |
5135 | } |
5136 | |
5137 | MDNode *llvm::UpgradeTBAANode(MDNode &MD) { |
5138 | const unsigned NumOperands = MD.getNumOperands(); |
5139 | if (NumOperands == 0) |
5140 | return &MD; // Invalid, punt to a verifier error. |
5141 | |
5142 | // Check if the tag uses struct-path aware TBAA format. |
5143 | if (isa<MDNode>(Val: MD.getOperand(I: 0)) && NumOperands >= 3) |
5144 | return &MD; |
5145 | |
5146 | auto &Context = MD.getContext(); |
5147 | if (NumOperands == 3) { |
5148 | Metadata *Elts[] = {MD.getOperand(I: 0), MD.getOperand(I: 1)}; |
5149 | MDNode *ScalarType = MDNode::get(Context, MDs: Elts); |
// Create an MDNode <ScalarType, ScalarType, offset 0, const>
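// e.g. the old scalar tag !{!"int", !parent, i64 1} becomes
//   !{!{!"int", !parent}, !{!"int", !parent}, i64 0, i64 1}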
5151 | Metadata *Elts2[] = {ScalarType, ScalarType, |
5152 | ConstantAsMetadata::get( |
5153 | C: Constant::getNullValue(Ty: Type::getInt64Ty(C&: Context))), |
5154 | MD.getOperand(I: 2)}; |
5155 | return MDNode::get(Context, MDs: Elts2); |
5156 | } |
// Create an MDNode <MD, MD, offset 0>
5158 | Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(C: Constant::getNullValue( |
5159 | Ty: Type::getInt64Ty(C&: Context)))}; |
5160 | return MDNode::get(Context, MDs: Elts); |
5161 | } |
5162 | |
5163 | Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, |
5164 | Instruction *&Temp) { |
5165 | if (Opc != Instruction::BitCast) |
5166 | return nullptr; |
5167 | |
5168 | Temp = nullptr; |
5169 | Type *SrcTy = V->getType(); |
5170 | if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && |
5171 | SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { |
5172 | LLVMContext &Context = V->getContext(); |
5173 | |
// We have no information about target data layout, so we assume that
// the maximum pointer size is 64 bits.
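// e.g. (illustrative IR) a "bitcast ptr addrspace(1) %p to ptr" from old
// bitcode is expanded into
//   %tmp = ptrtoint ptr addrspace(1) %p to i64
//   %cast = inttoptr i64 %tmp to ptr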
5176 | Type *MidTy = Type::getInt64Ty(C&: Context); |
5177 | Temp = CastInst::Create(Instruction::PtrToInt, S: V, Ty: MidTy); |
5178 | |
5179 | return CastInst::Create(Instruction::IntToPtr, S: Temp, Ty: DestTy); |
5180 | } |
5181 | |
5182 | return nullptr; |
5183 | } |
5184 | |
5185 | Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { |
5186 | if (Opc != Instruction::BitCast) |
5187 | return nullptr; |
5188 | |
5189 | Type *SrcTy = C->getType(); |
5190 | if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && |
5191 | SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { |
5192 | LLVMContext &Context = C->getContext(); |
5193 | |
// We have no information about target data layout, so we assume that
// the maximum pointer size is 64 bits.
5196 | Type *MidTy = Type::getInt64Ty(C&: Context); |
5197 | |
5198 | return ConstantExpr::getIntToPtr(C: ConstantExpr::getPtrToInt(C, Ty: MidTy), |
5199 | Ty: DestTy); |
5200 | } |
5201 | |
5202 | return nullptr; |
5203 | } |
5204 | |
/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module is modified.
5207 | bool llvm::UpgradeDebugInfo(Module &M) { |
5208 | if (DisableAutoUpgradeDebugInfo) |
5209 | return false; |
5210 | |
5211 | // We need to get metadata before the module is verified (i.e., getModuleFlag |
5212 | // makes assumptions that we haven't verified yet). Carefully extract the flag |
5213 | // from the metadata. |
5214 | unsigned Version = 0; |
5215 | if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) { |
5216 | auto OpIt = find_if(Range: ModFlags->operands(), P: [](const MDNode *Flag) { |
5217 | if (Flag->getNumOperands() < 3) |
5218 | return false; |
5219 | if (MDString *K = dyn_cast_or_null<MDString>(Val: Flag->getOperand(I: 1))) |
5220 | return K->getString() == "Debug Info Version" ; |
5221 | return false; |
5222 | }); |
5223 | if (OpIt != ModFlags->op_end()) { |
5224 | const MDOperand &ValOp = (*OpIt)->getOperand(I: 2); |
5225 | if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD: ValOp)) |
5226 | Version = CI->getZExtValue(); |
5227 | } |
5228 | } |
5229 | |
5230 | if (Version == DEBUG_METADATA_VERSION) { |
5231 | bool BrokenDebugInfo = false; |
5232 | if (verifyModule(M, OS: &llvm::errs(), BrokenDebugInfo: &BrokenDebugInfo)) |
5233 | report_fatal_error(reason: "Broken module found, compilation aborted!" ); |
5234 | if (!BrokenDebugInfo) |
5235 | // Everything is ok. |
5236 | return false; |
5237 | else { |
5238 | // Diagnose malformed debug info. |
5239 | DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M); |
5240 | M.getContext().diagnose(DI: Diag); |
5241 | } |
5242 | } |
5243 | bool Modified = StripDebugInfo(M); |
5244 | if (Modified && Version != DEBUG_METADATA_VERSION) { |
5245 | // Diagnose a version mismatch. |
5246 | DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); |
5247 | M.getContext().diagnose(DI: DiagVersion); |
5248 | } |
5249 | return Modified; |
5250 | } |
5251 | |
5252 | static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, |
5253 | GlobalValue *GV, const Metadata *V) { |
5254 | Function *F = cast<Function>(Val: GV); |
5255 | |
5256 | constexpr StringLiteral DefaultValue = "1" ; |
5257 | StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue}; |
5258 | unsigned Length = 0; |
5259 | |
5260 | if (F->hasFnAttribute(Kind: Attr)) { |
// We expect the existing attribute to have the form "x[,y[,z]]". Here we
// parse these elements, placing them into Vect3.
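// e.g. (hypothetical) an existing "nvvm.maxntid"="16" combined with a
// "maxntidz" annotation of 2 yields "16,1,2" below.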
5263 | StringRef S = F->getFnAttribute(Kind: Attr).getValueAsString(); |
5264 | for (; Length < 3 && !S.empty(); Length++) { |
5265 | auto [Part, Rest] = S.split(Separator: ','); |
5266 | Vect3[Length] = Part.trim(); |
5267 | S = Rest; |
5268 | } |
5269 | } |
5270 | |
5271 | const unsigned Dim = DimC - 'x'; |
5272 | assert(Dim < 3 && "Unexpected dim char" ); |
5273 | |
5274 | const uint64_t VInt = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue(); |
5275 | |
// Local variable required for the StringRef in Vect3 to point to.
5277 | const std::string VStr = llvm::utostr(X: VInt); |
5278 | Vect3[Dim] = VStr; |
5279 | Length = std::max(a: Length, b: Dim + 1); |
5280 | |
5281 | const std::string NewAttr = llvm::join(R: ArrayRef(Vect3, Length), Separator: "," ); |
5282 | F->addFnAttr(Kind: Attr, Val: NewAttr); |
5283 | } |
5284 | |
5285 | static inline bool isXYZ(StringRef S) { |
5286 | return S == "x" || S == "y" || S == "z" ; |
5287 | } |
5288 | |
5289 | bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, |
5290 | const Metadata *V) { |
5291 | if (K == "kernel" ) { |
5292 | if (!mdconst::extract<ConstantInt>(MD&: V)->isZero()) |
5293 | cast<Function>(Val: GV)->setCallingConv(CallingConv::PTX_Kernel); |
5294 | return true; |
5295 | } |
5296 | if (K == "align" ) { |
// V is a bitfield specifying two 16-bit values. The alignment value is
// specified in the low 16 bits; the index is specified in the high bits.
// For the index, 0 indicates the return value while higher values
// correspond to each parameter (idx = param + 1).
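// e.g. (hypothetical) V = (1 << 16) | 8 requests alignstack(8) on the first
// parameter.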
5301 | const uint64_t AlignIdxValuePair = |
5302 | mdconst::extract<ConstantInt>(MD&: V)->getZExtValue(); |
5303 | const unsigned Idx = (AlignIdxValuePair >> 16); |
5304 | const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF); |
5305 | cast<Function>(Val: GV)->addAttributeAtIndex( |
5306 | i: Idx, Attr: Attribute::getWithStackAlignment(Context&: GV->getContext(), Alignment: StackAlign)); |
5307 | return true; |
5308 | } |
5309 | if (K == "maxclusterrank" || K == "cluster_max_blocks" ) { |
5310 | const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue(); |
5311 | cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.maxclusterrank" , Val: llvm::utostr(X: CV)); |
5312 | return true; |
5313 | } |
5314 | if (K == "minctasm" ) { |
5315 | const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue(); |
5316 | cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.minctasm" , Val: llvm::utostr(X: CV)); |
5317 | return true; |
5318 | } |
5319 | if (K == "maxnreg" ) { |
5320 | const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue(); |
5321 | cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.maxnreg" , Val: llvm::utostr(X: CV)); |
5322 | return true; |
5323 | } |
5324 | if (K.consume_front(Prefix: "maxntid" ) && isXYZ(S: K)) { |
5325 | upgradeNVVMFnVectorAttr(Attr: "nvvm.maxntid" , DimC: K[0], GV, V); |
5326 | return true; |
5327 | } |
5328 | if (K.consume_front(Prefix: "reqntid" ) && isXYZ(S: K)) { |
5329 | upgradeNVVMFnVectorAttr(Attr: "nvvm.reqntid" , DimC: K[0], GV, V); |
5330 | return true; |
5331 | } |
5332 | if (K.consume_front(Prefix: "cluster_dim_" ) && isXYZ(S: K)) { |
5333 | upgradeNVVMFnVectorAttr(Attr: "nvvm.cluster_dim" , DimC: K[0], GV, V); |
5334 | return true; |
5335 | } |
5336 | |
5337 | return false; |
5338 | } |
5339 | |
5340 | void llvm::UpgradeNVVMAnnotations(Module &M) { |
5341 | NamedMDNode *NamedMD = M.getNamedMetadata(Name: "nvvm.annotations" ); |
5342 | if (!NamedMD) |
5343 | return; |
5344 | |
5345 | SmallVector<MDNode *, 8> NewNodes; |
5346 | SmallSet<const MDNode *, 8> SeenNodes; |
5347 | for (MDNode *MD : NamedMD->operands()) { |
5348 | if (!SeenNodes.insert(Ptr: MD).second) |
5349 | continue; |
5350 | |
5351 | auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD: MD->getOperand(I: 0)); |
5352 | if (!GV) |
5353 | continue; |
5354 | |
5355 | assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands" ); |
5356 | |
5357 | SmallVector<Metadata *, 8> NewOperands{MD->getOperand(I: 0)}; |
// Each nvvm.annotations metadata entry will be of the following form:
//   !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
// Start at index 1 to skip the global value operand, and step by 2 so each
// iteration lands on the key of the next property-value pair.
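// e.g. (illustrative) !{ptr @f, !"kernel", i32 1} is consumed by
// upgradeSingleNVVMAnnotation, which sets the ptx_kernel calling convention
// on @f, so the pair is not re-added to NewOperands.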
5362 | for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) { |
5363 | MDString *K = cast<MDString>(Val: MD->getOperand(I: j)); |
5364 | const MDOperand &V = MD->getOperand(I: j + 1); |
5365 | bool Upgraded = upgradeSingleNVVMAnnotation(GV, K: K->getString(), V); |
5366 | if (!Upgraded) |
5367 | NewOperands.append(IL: {K, V}); |
5368 | } |
5369 | |
5370 | if (NewOperands.size() > 1) |
5371 | NewNodes.push_back(Elt: MDNode::get(Context&: M.getContext(), MDs: NewOperands)); |
5372 | } |
5373 | |
5374 | NamedMD->clearOperands(); |
5375 | for (MDNode *N : NewNodes) |
5376 | NamedMD->addOperand(M: N); |
5377 | } |
5378 | |
/// This checks for the objc retain/release marker, which should be upgraded.
/// It returns true if the module is modified.
5381 | static bool upgradeRetainReleaseMarker(Module &M) { |
5382 | bool Changed = false; |
5383 | const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker" ; |
5384 | NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(Name: MarkerKey); |
5385 | if (ModRetainReleaseMarker) { |
5386 | MDNode *Op = ModRetainReleaseMarker->getOperand(i: 0); |
5387 | if (Op) { |
5388 | MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 0)); |
5389 | if (ID) { |
5390 | SmallVector<StringRef, 4> ValueComp; |
5391 | ID->getString().split(A&: ValueComp, Separator: "#" ); |
5392 | if (ValueComp.size() == 2) { |
5393 | std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str(); |
5394 | ID = MDString::get(Context&: M.getContext(), Str: NewValue); |
5395 | } |
5396 | M.addModuleFlag(Behavior: Module::Error, Key: MarkerKey, Val: ID); |
5397 | M.eraseNamedMetadata(NMD: ModRetainReleaseMarker); |
5398 | Changed = true; |
5399 | } |
5400 | } |
5401 | } |
5402 | return Changed; |
5403 | } |
5404 | |
5405 | void llvm::UpgradeARCRuntime(Module &M) { |
5406 | // This lambda converts normal function calls to ARC runtime functions to |
5407 | // intrinsic calls. |
5408 | auto UpgradeToIntrinsic = [&](const char *OldFunc, |
5409 | llvm::Intrinsic::ID IntrinsicFunc) { |
5410 | Function *Fn = M.getFunction(Name: OldFunc); |
5411 | |
5412 | if (!Fn) |
5413 | return; |
5414 | |
5415 | Function *NewFn = |
5416 | llvm::Intrinsic::getOrInsertDeclaration(M: &M, id: IntrinsicFunc); |
5417 | |
5418 | for (User *U : make_early_inc_range(Range: Fn->users())) { |
5419 | CallInst *CI = dyn_cast<CallInst>(Val: U); |
5420 | if (!CI || CI->getCalledFunction() != Fn) |
5421 | continue; |
5422 | |
5423 | IRBuilder<> Builder(CI->getParent(), CI->getIterator()); |
5424 | FunctionType *NewFuncTy = NewFn->getFunctionType(); |
5425 | SmallVector<Value *, 2> Args; |
5426 | |
5427 | // Don't upgrade the intrinsic if it's not valid to bitcast the return |
5428 | // value to the return type of the old function. |
5429 | if (NewFuncTy->getReturnType() != CI->getType() && |
5430 | !CastInst::castIsValid(op: Instruction::BitCast, S: CI, |
5431 | DstTy: NewFuncTy->getReturnType())) |
5432 | continue; |
5433 | |
5434 | bool InvalidCast = false; |
5435 | |
5436 | for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) { |
5437 | Value *Arg = CI->getArgOperand(i: I); |
5438 | |
5439 | // Bitcast argument to the parameter type of the new function if it's |
5440 | // not a variadic argument. |
5441 | if (I < NewFuncTy->getNumParams()) { |
5442 | // Don't upgrade the intrinsic if it's not valid to bitcast the argument |
5443 | // to the parameter type of the new function. |
5444 | if (!CastInst::castIsValid(op: Instruction::BitCast, S: Arg, |
5445 | DstTy: NewFuncTy->getParamType(i: I))) { |
5446 | InvalidCast = true; |
5447 | break; |
5448 | } |
5449 | Arg = Builder.CreateBitCast(V: Arg, DestTy: NewFuncTy->getParamType(i: I)); |
5450 | } |
5451 | Args.push_back(Elt: Arg); |
5452 | } |
5453 | |
5454 | if (InvalidCast) |
5455 | continue; |
5456 | |
5457 | // Create a call instruction that calls the new function. |
5458 | CallInst *NewCall = Builder.CreateCall(FTy: NewFuncTy, Callee: NewFn, Args); |
5459 | NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind()); |
5460 | NewCall->takeName(V: CI); |
5461 | |
5462 | // Bitcast the return value back to the type of the old call. |
5463 | Value *NewRetVal = Builder.CreateBitCast(V: NewCall, DestTy: CI->getType()); |
5464 | |
5465 | if (!CI->use_empty()) |
5466 | CI->replaceAllUsesWith(V: NewRetVal); |
5467 | CI->eraseFromParent(); |
5468 | } |
5469 | |
5470 | if (Fn->use_empty()) |
5471 | Fn->eraseFromParent(); |
5472 | }; |
5473 | |
5474 | // Unconditionally convert a call to "clang.arc.use" to a call to |
5475 | // "llvm.objc.clang.arc.use". |
5476 | UpgradeToIntrinsic("clang.arc.use" , llvm::Intrinsic::objc_clang_arc_use); |
5477 | |
5478 | // Upgrade the retain release marker. If there is no need to upgrade |
5479 | // the marker, that means either the module is already new enough to contain |
5480 | // new intrinsics or it is not ARC. There is no need to upgrade runtime call. |
5481 | if (!upgradeRetainReleaseMarker(M)) |
5482 | return; |
5483 | |
5484 | std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = { |
5485 | {"objc_autorelease" , llvm::Intrinsic::objc_autorelease}, |
5486 | {"objc_autoreleasePoolPop" , llvm::Intrinsic::objc_autoreleasePoolPop}, |
5487 | {"objc_autoreleasePoolPush" , llvm::Intrinsic::objc_autoreleasePoolPush}, |
5488 | {"objc_autoreleaseReturnValue" , |
5489 | llvm::Intrinsic::objc_autoreleaseReturnValue}, |
5490 | {"objc_copyWeak" , llvm::Intrinsic::objc_copyWeak}, |
5491 | {"objc_destroyWeak" , llvm::Intrinsic::objc_destroyWeak}, |
5492 | {"objc_initWeak" , llvm::Intrinsic::objc_initWeak}, |
5493 | {"objc_loadWeak" , llvm::Intrinsic::objc_loadWeak}, |
5494 | {"objc_loadWeakRetained" , llvm::Intrinsic::objc_loadWeakRetained}, |
5495 | {"objc_moveWeak" , llvm::Intrinsic::objc_moveWeak}, |
5496 | {"objc_release" , llvm::Intrinsic::objc_release}, |
5497 | {"objc_retain" , llvm::Intrinsic::objc_retain}, |
5498 | {"objc_retainAutorelease" , llvm::Intrinsic::objc_retainAutorelease}, |
5499 | {"objc_retainAutoreleaseReturnValue" , |
5500 | llvm::Intrinsic::objc_retainAutoreleaseReturnValue}, |
5501 | {"objc_retainAutoreleasedReturnValue" , |
5502 | llvm::Intrinsic::objc_retainAutoreleasedReturnValue}, |
5503 | {"objc_retainBlock" , llvm::Intrinsic::objc_retainBlock}, |
5504 | {"objc_storeStrong" , llvm::Intrinsic::objc_storeStrong}, |
5505 | {"objc_storeWeak" , llvm::Intrinsic::objc_storeWeak}, |
5506 | {"objc_unsafeClaimAutoreleasedReturnValue" , |
5507 | llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue}, |
5508 | {"objc_retainedObject" , llvm::Intrinsic::objc_retainedObject}, |
5509 | {"objc_unretainedObject" , llvm::Intrinsic::objc_unretainedObject}, |
5510 | {"objc_unretainedPointer" , llvm::Intrinsic::objc_unretainedPointer}, |
5511 | {"objc_retain_autorelease" , llvm::Intrinsic::objc_retain_autorelease}, |
5512 | {"objc_sync_enter" , llvm::Intrinsic::objc_sync_enter}, |
5513 | {"objc_sync_exit" , llvm::Intrinsic::objc_sync_exit}, |
5514 | {"objc_arc_annotation_topdown_bbstart" , |
5515 | llvm::Intrinsic::objc_arc_annotation_topdown_bbstart}, |
5516 | {"objc_arc_annotation_topdown_bbend" , |
5517 | llvm::Intrinsic::objc_arc_annotation_topdown_bbend}, |
5518 | {"objc_arc_annotation_bottomup_bbstart" , |
5519 | llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart}, |
5520 | {"objc_arc_annotation_bottomup_bbend" , |
5521 | llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}}; |
5522 | |
5523 | for (auto &I : RuntimeFuncs) |
5524 | UpgradeToIntrinsic(I.first, I.second); |
5525 | } |
5526 | |
5527 | bool llvm::UpgradeModuleFlags(Module &M) { |
5528 | NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); |
5529 | if (!ModFlags) |
5530 | return false; |
5531 | |
5532 | bool HasObjCFlag = false, HasClassProperties = false, Changed = false; |
5533 | bool HasSwiftVersionFlag = false; |
5534 | uint8_t SwiftMajorVersion, SwiftMinorVersion; |
5535 | uint32_t SwiftABIVersion; |
5536 | auto Int8Ty = Type::getInt8Ty(C&: M.getContext()); |
5537 | auto Int32Ty = Type::getInt32Ty(C&: M.getContext()); |
5538 | |
5539 | for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { |
5540 | MDNode *Op = ModFlags->getOperand(i: I); |
5541 | if (Op->getNumOperands() != 3) |
5542 | continue; |
5543 | MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1)); |
5544 | if (!ID) |
5545 | continue; |
5546 | auto SetBehavior = [&](Module::ModFlagBehavior B) { |
5547 | Metadata *Ops[3] = {ConstantAsMetadata::get(C: ConstantInt::get( |
5548 | Ty: Type::getInt32Ty(C&: M.getContext()), V: B)), |
5549 | MDString::get(Context&: M.getContext(), Str: ID->getString()), |
5550 | Op->getOperand(I: 2)}; |
5551 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5552 | Changed = true; |
5553 | }; |
5554 | |
5555 | if (ID->getString() == "Objective-C Image Info Version" ) |
5556 | HasObjCFlag = true; |
5557 | if (ID->getString() == "Objective-C Class Properties" ) |
5558 | HasClassProperties = true; |
5559 | // Upgrade PIC from Error/Max to Min. |
5560 | if (ID->getString() == "PIC Level" ) { |
5561 | if (auto *Behavior = |
5562 | mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) { |
5563 | uint64_t V = Behavior->getLimitedValue(); |
5564 | if (V == Module::Error || V == Module::Max) |
5565 | SetBehavior(Module::Min); |
5566 | } |
5567 | } |
5568 | // Upgrade "PIE Level" from Error to Max. |
5569 | if (ID->getString() == "PIE Level" ) |
5570 | if (auto *Behavior = |
5571 | mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) |
5572 | if (Behavior->getLimitedValue() == Module::Error) |
5573 | SetBehavior(Module::Max); |
5574 | |
// Upgrade branch protection and return address signing module flags. The
// module flag behavior for these fields was Error and is now Min.
5577 | if (ID->getString() == "branch-target-enforcement" || |
5578 | ID->getString().starts_with(Prefix: "sign-return-address" )) { |
5579 | if (auto *Behavior = |
5580 | mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) { |
5581 | if (Behavior->getLimitedValue() == Module::Error) { |
5582 | Type *Int32Ty = Type::getInt32Ty(C&: M.getContext()); |
5583 | Metadata *Ops[3] = { |
5584 | ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Min)), |
5585 | Op->getOperand(I: 1), Op->getOperand(I: 2)}; |
5586 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5587 | Changed = true; |
5588 | } |
5589 | } |
5590 | } |
5591 | |
// Upgrade the Objective-C Image Info Section. Remove the whitespace in the
// section name so that llvm-lto will not complain about mismatching
// module flags that are functionally the same.
5595 | if (ID->getString() == "Objective-C Image Info Section" ) { |
5596 | if (auto *Value = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 2))) { |
5597 | SmallVector<StringRef, 4> ValueComp; |
5598 | Value->getString().split(A&: ValueComp, Separator: " " ); |
5599 | if (ValueComp.size() != 1) { |
5600 | std::string NewValue; |
5601 | for (auto &S : ValueComp) |
5602 | NewValue += S.str(); |
5603 | Metadata *Ops[3] = {Op->getOperand(I: 0), Op->getOperand(I: 1), |
5604 | MDString::get(Context&: M.getContext(), Str: NewValue)}; |
5605 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5606 | Changed = true; |
5607 | } |
5608 | } |
5609 | } |
5610 | |
// IRUpgrader turns an i32-typed "Objective-C Garbage Collection" flag into an
// i8 value. If the higher bits are set, it adds new module flags for the
// Swift info.
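// The packed layout is, from the least significant byte up:
//   bits 0-7   Objective-C GC setting (kept in the i8 flag)
//   bits 8-15  Swift ABI version
//   bits 16-23 Swift minor version
//   bits 24-31 Swift major version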
5613 | if (ID->getString() == "Objective-C Garbage Collection" ) { |
5614 | auto Md = dyn_cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2)); |
5615 | if (Md) { |
5616 | assert(Md->getValue() && "Expected non-empty metadata" ); |
5617 | auto Type = Md->getValue()->getType(); |
5618 | if (Type == Int8Ty) |
5619 | continue; |
5620 | unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue(); |
5621 | if ((Val & 0xff) != Val) { |
5622 | HasSwiftVersionFlag = true; |
5623 | SwiftABIVersion = (Val & 0xff00) >> 8; |
5624 | SwiftMajorVersion = (Val & 0xff000000) >> 24; |
5625 | SwiftMinorVersion = (Val & 0xff0000) >> 16; |
5626 | } |
5627 | Metadata *Ops[3] = { |
5628 | ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty,V: Module::Error)), |
5629 | Op->getOperand(I: 1), |
5630 | ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int8Ty,V: Val & 0xff))}; |
5631 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5632 | Changed = true; |
5633 | } |
5634 | } |
5635 | |
5636 | if (ID->getString() == "amdgpu_code_object_version" ) { |
5637 | Metadata *Ops[3] = { |
5638 | Op->getOperand(I: 0), |
5639 | MDString::get(Context&: M.getContext(), Str: "amdhsa_code_object_version" ), |
5640 | Op->getOperand(I: 2)}; |
5641 | ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops)); |
5642 | Changed = true; |
5643 | } |
5644 | } |
5645 | |
5646 | // "Objective-C Class Properties" is recently added for Objective-C. We |
5647 | // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module |
5648 | // flag of value 0, so we can correclty downgrade this flag when trying to |
5649 | // link an ObjC bitcode without this module flag with an ObjC bitcode with |
5650 | // this module flag. |
5651 | if (HasObjCFlag && !HasClassProperties) { |
5652 | M.addModuleFlag(Behavior: llvm::Module::Override, Key: "Objective-C Class Properties" , |
5653 | Val: (uint32_t)0); |
5654 | Changed = true; |
5655 | } |
5656 | |
5657 | if (HasSwiftVersionFlag) { |
5658 | M.addModuleFlag(Behavior: Module::Error, Key: "Swift ABI Version" , |
5659 | Val: SwiftABIVersion); |
5660 | M.addModuleFlag(Behavior: Module::Error, Key: "Swift Major Version" , |
5661 | Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMajorVersion)); |
5662 | M.addModuleFlag(Behavior: Module::Error, Key: "Swift Minor Version" , |
5663 | Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMinorVersion)); |
5664 | Changed = true; |
5665 | } |
5666 | |
5667 | return Changed; |
5668 | } |
5669 | |
5670 | void llvm::UpgradeSectionAttributes(Module &M) { |
5671 | auto TrimSpaces = [](StringRef Section) -> std::string { |
5672 | SmallVector<StringRef, 5> Components; |
5673 | Section.split(A&: Components, Separator: ','); |
5674 | |
5675 | SmallString<32> Buffer; |
5676 | raw_svector_ostream OS(Buffer); |
5677 | |
5678 | for (auto Component : Components) |
5679 | OS << ',' << Component.trim(); |
5680 | |
5681 | return std::string(OS.str().substr(Start: 1)); |
5682 | }; |
5683 | |
5684 | for (auto &GV : M.globals()) { |
5685 | if (!GV.hasSection()) |
5686 | continue; |
5687 | |
5688 | StringRef Section = GV.getSection(); |
5689 | |
5690 | if (!Section.starts_with(Prefix: "__DATA, __objc_catlist" )) |
5691 | continue; |
5692 | |
5693 | // __DATA, __objc_catlist, regular, no_dead_strip |
5694 | // __DATA,__objc_catlist,regular,no_dead_strip |
5695 | GV.setSection(TrimSpaces(Section)); |
5696 | } |
5697 | } |
5698 | |
5699 | namespace { |
5700 | // Prior to LLVM 10.0, the strictfp attribute could be used on individual |
5701 | // callsites within a function that did not also have the strictfp attribute. |
5702 | // Since 10.0, if strict FP semantics are needed within a function, the |
5703 | // function must have the strictfp attribute and all calls within the function |
5704 | // must also have the strictfp attribute. This latter restriction is |
5705 | // necessary to prevent unwanted libcall simplification when a function is |
5706 | // being cloned (such as for inlining). |
5707 | // |
5708 | // The "dangling" strictfp attribute usage was only used to prevent constant |
5709 | // folding and other libcall simplification. The nobuiltin attribute on the |
5710 | // callsite has the same effect. |
5711 | struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> { |
5712 | StrictFPUpgradeVisitor() = default; |
5713 | |
5714 | void visitCallBase(CallBase &Call) { |
5715 | if (!Call.isStrictFP()) |
5716 | return; |
5717 | if (isa<ConstrainedFPIntrinsic>(Val: &Call)) |
5718 | return; |
5719 | // If we get here, the caller doesn't have the strictfp attribute |
5720 | // but this callsite does. Replace the strictfp attribute with nobuiltin. |
5721 | Call.removeFnAttr(Kind: Attribute::StrictFP); |
5722 | Call.addFnAttr(Kind: Attribute::NoBuiltin); |
5723 | } |
5724 | }; |
5725 | |
5726 | /// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata |
5727 | struct AMDGPUUnsafeFPAtomicsUpgradeVisitor |
5728 | : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> { |
5729 | AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default; |
5730 | |
5731 | void visitAtomicRMWInst(AtomicRMWInst &RMW) { |
5732 | if (!RMW.isFloatingPointOperation()) |
5733 | return; |
5734 | |
5735 | MDNode *Empty = MDNode::get(Context&: RMW.getContext(), MDs: {}); |
5736 | RMW.setMetadata(Kind: "amdgpu.no.fine.grained.host.memory" , Node: Empty); |
5737 | RMW.setMetadata(Kind: "amdgpu.no.remote.memory.access" , Node: Empty); |
5738 | RMW.setMetadata(Kind: "amdgpu.ignore.denormal.mode" , Node: Empty); |
5739 | } |
5740 | }; |
5741 | } // namespace |
5742 | |
5743 | void llvm::UpgradeFunctionAttributes(Function &F) { |
5744 | // If a function definition doesn't have the strictfp attribute, |
5745 | // convert any callsite strictfp attributes to nobuiltin. |
5746 | if (!F.isDeclaration() && !F.hasFnAttribute(Kind: Attribute::StrictFP)) { |
5747 | StrictFPUpgradeVisitor SFPV; |
5748 | SFPV.visit(F); |
5749 | } |
5750 | |
// Remove all incompatible attributes from the function.
5752 | F.removeRetAttrs(Attrs: AttributeFuncs::typeIncompatible( |
5753 | Ty: F.getReturnType(), AS: F.getAttributes().getRetAttrs())); |
5754 | for (auto &Arg : F.args()) |
5755 | Arg.removeAttrs( |
5756 | AM: AttributeFuncs::typeIncompatible(Ty: Arg.getType(), AS: Arg.getAttributes())); |
5757 | |
5758 | // Older versions of LLVM treated an "implicit-section-name" attribute |
5759 | // similarly to directly setting the section on a Function. |
5760 | if (Attribute A = F.getFnAttribute(Kind: "implicit-section-name" ); |
5761 | A.isValid() && A.isStringAttribute()) { |
5762 | F.setSection(A.getValueAsString()); |
5763 | F.removeFnAttr(Kind: "implicit-section-name" ); |
5764 | } |
5765 | |
5766 | if (!F.empty()) { |
5767 | // For some reason this is called twice, and the first time is before any |
5768 | // instructions are loaded into the body. |
5769 | |
5770 | if (Attribute A = F.getFnAttribute(Kind: "amdgpu-unsafe-fp-atomics" ); |
5771 | A.isValid()) { |
5772 | |
5773 | if (A.getValueAsBool()) { |
5774 | AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor; |
5775 | Visitor.visit(F); |
5776 | } |
5777 | |
5778 | // We will leave behind dead attribute uses on external declarations, but |
5779 | // clang never added these to declarations anyway. |
5780 | F.removeFnAttr(Kind: "amdgpu-unsafe-fp-atomics" ); |
5781 | } |
5782 | } |
5783 | } |
5784 | |
5785 | static bool isOldLoopArgument(Metadata *MD) { |
5786 | auto *T = dyn_cast_or_null<MDTuple>(Val: MD); |
5787 | if (!T) |
5788 | return false; |
5789 | if (T->getNumOperands() < 1) |
5790 | return false; |
5791 | auto *S = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0)); |
5792 | if (!S) |
5793 | return false; |
5794 | return S->getString().starts_with(Prefix: "llvm.vectorizer." ); |
5795 | } |
5796 | |
5797 | static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { |
5798 | StringRef OldPrefix = "llvm.vectorizer." ; |
5799 | assert(OldTag.starts_with(OldPrefix) && "Expected old prefix" ); |
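// e.g. "llvm.vectorizer.width" becomes "llvm.loop.vectorize.width", while
// "llvm.vectorizer.unroll" is special-cased below to
// "llvm.loop.interleave.count".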
5800 | |
5801 | if (OldTag == "llvm.vectorizer.unroll" ) |
5802 | return MDString::get(Context&: C, Str: "llvm.loop.interleave.count" ); |
5803 | |
5804 | return MDString::get( |
5805 | Context&: C, Str: (Twine("llvm.loop.vectorize." ) + OldTag.drop_front(N: OldPrefix.size())) |
5806 | .str()); |
5807 | } |
5808 | |
5809 | static Metadata *upgradeLoopArgument(Metadata *MD) { |
5810 | auto *T = dyn_cast_or_null<MDTuple>(Val: MD); |
5811 | if (!T) |
5812 | return MD; |
5813 | if (T->getNumOperands() < 1) |
5814 | return MD; |
5815 | auto *OldTag = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0)); |
5816 | if (!OldTag) |
5817 | return MD; |
5818 | if (!OldTag->getString().starts_with(Prefix: "llvm.vectorizer." )) |
5819 | return MD; |
5820 | |
5821 | // This has an old tag. Upgrade it. |
5822 | SmallVector<Metadata *, 8> Ops; |
5823 | Ops.reserve(N: T->getNumOperands()); |
5824 | Ops.push_back(Elt: upgradeLoopTag(C&: T->getContext(), OldTag: OldTag->getString())); |
5825 | for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) |
5826 | Ops.push_back(Elt: T->getOperand(I)); |
5827 | |
5828 | return MDTuple::get(Context&: T->getContext(), MDs: Ops); |
5829 | } |
5830 | |
5831 | MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { |
5832 | auto *T = dyn_cast<MDTuple>(Val: &N); |
5833 | if (!T) |
5834 | return &N; |
5835 | |
5836 | if (none_of(Range: T->operands(), P: isOldLoopArgument)) |
5837 | return &N; |
5838 | |
5839 | SmallVector<Metadata *, 8> Ops; |
5840 | Ops.reserve(N: T->getNumOperands()); |
5841 | for (Metadata *MD : T->operands()) |
5842 | Ops.push_back(Elt: upgradeLoopArgument(MD)); |
5843 | |
5844 | return MDTuple::get(Context&: T->getContext(), MDs: Ops); |
5845 | } |
5846 | |
5847 | std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { |
5848 | Triple T(TT); |
// The only data layout upgrade needed for pre-GCN, SPIR, or SPIRV targets is
// setting the address space of globals to 1. This does not apply to SPIRV
// Logical.
5851 | if (((T.isAMDGPU() && !T.isAMDGCN()) || |
5852 | (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) && |
5853 | !DL.contains(Other: "-G" ) && !DL.starts_with(Prefix: "G" )) { |
5854 | return DL.empty() ? std::string("G1" ) : (DL + "-G1" ).str(); |
5855 | } |
5856 | |
5857 | if (T.isLoongArch64() || T.isRISCV64()) { |
5858 | // Make i32 a native type for 64-bit LoongArch and RISC-V. |
5859 | auto I = DL.find(Str: "-n64-" ); |
5860 | if (I != StringRef::npos) |
5861 | return (DL.take_front(N: I) + "-n32:64-" + DL.drop_front(N: I + 5)).str(); |
5862 | return DL.str(); |
5863 | } |
5864 | |
5865 | std::string Res = DL.str(); |
5866 | // AMDGCN data layout upgrades. |
5867 | if (T.isAMDGCN()) { |
// Add the default globals address space (G1) if it is missing.
5869 | if (!DL.contains(Other: "-G" ) && !DL.starts_with(Prefix: "G" )) |
5870 | Res.append(s: Res.empty() ? "G1" : "-G1" ); |
5871 | |
5872 | // Add missing non-integral declarations. |
5873 | // This goes before adding new address spaces to prevent incoherent string |
5874 | // values. |
5875 | if (!DL.contains(Other: "-ni" ) && !DL.starts_with(Prefix: "ni" )) |
5876 | Res.append(s: "-ni:7:8:9" ); |
5877 | // Update ni:7 to ni:7:8:9. |
5878 | if (DL.ends_with(Suffix: "ni:7" )) |
5879 | Res.append(s: ":8:9" ); |
5880 | if (DL.ends_with(Suffix: "ni:7:8" )) |
5881 | Res.append(s: ":9" ); |
5882 | |
// Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
// resources). An empty data layout has already been upgraded to G1 by now.
5885 | if (!DL.contains(Other: "-p7" ) && !DL.starts_with(Prefix: "p7" )) |
5886 | Res.append(s: "-p7:160:256:256:32" ); |
5887 | if (!DL.contains(Other: "-p8" ) && !DL.starts_with(Prefix: "p8" )) |
5888 | Res.append(s: "-p8:128:128:128:48" ); |
5889 | constexpr StringRef OldP8("-p8:128:128-" ); |
5890 | if (DL.contains(Other: OldP8)) |
5891 | Res.replace(pos: Res.find(svt: OldP8), n1: OldP8.size(), s: "-p8:128:128:128:48-" ); |
5892 | if (!DL.contains(Other: "-p9" ) && !DL.starts_with(Prefix: "p9" )) |
5893 | Res.append(s: "-p9:192:256:256:32" ); |
5894 | |
5895 | return Res; |
5896 | } |
5897 | |
5898 | auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() { |
5899 | // If the datalayout matches the expected format, add pointer size address |
5900 | // spaces to the datalayout. |
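// e.g. (illustrative) "e-m:e-p:32:32-f64:32:64-..." becomes
// "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-...".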
5901 | StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64" }; |
5902 | if (!DL.contains(Other: AddrSpaces)) { |
5903 | SmallVector<StringRef, 4> Groups; |
5904 | Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$" ); |
5905 | if (R.match(String: Res, Matches: &Groups)) |
5906 | Res = (Groups[1] + AddrSpaces + Groups[3]).str(); |
5907 | } |
5908 | }; |
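  // Illustrative example (not in the original source): the lambda above turns
  // "e-m:e-i64:64-f80:128-n8:16:32:64-S128" into
  // "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
  // declaring the mixed 32/64-bit pointer address spaces 270 through 272.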

  // AArch64 data layout upgrades.
  if (T.isAArch64()) {
    // Add "-Fn32".
    if (!DL.empty() && !DL.contains("-Fn32"))
      Res.append("-Fn32");
    AddPtr32Ptr64AddrSpaces();
    return Res;
  }
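  // Illustrative example (not in the original source): any non-empty aarch64
  // layout lacking "-Fn32" (a function-pointer alignment specification), such
  // as "e-m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128", gains "-Fn32" at the end.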

  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
      T.isWasm()) {
    // MIPS64 with the o32 ABI did not add "-i128:128".
    // Add "-i128:128" after "-i64:64" if it is missing.
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    if (!StringRef(Res).contains(I128)) {
      size_t Pos = Res.find(I64);
      if (Pos != size_t(-1))
        Res.insert(Pos + I64.size(), I128);
    }
    return Res;
  }
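  // Illustrative example (not in the original source): a SPARC or Wasm layout
  // containing "-i64:64" but no "-i128:128" has "-i128:128" spliced in right
  // after "-i64:64"; layouts without "-i64:64" are left as they are.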

  if (!T.isX86())
    return Res;

  AddPtr32Ptr64AddrSpaces();

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16-byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }
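  // Illustrative example (not in the original source): the regex above splits
  // off the leading run of m/p/i components, so
  // "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  // becomes
  // "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128".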

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }
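  // Illustrative example (not in the original source): on an
  // i686-pc-windows-msvc triple, a layout containing "-f80:32-" has that
  // component rewritten to "-f80:128-".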

  return Res;
}

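// Upgrades legacy string function attributes to their current forms, as the
// code below implements:
//   "no-frame-pointer-elim"="true"   -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"  -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" -> "frame-pointer"="non-leaf" (unless
//                                       "all" already applies)
//   "null-pointer-is-valid"="true"   -> the null_pointer_is_valid enum
//                                       attribute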
void llvm::UpgradeAttributes(AttrBuilder &B) {
  StringRef FramePointer;
  Attribute A = B.getAttribute("no-frame-pointer-elim");
  if (A.isValid()) {
    // The value can be "true" or "false".
    FramePointer = A.getValueAsString() == "true" ? "all" : "none";
    B.removeAttribute("no-frame-pointer-elim");
  }
  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }
  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);

  A = B.getAttribute("null-pointer-is-valid");
  if (A.isValid()) {
    // The value can be "true" or "false".
    bool NullPointerIsValid = A.getValueAsString() == "true";
    B.removeAttribute("null-pointer-is-valid");
    if (NullPointerIsValid)
      B.addAttribute(Attribute::NullPointerIsValid);
  }
}

void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
  // clang.arc.attachedcall bundles are now required to have an operand.
  // If they don't, it's okay to drop them entirely: when there is an operand,
  // the "attachedcall" is meaningful and required, but without an operand,
  // it's just a marker NOP. Dropping it merely prevents an optimization.
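  // Illustrative example (not in the original source):
  //   call void @f() [ "clang.arc.attachedcall"() ]   ; no operand -> dropped
  //   call ptr @g() [ "clang.arc.attachedcall"(ptr
  //       @llvm.objc.retainAutoreleasedReturnValue) ] ; has operand -> kept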
  erase_if(Bundles, [&](OperandBundleDef &OBD) {
    return OBD.getTag() == "clang.arc.attachedcall" &&
           OBD.inputs().empty();
  });
}
