1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/BinaryFormat/Dwarf.h"
21#include "llvm/IR/AttributeMask.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DebugInfo.h"
26#include "llvm/IR/DebugInfoMetadata.h"
27#include "llvm/IR/DiagnosticInfo.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/GlobalValue.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstVisitor.h"
32#include "llvm/IR/Instruction.h"
33#include "llvm/IR/IntrinsicInst.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsAArch64.h"
36#include "llvm/IR/IntrinsicsAMDGPU.h"
37#include "llvm/IR/IntrinsicsARM.h"
38#include "llvm/IR/IntrinsicsNVPTX.h"
39#include "llvm/IR/IntrinsicsRISCV.h"
40#include "llvm/IR/IntrinsicsWebAssembly.h"
41#include "llvm/IR/IntrinsicsX86.h"
42#include "llvm/IR/LLVMContext.h"
43#include "llvm/IR/MDBuilder.h"
44#include "llvm/IR/Metadata.h"
45#include "llvm/IR/Module.h"
46#include "llvm/IR/Value.h"
47#include "llvm/IR/Verifier.h"
48#include "llvm/Support/AMDGPUAddrSpace.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/ErrorHandling.h"
51#include "llvm/Support/NVPTXAddrSpace.h"
52#include "llvm/Support/NVVMAttributes.h"
53#include "llvm/Support/Regex.h"
54#include "llvm/Support/TimeProfiler.h"
55#include "llvm/TargetParser/Triple.h"
56#include <cstdint>
57#include <cstring>
58#include <numeric>
59
60using namespace llvm;
61
// Command-line flag "disable-auto-upgrade-debug-info". Its help text states
// that it disables the auto-upgrade of debug info.
static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));
65
66static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
67
68// Report a fatal error along with the
69// Call Instruction which caused the error
70[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
71 CallBase *CI) {
72 CI->print(O&: llvm::errs());
73 llvm::errs() << "\n";
74 reportFatalUsageError(reason);
75}
76
77// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
78// changed their type from v4f32 to v2i64.
79static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
80 Function *&NewFn) {
81 // Check whether this is an old version of the function, which received
82 // v4f32 arguments.
83 Type *Arg0Type = F->getFunctionType()->getParamType(i: 0);
84 if (Arg0Type != FixedVectorType::get(ElementType: Type::getFloatTy(C&: F->getContext()), NumElts: 4))
85 return false;
86
87 // Yes, it's old, replace it with new version.
88 rename(GV: F);
89 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
90 return true;
91}
92
93// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
94// arguments have changed their type from i32 to i8.
95static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
96 Function *&NewFn) {
97 // Check that the last argument is an i32.
98 Type *LastArgType = F->getFunctionType()->getParamType(
99 i: F->getFunctionType()->getNumParams() - 1);
100 if (!LastArgType->isIntegerTy(BitWidth: 32))
101 return false;
102
103 // Move this function aside and map down.
104 rename(GV: F);
105 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
106 return true;
107}
108
109// Upgrade the declaration of fp compare intrinsics that change return type
110// from scalar to vXi1 mask.
111static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
112 Function *&NewFn) {
113 // Check if the return type is a vector.
114 if (F->getReturnType()->isVectorTy())
115 return false;
116
117 rename(GV: F);
118 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
119 return true;
120}
121
122// Upgrade the declaration of multiply and add bytes intrinsics whose input
123// arguments' types have changed from vectors of i32 to vectors of i8
124static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
125 Function *&NewFn) {
126 // check if input argument type is a vector of i8
127 Type *Arg1Type = F->getFunctionType()->getParamType(i: 1);
128 Type *Arg2Type = F->getFunctionType()->getParamType(i: 2);
129 if (Arg1Type->isVectorTy() &&
130 cast<VectorType>(Val: Arg1Type)->getElementType()->isIntegerTy(BitWidth: 8) &&
131 Arg2Type->isVectorTy() &&
132 cast<VectorType>(Val: Arg2Type)->getElementType()->isIntegerTy(BitWidth: 8))
133 return false;
134
135 rename(GV: F);
136 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
137 return true;
138}
139
140// Upgrade the declaration of multipy and add words intrinsics whose input
141// arguments' types have changed to vectors of i32 to vectors of i16
142static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
143 Function *&NewFn) {
144 // check if input argument type is a vector of i16
145 Type *Arg1Type = F->getFunctionType()->getParamType(i: 1);
146 Type *Arg2Type = F->getFunctionType()->getParamType(i: 2);
147 if (Arg1Type->isVectorTy() &&
148 cast<VectorType>(Val: Arg1Type)->getElementType()->isIntegerTy(BitWidth: 16) &&
149 Arg2Type->isVectorTy() &&
150 cast<VectorType>(Val: Arg2Type)->getElementType()->isIntegerTy(BitWidth: 16))
151 return false;
152
153 rename(GV: F);
154 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
155 return true;
156}
157
158static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
159 Function *&NewFn) {
160 if (F->getReturnType()->getScalarType()->isBFloatTy())
161 return false;
162
163 rename(GV: F);
164 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
165 return true;
166}
167
168static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
169 Function *&NewFn) {
170 if (F->getFunctionType()->getParamType(i: 1)->getScalarType()->isBFloatTy())
171 return false;
172
173 rename(GV: F);
174 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
175 return true;
176}
177
178static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
179 // All of the intrinsics matches below should be marked with which llvm
180 // version started autoupgrading them. At some point in the future we would
181 // like to use this information to remove upgrade code for some older
182 // intrinsics. It is currently undecided how we will determine that future
183 // point.
184 if (Name.consume_front(Prefix: "avx."))
185 return (Name.starts_with(Prefix: "blend.p") || // Added in 3.7
186 Name == "cvt.ps2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.pd.256" || // Added in 3.9
188 Name == "cvtdq2.ps.256" || // Added in 7.0
189 Name.starts_with(Prefix: "movnt.") || // Added in 3.2
190 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
191 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
192 Name.starts_with(Prefix: "vbroadcast.s") || // Added in 3.5
193 Name.starts_with(Prefix: "vbroadcastf128") || // Added in 4.0
194 Name.starts_with(Prefix: "vextractf128.") || // Added in 3.7
195 Name.starts_with(Prefix: "vinsertf128.") || // Added in 3.7
196 Name.starts_with(Prefix: "vperm2f128.") || // Added in 6.0
197 Name.starts_with(Prefix: "vpermil.")); // Added in 3.1
198
199 if (Name.consume_front(Prefix: "avx2."))
200 return (Name == "movntdqa" || // Added in 5.0
201 Name.starts_with(Prefix: "pabs.") || // Added in 6.0
202 Name.starts_with(Prefix: "padds.") || // Added in 8.0
203 Name.starts_with(Prefix: "paddus.") || // Added in 8.0
204 Name.starts_with(Prefix: "pblendd.") || // Added in 3.7
205 Name == "pblendw" || // Added in 3.7
206 Name.starts_with(Prefix: "pbroadcast") || // Added in 3.8
207 Name.starts_with(Prefix: "pcmpeq.") || // Added in 3.1
208 Name.starts_with(Prefix: "pcmpgt.") || // Added in 3.1
209 Name.starts_with(Prefix: "pmax") || // Added in 3.9
210 Name.starts_with(Prefix: "pmin") || // Added in 3.9
211 Name.starts_with(Prefix: "pmovsx") || // Added in 3.9
212 Name.starts_with(Prefix: "pmovzx") || // Added in 3.9
213 Name == "pmul.dq" || // Added in 7.0
214 Name == "pmulu.dq" || // Added in 7.0
215 Name.starts_with(Prefix: "psll.dq") || // Added in 3.7
216 Name.starts_with(Prefix: "psrl.dq") || // Added in 3.7
217 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
218 Name.starts_with(Prefix: "psubus.") || // Added in 8.0
219 Name.starts_with(Prefix: "vbroadcast") || // Added in 3.8
220 Name == "vbroadcasti128" || // Added in 3.7
221 Name == "vextracti128" || // Added in 3.7
222 Name == "vinserti128" || // Added in 3.7
223 Name == "vperm2i128"); // Added in 6.0
224
225 if (Name.consume_front(Prefix: "avx512.")) {
226 if (Name.consume_front(Prefix: "mask."))
227 // 'avx512.mask.*'
228 return (Name.starts_with(Prefix: "add.p") || // Added in 7.0. 128/256 in 4.0
229 Name.starts_with(Prefix: "and.") || // Added in 3.9
230 Name.starts_with(Prefix: "andn.") || // Added in 3.9
231 Name.starts_with(Prefix: "broadcast.s") || // Added in 3.9
232 Name.starts_with(Prefix: "broadcastf32x4.") || // Added in 6.0
233 Name.starts_with(Prefix: "broadcastf32x8.") || // Added in 6.0
234 Name.starts_with(Prefix: "broadcastf64x2.") || // Added in 6.0
235 Name.starts_with(Prefix: "broadcastf64x4.") || // Added in 6.0
236 Name.starts_with(Prefix: "broadcasti32x4.") || // Added in 6.0
237 Name.starts_with(Prefix: "broadcasti32x8.") || // Added in 6.0
238 Name.starts_with(Prefix: "broadcasti64x2.") || // Added in 6.0
239 Name.starts_with(Prefix: "broadcasti64x4.") || // Added in 6.0
240 Name.starts_with(Prefix: "cmp.b") || // Added in 5.0
241 Name.starts_with(Prefix: "cmp.d") || // Added in 5.0
242 Name.starts_with(Prefix: "cmp.q") || // Added in 5.0
243 Name.starts_with(Prefix: "cmp.w") || // Added in 5.0
244 Name.starts_with(Prefix: "compress.b") || // Added in 9.0
245 Name.starts_with(Prefix: "compress.d") || // Added in 9.0
246 Name.starts_with(Prefix: "compress.p") || // Added in 9.0
247 Name.starts_with(Prefix: "compress.q") || // Added in 9.0
248 Name.starts_with(Prefix: "compress.store.") || // Added in 7.0
249 Name.starts_with(Prefix: "compress.w") || // Added in 9.0
250 Name.starts_with(Prefix: "conflict.") || // Added in 9.0
251 Name.starts_with(Prefix: "cvtdq2pd.") || // Added in 4.0
252 Name.starts_with(Prefix: "cvtdq2ps.") || // Added in 7.0 updated 9.0
253 Name == "cvtpd2dq.256" || // Added in 7.0
254 Name == "cvtpd2ps.256" || // Added in 7.0
255 Name == "cvtps2pd.128" || // Added in 7.0
256 Name == "cvtps2pd.256" || // Added in 7.0
257 Name.starts_with(Prefix: "cvtqq2pd.") || // Added in 7.0 updated 9.0
258 Name == "cvtqq2ps.256" || // Added in 9.0
259 Name == "cvtqq2ps.512" || // Added in 9.0
260 Name == "cvttpd2dq.256" || // Added in 7.0
261 Name == "cvttps2dq.128" || // Added in 7.0
262 Name == "cvttps2dq.256" || // Added in 7.0
263 Name.starts_with(Prefix: "cvtudq2pd.") || // Added in 4.0
264 Name.starts_with(Prefix: "cvtudq2ps.") || // Added in 7.0 updated 9.0
265 Name.starts_with(Prefix: "cvtuqq2pd.") || // Added in 7.0 updated 9.0
266 Name == "cvtuqq2ps.256" || // Added in 9.0
267 Name == "cvtuqq2ps.512" || // Added in 9.0
268 Name.starts_with(Prefix: "dbpsadbw.") || // Added in 7.0
269 Name.starts_with(Prefix: "div.p") || // Added in 7.0. 128/256 in 4.0
270 Name.starts_with(Prefix: "expand.b") || // Added in 9.0
271 Name.starts_with(Prefix: "expand.d") || // Added in 9.0
272 Name.starts_with(Prefix: "expand.load.") || // Added in 7.0
273 Name.starts_with(Prefix: "expand.p") || // Added in 9.0
274 Name.starts_with(Prefix: "expand.q") || // Added in 9.0
275 Name.starts_with(Prefix: "expand.w") || // Added in 9.0
276 Name.starts_with(Prefix: "fpclass.p") || // Added in 7.0
277 Name.starts_with(Prefix: "insert") || // Added in 4.0
278 Name.starts_with(Prefix: "load.") || // Added in 3.9
279 Name.starts_with(Prefix: "loadu.") || // Added in 3.9
280 Name.starts_with(Prefix: "lzcnt.") || // Added in 5.0
281 Name.starts_with(Prefix: "max.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with(Prefix: "min.p") || // Added in 7.0. 128/256 in 5.0
283 Name.starts_with(Prefix: "movddup") || // Added in 3.9
284 Name.starts_with(Prefix: "move.s") || // Added in 4.0
285 Name.starts_with(Prefix: "movshdup") || // Added in 3.9
286 Name.starts_with(Prefix: "movsldup") || // Added in 3.9
287 Name.starts_with(Prefix: "mul.p") || // Added in 7.0. 128/256 in 4.0
288 Name.starts_with(Prefix: "or.") || // Added in 3.9
289 Name.starts_with(Prefix: "pabs.") || // Added in 6.0
290 Name.starts_with(Prefix: "packssdw.") || // Added in 5.0
291 Name.starts_with(Prefix: "packsswb.") || // Added in 5.0
292 Name.starts_with(Prefix: "packusdw.") || // Added in 5.0
293 Name.starts_with(Prefix: "packuswb.") || // Added in 5.0
294 Name.starts_with(Prefix: "padd.") || // Added in 4.0
295 Name.starts_with(Prefix: "padds.") || // Added in 8.0
296 Name.starts_with(Prefix: "paddus.") || // Added in 8.0
297 Name.starts_with(Prefix: "palignr.") || // Added in 3.9
298 Name.starts_with(Prefix: "pand.") || // Added in 3.9
299 Name.starts_with(Prefix: "pandn.") || // Added in 3.9
300 Name.starts_with(Prefix: "pavg") || // Added in 6.0
301 Name.starts_with(Prefix: "pbroadcast") || // Added in 6.0
302 Name.starts_with(Prefix: "pcmpeq.") || // Added in 3.9
303 Name.starts_with(Prefix: "pcmpgt.") || // Added in 3.9
304 Name.starts_with(Prefix: "perm.df.") || // Added in 3.9
305 Name.starts_with(Prefix: "perm.di.") || // Added in 3.9
306 Name.starts_with(Prefix: "permvar.") || // Added in 7.0
307 Name.starts_with(Prefix: "pmaddubs.w.") || // Added in 7.0
308 Name.starts_with(Prefix: "pmaddw.d.") || // Added in 7.0
309 Name.starts_with(Prefix: "pmax") || // Added in 4.0
310 Name.starts_with(Prefix: "pmin") || // Added in 4.0
311 Name == "pmov.qd.256" || // Added in 9.0
312 Name == "pmov.qd.512" || // Added in 9.0
313 Name == "pmov.wb.256" || // Added in 9.0
314 Name == "pmov.wb.512" || // Added in 9.0
315 Name.starts_with(Prefix: "pmovsx") || // Added in 4.0
316 Name.starts_with(Prefix: "pmovzx") || // Added in 4.0
317 Name.starts_with(Prefix: "pmul.dq.") || // Added in 4.0
318 Name.starts_with(Prefix: "pmul.hr.sw.") || // Added in 7.0
319 Name.starts_with(Prefix: "pmulh.w.") || // Added in 7.0
320 Name.starts_with(Prefix: "pmulhu.w.") || // Added in 7.0
321 Name.starts_with(Prefix: "pmull.") || // Added in 4.0
322 Name.starts_with(Prefix: "pmultishift.qb.") || // Added in 8.0
323 Name.starts_with(Prefix: "pmulu.dq.") || // Added in 4.0
324 Name.starts_with(Prefix: "por.") || // Added in 3.9
325 Name.starts_with(Prefix: "prol.") || // Added in 8.0
326 Name.starts_with(Prefix: "prolv.") || // Added in 8.0
327 Name.starts_with(Prefix: "pror.") || // Added in 8.0
328 Name.starts_with(Prefix: "prorv.") || // Added in 8.0
329 Name.starts_with(Prefix: "pshuf.b.") || // Added in 4.0
330 Name.starts_with(Prefix: "pshuf.d.") || // Added in 3.9
331 Name.starts_with(Prefix: "pshufh.w.") || // Added in 3.9
332 Name.starts_with(Prefix: "pshufl.w.") || // Added in 3.9
333 Name.starts_with(Prefix: "psll.d") || // Added in 4.0
334 Name.starts_with(Prefix: "psll.q") || // Added in 4.0
335 Name.starts_with(Prefix: "psll.w") || // Added in 4.0
336 Name.starts_with(Prefix: "pslli") || // Added in 4.0
337 Name.starts_with(Prefix: "psllv") || // Added in 4.0
338 Name.starts_with(Prefix: "psra.d") || // Added in 4.0
339 Name.starts_with(Prefix: "psra.q") || // Added in 4.0
340 Name.starts_with(Prefix: "psra.w") || // Added in 4.0
341 Name.starts_with(Prefix: "psrai") || // Added in 4.0
342 Name.starts_with(Prefix: "psrav") || // Added in 4.0
343 Name.starts_with(Prefix: "psrl.d") || // Added in 4.0
344 Name.starts_with(Prefix: "psrl.q") || // Added in 4.0
345 Name.starts_with(Prefix: "psrl.w") || // Added in 4.0
346 Name.starts_with(Prefix: "psrli") || // Added in 4.0
347 Name.starts_with(Prefix: "psrlv") || // Added in 4.0
348 Name.starts_with(Prefix: "psub.") || // Added in 4.0
349 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
350 Name.starts_with(Prefix: "psubus.") || // Added in 8.0
351 Name.starts_with(Prefix: "pternlog.") || // Added in 7.0
352 Name.starts_with(Prefix: "punpckh") || // Added in 3.9
353 Name.starts_with(Prefix: "punpckl") || // Added in 3.9
354 Name.starts_with(Prefix: "pxor.") || // Added in 3.9
355 Name.starts_with(Prefix: "shuf.f") || // Added in 6.0
356 Name.starts_with(Prefix: "shuf.i") || // Added in 6.0
357 Name.starts_with(Prefix: "shuf.p") || // Added in 4.0
358 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
359 Name.starts_with(Prefix: "store.b.") || // Added in 3.9
360 Name.starts_with(Prefix: "store.d.") || // Added in 3.9
361 Name.starts_with(Prefix: "store.p") || // Added in 3.9
362 Name.starts_with(Prefix: "store.q.") || // Added in 3.9
363 Name.starts_with(Prefix: "store.w.") || // Added in 3.9
364 Name == "store.ss" || // Added in 7.0
365 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
366 Name.starts_with(Prefix: "sub.p") || // Added in 7.0. 128/256 in 4.0
367 Name.starts_with(Prefix: "ucmp.") || // Added in 5.0
368 Name.starts_with(Prefix: "unpckh.") || // Added in 3.9
369 Name.starts_with(Prefix: "unpckl.") || // Added in 3.9
370 Name.starts_with(Prefix: "valign.") || // Added in 4.0
371 Name == "vcvtph2ps.128" || // Added in 11.0
372 Name == "vcvtph2ps.256" || // Added in 11.0
373 Name.starts_with(Prefix: "vextract") || // Added in 4.0
374 Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
375 Name.starts_with(Prefix: "vfmaddsub.") || // Added in 7.0
376 Name.starts_with(Prefix: "vfnmadd.") || // Added in 7.0
377 Name.starts_with(Prefix: "vfnmsub.") || // Added in 7.0
378 Name.starts_with(Prefix: "vpdpbusd.") || // Added in 7.0
379 Name.starts_with(Prefix: "vpdpbusds.") || // Added in 7.0
380 Name.starts_with(Prefix: "vpdpwssd.") || // Added in 7.0
381 Name.starts_with(Prefix: "vpdpwssds.") || // Added in 7.0
382 Name.starts_with(Prefix: "vpermi2var.") || // Added in 7.0
383 Name.starts_with(Prefix: "vpermil.p") || // Added in 3.9
384 Name.starts_with(Prefix: "vpermilvar.") || // Added in 4.0
385 Name.starts_with(Prefix: "vpermt2var.") || // Added in 7.0
386 Name.starts_with(Prefix: "vpmadd52") || // Added in 7.0
387 Name.starts_with(Prefix: "vpshld.") || // Added in 7.0
388 Name.starts_with(Prefix: "vpshldv.") || // Added in 8.0
389 Name.starts_with(Prefix: "vpshrd.") || // Added in 7.0
390 Name.starts_with(Prefix: "vpshrdv.") || // Added in 8.0
391 Name.starts_with(Prefix: "vpshufbitqmb.") || // Added in 8.0
392 Name.starts_with(Prefix: "xor.")); // Added in 3.9
393
394 if (Name.consume_front(Prefix: "mask3."))
395 // 'avx512.mask3.*'
396 return (Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
397 Name.starts_with(Prefix: "vfmaddsub.") || // Added in 7.0
398 Name.starts_with(Prefix: "vfmsub.") || // Added in 7.0
399 Name.starts_with(Prefix: "vfmsubadd.") || // Added in 7.0
400 Name.starts_with(Prefix: "vfnmsub.")); // Added in 7.0
401
402 if (Name.consume_front(Prefix: "maskz."))
403 // 'avx512.maskz.*'
404 return (Name.starts_with(Prefix: "pternlog.") || // Added in 7.0
405 Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
406 Name.starts_with(Prefix: "vfmaddsub.") || // Added in 7.0
407 Name.starts_with(Prefix: "vpdpbusd.") || // Added in 7.0
408 Name.starts_with(Prefix: "vpdpbusds.") || // Added in 7.0
409 Name.starts_with(Prefix: "vpdpwssd.") || // Added in 7.0
410 Name.starts_with(Prefix: "vpdpwssds.") || // Added in 7.0
411 Name.starts_with(Prefix: "vpermt2var.") || // Added in 7.0
412 Name.starts_with(Prefix: "vpmadd52") || // Added in 7.0
413 Name.starts_with(Prefix: "vpshldv.") || // Added in 8.0
414 Name.starts_with(Prefix: "vpshrdv.")); // Added in 8.0
415
416 // 'avx512.*'
417 return (Name == "movntdqa" || // Added in 5.0
418 Name == "pmul.dq.512" || // Added in 7.0
419 Name == "pmulu.dq.512" || // Added in 7.0
420 Name.starts_with(Prefix: "broadcastm") || // Added in 6.0
421 Name.starts_with(Prefix: "cmp.p") || // Added in 12.0
422 Name.starts_with(Prefix: "cvtb2mask.") || // Added in 7.0
423 Name.starts_with(Prefix: "cvtd2mask.") || // Added in 7.0
424 Name.starts_with(Prefix: "cvtmask2") || // Added in 5.0
425 Name.starts_with(Prefix: "cvtq2mask.") || // Added in 7.0
426 Name == "cvtusi2sd" || // Added in 7.0
427 Name.starts_with(Prefix: "cvtw2mask.") || // Added in 7.0
428 Name == "kand.w" || // Added in 7.0
429 Name == "kandn.w" || // Added in 7.0
430 Name == "knot.w" || // Added in 7.0
431 Name == "kor.w" || // Added in 7.0
432 Name == "kortestc.w" || // Added in 7.0
433 Name == "kortestz.w" || // Added in 7.0
434 Name.starts_with(Prefix: "kunpck") || // added in 6.0
435 Name == "kxnor.w" || // Added in 7.0
436 Name == "kxor.w" || // Added in 7.0
437 Name.starts_with(Prefix: "padds.") || // Added in 8.0
438 Name.starts_with(Prefix: "pbroadcast") || // Added in 3.9
439 Name.starts_with(Prefix: "prol") || // Added in 8.0
440 Name.starts_with(Prefix: "pror") || // Added in 8.0
441 Name.starts_with(Prefix: "psll.dq") || // Added in 3.9
442 Name.starts_with(Prefix: "psrl.dq") || // Added in 3.9
443 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
444 Name.starts_with(Prefix: "ptestm") || // Added in 6.0
445 Name.starts_with(Prefix: "ptestnm") || // Added in 6.0
446 Name.starts_with(Prefix: "storent.") || // Added in 3.9
447 Name.starts_with(Prefix: "vbroadcast.s") || // Added in 7.0
448 Name.starts_with(Prefix: "vpshld.") || // Added in 8.0
449 Name.starts_with(Prefix: "vpshrd.")); // Added in 8.0
450 }
451
452 if (Name.consume_front(Prefix: "fma."))
453 return (Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
454 Name.starts_with(Prefix: "vfmsub.") || // Added in 7.0
455 Name.starts_with(Prefix: "vfmsubadd.") || // Added in 7.0
456 Name.starts_with(Prefix: "vfnmadd.") || // Added in 7.0
457 Name.starts_with(Prefix: "vfnmsub.")); // Added in 7.0
458
459 if (Name.consume_front(Prefix: "fma4."))
460 return Name.starts_with(Prefix: "vfmadd.s"); // Added in 7.0
461
462 if (Name.consume_front(Prefix: "sse."))
463 return (Name == "add.ss" || // Added in 4.0
464 Name == "cvtsi2ss" || // Added in 7.0
465 Name == "cvtsi642ss" || // Added in 7.0
466 Name == "div.ss" || // Added in 4.0
467 Name == "mul.ss" || // Added in 4.0
468 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
469 Name == "sqrt.ss" || // Added in 7.0
470 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
471 Name == "sub.ss"); // Added in 4.0
472
473 if (Name.consume_front(Prefix: "sse2."))
474 return (Name == "add.sd" || // Added in 4.0
475 Name == "cvtdq2pd" || // Added in 3.9
476 Name == "cvtdq2ps" || // Added in 7.0
477 Name == "cvtps2pd" || // Added in 3.9
478 Name == "cvtsi2sd" || // Added in 7.0
479 Name == "cvtsi642sd" || // Added in 7.0
480 Name == "cvtss2sd" || // Added in 7.0
481 Name == "div.sd" || // Added in 4.0
482 Name == "mul.sd" || // Added in 4.0
483 Name.starts_with(Prefix: "padds.") || // Added in 8.0
484 Name.starts_with(Prefix: "paddus.") || // Added in 8.0
485 Name.starts_with(Prefix: "pcmpeq.") || // Added in 3.1
486 Name.starts_with(Prefix: "pcmpgt.") || // Added in 3.1
487 Name == "pmaxs.w" || // Added in 3.9
488 Name == "pmaxu.b" || // Added in 3.9
489 Name == "pmins.w" || // Added in 3.9
490 Name == "pminu.b" || // Added in 3.9
491 Name == "pmulu.dq" || // Added in 7.0
492 Name.starts_with(Prefix: "pshuf") || // Added in 3.9
493 Name.starts_with(Prefix: "psll.dq") || // Added in 3.7
494 Name.starts_with(Prefix: "psrl.dq") || // Added in 3.7
495 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
496 Name.starts_with(Prefix: "psubus.") || // Added in 8.0
497 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
498 Name == "sqrt.sd" || // Added in 7.0
499 Name == "storel.dq" || // Added in 3.9
500 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
501 Name == "sub.sd"); // Added in 4.0
502
503 if (Name.consume_front(Prefix: "sse41."))
504 return (Name.starts_with(Prefix: "blendp") || // Added in 3.7
505 Name == "movntdqa" || // Added in 5.0
506 Name == "pblendw" || // Added in 3.7
507 Name == "pmaxsb" || // Added in 3.9
508 Name == "pmaxsd" || // Added in 3.9
509 Name == "pmaxud" || // Added in 3.9
510 Name == "pmaxuw" || // Added in 3.9
511 Name == "pminsb" || // Added in 3.9
512 Name == "pminsd" || // Added in 3.9
513 Name == "pminud" || // Added in 3.9
514 Name == "pminuw" || // Added in 3.9
515 Name.starts_with(Prefix: "pmovsx") || // Added in 3.8
516 Name.starts_with(Prefix: "pmovzx") || // Added in 3.9
517 Name == "pmuldq"); // Added in 7.0
518
519 if (Name.consume_front(Prefix: "sse42."))
520 return Name == "crc32.64.8"; // Added in 3.4
521
522 if (Name.consume_front(Prefix: "sse4a."))
523 return Name.starts_with(Prefix: "movnt."); // Added in 3.9
524
525 if (Name.consume_front(Prefix: "ssse3."))
526 return (Name == "pabs.b.128" || // Added in 6.0
527 Name == "pabs.d.128" || // Added in 6.0
528 Name == "pabs.w.128"); // Added in 6.0
529
530 if (Name.consume_front(Prefix: "xop."))
531 return (Name == "vpcmov" || // Added in 3.8
532 Name == "vpcmov.256" || // Added in 5.0
533 Name.starts_with(Prefix: "vpcom") || // Added in 3.2, Updated in 9.0
534 Name.starts_with(Prefix: "vprot")); // Added in 8.0
535
536 if (Name.consume_front(Prefix: "bmi."))
537 return (Name.starts_with(Prefix: "pdep.") || // Added in 23.0
538 Name.starts_with(Prefix: "pext.")); // Added in 23.0
539
540 return (Name == "addcarry.u32" || // Added in 8.0
541 Name == "addcarry.u64" || // Added in 8.0
542 Name == "addcarryx.u32" || // Added in 8.0
543 Name == "addcarryx.u64" || // Added in 8.0
544 Name == "subborrow.u32" || // Added in 8.0
545 Name == "subborrow.u64" || // Added in 8.0
546 Name.starts_with(Prefix: "vcvtph2ps.")); // Added in 11.0
547}
548
549static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
550 Function *&NewFn) {
551 // Only handle intrinsics that start with "x86.".
552 if (!Name.consume_front(Prefix: "x86."))
553 return false;
554
555 if (shouldUpgradeX86Intrinsic(F, Name)) {
556 NewFn = nullptr;
557 return true;
558 }
559
560 if (Name == "rdtscp") { // Added in 8.0
561 // If this intrinsic has 0 operands, it's the new version.
562 if (F->getFunctionType()->getNumParams() == 0)
563 return false;
564
565 rename(GV: F);
566 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
567 id: Intrinsic::x86_rdtscp);
568 return true;
569 }
570
571 Intrinsic::ID ID;
572
573 // SSE4.1 ptest functions may have an old signature.
574 if (Name.consume_front(Prefix: "sse41.ptest")) { // Added in 3.2
575 ID = StringSwitch<Intrinsic::ID>(Name)
576 .Case(S: "c", Value: Intrinsic::x86_sse41_ptestc)
577 .Case(S: "z", Value: Intrinsic::x86_sse41_ptestz)
578 .Case(S: "nzc", Value: Intrinsic::x86_sse41_ptestnzc)
579 .Default(Value: Intrinsic::not_intrinsic);
580 if (ID != Intrinsic::not_intrinsic)
581 return upgradePTESTIntrinsic(F, IID: ID, NewFn);
582
583 return false;
584 }
585
586 // Several blend and other instructions with masks used the wrong number of
587 // bits.
588
589 // Added in 3.6
590 ID = StringSwitch<Intrinsic::ID>(Name)
591 .Case(S: "sse41.insertps", Value: Intrinsic::x86_sse41_insertps)
592 .Case(S: "sse41.dppd", Value: Intrinsic::x86_sse41_dppd)
593 .Case(S: "sse41.dpps", Value: Intrinsic::x86_sse41_dpps)
594 .Case(S: "sse41.mpsadbw", Value: Intrinsic::x86_sse41_mpsadbw)
595 .Case(S: "avx.dp.ps.256", Value: Intrinsic::x86_avx_dp_ps_256)
596 .Case(S: "avx2.mpsadbw", Value: Intrinsic::x86_avx2_mpsadbw)
597 .Default(Value: Intrinsic::not_intrinsic);
598 if (ID != Intrinsic::not_intrinsic)
599 return upgradeX86IntrinsicsWith8BitMask(F, IID: ID, NewFn);
600
601 if (Name.consume_front(Prefix: "avx512.")) {
602 if (Name.consume_front(Prefix: "mask.cmp.")) {
603 // Added in 7.0
604 ID = StringSwitch<Intrinsic::ID>(Name)
605 .Case(S: "pd.128", Value: Intrinsic::x86_avx512_mask_cmp_pd_128)
606 .Case(S: "pd.256", Value: Intrinsic::x86_avx512_mask_cmp_pd_256)
607 .Case(S: "pd.512", Value: Intrinsic::x86_avx512_mask_cmp_pd_512)
608 .Case(S: "ps.128", Value: Intrinsic::x86_avx512_mask_cmp_ps_128)
609 .Case(S: "ps.256", Value: Intrinsic::x86_avx512_mask_cmp_ps_256)
610 .Case(S: "ps.512", Value: Intrinsic::x86_avx512_mask_cmp_ps_512)
611 .Default(Value: Intrinsic::not_intrinsic);
612 if (ID != Intrinsic::not_intrinsic)
613 return upgradeX86MaskedFPCompare(F, IID: ID, NewFn);
614 } else if (Name.starts_with(Prefix: "vpdpbusd.") ||
615 Name.starts_with(Prefix: "vpdpbusds.")) {
616 // Added in 21.1
617 ID = StringSwitch<Intrinsic::ID>(Name)
618 .Case(S: "vpdpbusd.128", Value: Intrinsic::x86_avx512_vpdpbusd_128)
619 .Case(S: "vpdpbusd.256", Value: Intrinsic::x86_avx512_vpdpbusd_256)
620 .Case(S: "vpdpbusd.512", Value: Intrinsic::x86_avx512_vpdpbusd_512)
621 .Case(S: "vpdpbusds.128", Value: Intrinsic::x86_avx512_vpdpbusds_128)
622 .Case(S: "vpdpbusds.256", Value: Intrinsic::x86_avx512_vpdpbusds_256)
623 .Case(S: "vpdpbusds.512", Value: Intrinsic::x86_avx512_vpdpbusds_512)
624 .Default(Value: Intrinsic::not_intrinsic);
625 if (ID != Intrinsic::not_intrinsic)
626 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
627 } else if (Name.starts_with(Prefix: "vpdpwssd.") ||
628 Name.starts_with(Prefix: "vpdpwssds.")) {
629 // Added in 21.1
630 ID = StringSwitch<Intrinsic::ID>(Name)
631 .Case(S: "vpdpwssd.128", Value: Intrinsic::x86_avx512_vpdpwssd_128)
632 .Case(S: "vpdpwssd.256", Value: Intrinsic::x86_avx512_vpdpwssd_256)
633 .Case(S: "vpdpwssd.512", Value: Intrinsic::x86_avx512_vpdpwssd_512)
634 .Case(S: "vpdpwssds.128", Value: Intrinsic::x86_avx512_vpdpwssds_128)
635 .Case(S: "vpdpwssds.256", Value: Intrinsic::x86_avx512_vpdpwssds_256)
636 .Case(S: "vpdpwssds.512", Value: Intrinsic::x86_avx512_vpdpwssds_512)
637 .Default(Value: Intrinsic::not_intrinsic);
638 if (ID != Intrinsic::not_intrinsic)
639 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
640 }
641 return false; // No other 'x86.avx512.*'.
642 }
643
644 if (Name.consume_front(Prefix: "avx2.")) {
645 if (Name.consume_front(Prefix: "vpdpb")) {
646 // Added in 21.1
647 ID = StringSwitch<Intrinsic::ID>(Name)
648 .Case(S: "ssd.128", Value: Intrinsic::x86_avx2_vpdpbssd_128)
649 .Case(S: "ssd.256", Value: Intrinsic::x86_avx2_vpdpbssd_256)
650 .Case(S: "ssds.128", Value: Intrinsic::x86_avx2_vpdpbssds_128)
651 .Case(S: "ssds.256", Value: Intrinsic::x86_avx2_vpdpbssds_256)
652 .Case(S: "sud.128", Value: Intrinsic::x86_avx2_vpdpbsud_128)
653 .Case(S: "sud.256", Value: Intrinsic::x86_avx2_vpdpbsud_256)
654 .Case(S: "suds.128", Value: Intrinsic::x86_avx2_vpdpbsuds_128)
655 .Case(S: "suds.256", Value: Intrinsic::x86_avx2_vpdpbsuds_256)
656 .Case(S: "uud.128", Value: Intrinsic::x86_avx2_vpdpbuud_128)
657 .Case(S: "uud.256", Value: Intrinsic::x86_avx2_vpdpbuud_256)
658 .Case(S: "uuds.128", Value: Intrinsic::x86_avx2_vpdpbuuds_128)
659 .Case(S: "uuds.256", Value: Intrinsic::x86_avx2_vpdpbuuds_256)
660 .Default(Value: Intrinsic::not_intrinsic);
661 if (ID != Intrinsic::not_intrinsic)
662 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
663 } else if (Name.consume_front(Prefix: "vpdpw")) {
664 // Added in 21.1
665 ID = StringSwitch<Intrinsic::ID>(Name)
666 .Case(S: "sud.128", Value: Intrinsic::x86_avx2_vpdpwsud_128)
667 .Case(S: "sud.256", Value: Intrinsic::x86_avx2_vpdpwsud_256)
668 .Case(S: "suds.128", Value: Intrinsic::x86_avx2_vpdpwsuds_128)
669 .Case(S: "suds.256", Value: Intrinsic::x86_avx2_vpdpwsuds_256)
670 .Case(S: "usd.128", Value: Intrinsic::x86_avx2_vpdpwusd_128)
671 .Case(S: "usd.256", Value: Intrinsic::x86_avx2_vpdpwusd_256)
672 .Case(S: "usds.128", Value: Intrinsic::x86_avx2_vpdpwusds_128)
673 .Case(S: "usds.256", Value: Intrinsic::x86_avx2_vpdpwusds_256)
674 .Case(S: "uud.128", Value: Intrinsic::x86_avx2_vpdpwuud_128)
675 .Case(S: "uud.256", Value: Intrinsic::x86_avx2_vpdpwuud_256)
676 .Case(S: "uuds.128", Value: Intrinsic::x86_avx2_vpdpwuuds_128)
677 .Case(S: "uuds.256", Value: Intrinsic::x86_avx2_vpdpwuuds_256)
678 .Default(Value: Intrinsic::not_intrinsic);
679 if (ID != Intrinsic::not_intrinsic)
680 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
681 }
682 return false; // No other 'x86.avx2.*'
683 }
684
685 if (Name.consume_front(Prefix: "avx10.")) {
686 if (Name.consume_front(Prefix: "vpdpb")) {
687 // Added in 21.1
688 ID = StringSwitch<Intrinsic::ID>(Name)
689 .Case(S: "ssd.512", Value: Intrinsic::x86_avx10_vpdpbssd_512)
690 .Case(S: "ssds.512", Value: Intrinsic::x86_avx10_vpdpbssds_512)
691 .Case(S: "sud.512", Value: Intrinsic::x86_avx10_vpdpbsud_512)
692 .Case(S: "suds.512", Value: Intrinsic::x86_avx10_vpdpbsuds_512)
693 .Case(S: "uud.512", Value: Intrinsic::x86_avx10_vpdpbuud_512)
694 .Case(S: "uuds.512", Value: Intrinsic::x86_avx10_vpdpbuuds_512)
695 .Default(Value: Intrinsic::not_intrinsic);
696 if (ID != Intrinsic::not_intrinsic)
697 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
698 } else if (Name.consume_front(Prefix: "vpdpw")) {
699 ID = StringSwitch<Intrinsic::ID>(Name)
700 .Case(S: "sud.512", Value: Intrinsic::x86_avx10_vpdpwsud_512)
701 .Case(S: "suds.512", Value: Intrinsic::x86_avx10_vpdpwsuds_512)
702 .Case(S: "usd.512", Value: Intrinsic::x86_avx10_vpdpwusd_512)
703 .Case(S: "usds.512", Value: Intrinsic::x86_avx10_vpdpwusds_512)
704 .Case(S: "uud.512", Value: Intrinsic::x86_avx10_vpdpwuud_512)
705 .Case(S: "uuds.512", Value: Intrinsic::x86_avx10_vpdpwuuds_512)
706 .Default(Value: Intrinsic::not_intrinsic);
707 if (ID != Intrinsic::not_intrinsic)
708 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
709 }
710 return false; // No other 'x86.avx10.*'
711 }
712
713 if (Name.consume_front(Prefix: "avx512bf16.")) {
714 // Added in 9.0
715 ID = StringSwitch<Intrinsic::ID>(Name)
716 .Case(S: "cvtne2ps2bf16.128",
717 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
718 .Case(S: "cvtne2ps2bf16.256",
719 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
720 .Case(S: "cvtne2ps2bf16.512",
721 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
722 .Case(S: "mask.cvtneps2bf16.128",
723 Value: Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
724 .Case(S: "cvtneps2bf16.256",
725 Value: Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
726 .Case(S: "cvtneps2bf16.512",
727 Value: Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
728 .Default(Value: Intrinsic::not_intrinsic);
729 if (ID != Intrinsic::not_intrinsic)
730 return upgradeX86BF16Intrinsic(F, IID: ID, NewFn);
731
732 // Added in 9.0
733 ID = StringSwitch<Intrinsic::ID>(Name)
734 .Case(S: "dpbf16ps.128", Value: Intrinsic::x86_avx512bf16_dpbf16ps_128)
735 .Case(S: "dpbf16ps.256", Value: Intrinsic::x86_avx512bf16_dpbf16ps_256)
736 .Case(S: "dpbf16ps.512", Value: Intrinsic::x86_avx512bf16_dpbf16ps_512)
737 .Default(Value: Intrinsic::not_intrinsic);
738 if (ID != Intrinsic::not_intrinsic)
739 return upgradeX86BF16DPIntrinsic(F, IID: ID, NewFn);
740 return false; // No other 'x86.avx512bf16.*'.
741 }
742
743 if (Name.consume_front(Prefix: "xop.")) {
744 Intrinsic::ID ID = Intrinsic::not_intrinsic;
745 if (Name.starts_with(Prefix: "vpermil2")) { // Added in 3.9
746 // Upgrade any XOP PERMIL2 index operand still using a float/double
747 // vector.
748 auto Idx = F->getFunctionType()->getParamType(i: 2);
749 if (Idx->isFPOrFPVectorTy()) {
750 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
751 unsigned EltSize = Idx->getScalarSizeInBits();
752 if (EltSize == 64 && IdxSize == 128)
753 ID = Intrinsic::x86_xop_vpermil2pd;
754 else if (EltSize == 32 && IdxSize == 128)
755 ID = Intrinsic::x86_xop_vpermil2ps;
756 else if (EltSize == 64 && IdxSize == 256)
757 ID = Intrinsic::x86_xop_vpermil2pd_256;
758 else
759 ID = Intrinsic::x86_xop_vpermil2ps_256;
760 }
761 } else if (F->arg_size() == 2)
762 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
763 ID = StringSwitch<Intrinsic::ID>(Name)
764 .Case(S: "vfrcz.ss", Value: Intrinsic::x86_xop_vfrcz_ss)
765 .Case(S: "vfrcz.sd", Value: Intrinsic::x86_xop_vfrcz_sd)
766 .Default(Value: Intrinsic::not_intrinsic);
767
768 if (ID != Intrinsic::not_intrinsic) {
769 rename(GV: F);
770 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
771 return true;
772 }
773 return false; // No other 'x86.xop.*'
774 }
775
776 if (Name == "seh.recoverfp") {
777 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
778 id: Intrinsic::eh_recoverfp);
779 return true;
780 }
781
782 return false;
783}
784
785// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
786// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
787static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
788 StringRef Name,
789 Function *&NewFn) {
790 if (Name.starts_with(Prefix: "rbit")) {
791 // '(arm|aarch64).rbit'.
792 NewFn = Intrinsic::getOrInsertDeclaration(
793 M: F->getParent(), id: Intrinsic::bitreverse, OverloadTys: F->arg_begin()->getType());
794 return true;
795 }
796
797 if (Name == "thread.pointer") {
798 // '(arm|aarch64).thread.pointer'.
799 NewFn = Intrinsic::getOrInsertDeclaration(
800 M: F->getParent(), id: Intrinsic::thread_pointer, OverloadTys: F->getReturnType());
801 return true;
802 }
803
804 bool Neon = Name.consume_front(Prefix: "neon.");
805 if (Neon) {
806 // '(arm|aarch64).neon.*'.
807 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
808 // v16i8 respectively.
809 if (Name.consume_front(Prefix: "bfdot.")) {
810 // (arm|aarch64).neon.bfdot.*'.
811 Intrinsic::ID ID =
812 StringSwitch<Intrinsic::ID>(Name)
813 .Cases(CaseStrings: {"v2f32.v8i8", "v4f32.v16i8"},
814 Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
815 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
816 .Default(Value: Intrinsic::not_intrinsic);
817 if (ID != Intrinsic::not_intrinsic) {
818 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
819 assert((OperandWidth == 64 || OperandWidth == 128) &&
820 "Unexpected operand width");
821 LLVMContext &Ctx = F->getParent()->getContext();
822 std::array<Type *, 2> Tys{
823 ._M_elems: {F->getReturnType(),
824 FixedVectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), NumElts: OperandWidth / 16)}};
825 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, OverloadTys: Tys);
826 return true;
827 }
828 return false; // No other '(arm|aarch64).neon.bfdot.*'.
829 }
830
831 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
832 // anymore and accept v8bf16 instead of v16i8.
833 if (Name.consume_front(Prefix: "bfm")) {
834 // (arm|aarch64).neon.bfm*'.
835 if (Name.consume_back(Suffix: ".v4f32.v16i8")) {
836 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
837 Intrinsic::ID ID =
838 StringSwitch<Intrinsic::ID>(Name)
839 .Case(S: "mla",
840 Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
841 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
842 .Case(S: "lalb",
843 Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
844 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
845 .Case(S: "lalt",
846 Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
847 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
848 .Default(Value: Intrinsic::not_intrinsic);
849 if (ID != Intrinsic::not_intrinsic) {
850 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
851 return true;
852 }
853 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
854 }
855 return false; // No other '(arm|aarch64).neon.bfm*.
856 }
857 // Continue on to Aarch64 Neon or Arm Neon.
858 }
859 // Continue on to Arm or Aarch64.
860
861 if (IsArm) {
862 // 'arm.*'.
863 if (Neon) {
864 // 'arm.neon.*'.
865 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
866 .StartsWith(S: "vclz.", Value: Intrinsic::ctlz)
867 .StartsWith(S: "vcnt.", Value: Intrinsic::ctpop)
868 .StartsWith(S: "vqadds.", Value: Intrinsic::sadd_sat)
869 .StartsWith(S: "vqaddu.", Value: Intrinsic::uadd_sat)
870 .StartsWith(S: "vqsubs.", Value: Intrinsic::ssub_sat)
871 .StartsWith(S: "vqsubu.", Value: Intrinsic::usub_sat)
872 .StartsWith(S: "vrinta.", Value: Intrinsic::round)
873 .StartsWith(S: "vrintn.", Value: Intrinsic::roundeven)
874 .StartsWith(S: "vrintm.", Value: Intrinsic::floor)
875 .StartsWith(S: "vrintp.", Value: Intrinsic::ceil)
876 .StartsWith(S: "vrintx.", Value: Intrinsic::rint)
877 .StartsWith(S: "vrintz.", Value: Intrinsic::trunc)
878 .Default(Value: Intrinsic::not_intrinsic);
879 if (ID != Intrinsic::not_intrinsic) {
880 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
881 OverloadTys: F->arg_begin()->getType());
882 return true;
883 }
884
885 if (Name.consume_front(Prefix: "vst")) {
886 // 'arm.neon.vst*'.
887 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
888 SmallVector<StringRef, 2> Groups;
889 if (vstRegex.match(String: Name, Matches: &Groups)) {
890 static const Intrinsic::ID StoreInts[] = {
891 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
892 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
893
894 static const Intrinsic::ID StoreLaneInts[] = {
895 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
896 Intrinsic::arm_neon_vst4lane};
897
898 auto fArgs = F->getFunctionType()->params();
899 Type *Tys[] = {fArgs[0], fArgs[1]};
900 if (Groups[1].size() == 1)
901 NewFn = Intrinsic::getOrInsertDeclaration(
902 M: F->getParent(), id: StoreInts[fArgs.size() - 3], OverloadTys: Tys);
903 else
904 NewFn = Intrinsic::getOrInsertDeclaration(
905 M: F->getParent(), id: StoreLaneInts[fArgs.size() - 5], OverloadTys: Tys);
906 return true;
907 }
908 return false; // No other 'arm.neon.vst*'.
909 }
910
911 return false; // No other 'arm.neon.*'.
912 }
913
914 if (Name.consume_front(Prefix: "mve.")) {
915 // 'arm.mve.*'.
916 if (Name == "vctp64") {
917 if (cast<FixedVectorType>(Val: F->getReturnType())->getNumElements() == 4) {
918 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
919 // the function and deal with it below in UpgradeIntrinsicCall.
920 rename(GV: F);
921 return true;
922 }
923 return false; // Not 'arm.mve.vctp64'.
924 }
925
926 if (Name.starts_with(Prefix: "vrintn.v")) {
927 NewFn = Intrinsic::getOrInsertDeclaration(
928 M: F->getParent(), id: Intrinsic::roundeven, OverloadTys: F->arg_begin()->getType());
929 return true;
930 }
931
932 // These too are changed to accept a v2i1 instead of the old v4i1.
933 if (Name.consume_back(Suffix: ".v4i1")) {
934 // 'arm.mve.*.v4i1'.
935 if (Name.consume_back(Suffix: ".predicated.v2i64.v4i32"))
936 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
937 return Name == "mull.int" || Name == "vqdmull";
938
939 if (Name.consume_back(Suffix: ".v2i64")) {
940 // 'arm.mve.*.v2i64.v4i1'
941 bool IsGather = Name.consume_front(Prefix: "vldr.gather.");
942 if (IsGather || Name.consume_front(Prefix: "vstr.scatter.")) {
943 if (Name.consume_front(Prefix: "base.")) {
944 // Optional 'wb.' prefix.
945 Name.consume_front(Prefix: "wb.");
946 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
947 // predicated.v2i64.v2i64.v4i1'.
948 return Name == "predicated.v2i64";
949 }
950
951 if (Name.consume_front(Prefix: "offset.predicated."))
952 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
953 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
954
955 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
956 return false;
957 }
958
959 return false; // No other 'arm.mve.*.v2i64.v4i1'.
960 }
961 return false; // No other 'arm.mve.*.v4i1'.
962 }
963 return false; // No other 'arm.mve.*'.
964 }
965
966 if (Name.consume_front(Prefix: "cde.vcx")) {
967 // 'arm.cde.vcx*'.
968 if (Name.consume_back(Suffix: ".predicated.v2i64.v4i1"))
969 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
970 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
971 Name == "3q" || Name == "3qa";
972
973 return false; // No other 'arm.cde.vcx*'.
974 }
975 } else {
976 // 'aarch64.*'.
977 if (Neon) {
978 // 'aarch64.neon.*'.
979 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
980 .StartsWith(S: "frintn", Value: Intrinsic::roundeven)
981 .StartsWith(S: "rbit", Value: Intrinsic::bitreverse)
982 .Default(Value: Intrinsic::not_intrinsic);
983 if (ID != Intrinsic::not_intrinsic) {
984 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
985 OverloadTys: F->arg_begin()->getType());
986 return true;
987 }
988
989 if (Name.starts_with(Prefix: "addp")) {
990 // 'aarch64.neon.addp*'.
991 if (F->arg_size() != 2)
992 return false; // Invalid IR.
993 VectorType *Ty = dyn_cast<VectorType>(Val: F->getReturnType());
994 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
995 NewFn = Intrinsic::getOrInsertDeclaration(
996 M: F->getParent(), id: Intrinsic::aarch64_neon_faddp, OverloadTys: Ty);
997 return true;
998 }
999 }
1000
1001 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
1002 if (Name.starts_with(Prefix: "bfcvt")) {
1003 NewFn = nullptr;
1004 return true;
1005 }
1006
1007 // vcvtfp2hf and vcvthf2fp -> fpext and fptrunc
1008 if (Name == "vcvtfp2hf" || Name == "vcvthf2fp") {
1009 NewFn = nullptr;
1010 return true;
1011 }
1012
1013 return false; // No other 'aarch64.neon.*'.
1014 }
1015 if (Name.consume_front(Prefix: "sve.")) {
1016 // 'aarch64.sve.*'.
1017 if (Name.consume_front(Prefix: "bf")) {
1018 if (Name == "mmla") {
1019 Type *Tys[] = {F->getReturnType(),
1020 std::next(x: F->arg_begin())->getType()};
1021 NewFn = Intrinsic::getOrInsertDeclaration(
1022 M: F->getParent(), id: Intrinsic::aarch64_sve_fmmla, OverloadTys: Tys);
1023 return true;
1024 }
1025 if (Name.consume_back(Suffix: ".lane")) {
1026 // 'aarch64.sve.bf*.lane'.
1027 Intrinsic::ID ID =
1028 StringSwitch<Intrinsic::ID>(Name)
1029 .Case(S: "dot", Value: Intrinsic::aarch64_sve_bfdot_lane_v2)
1030 .Case(S: "mlalb", Value: Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1031 .Case(S: "mlalt", Value: Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1032 .Default(Value: Intrinsic::not_intrinsic);
1033 if (ID != Intrinsic::not_intrinsic) {
1034 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1035 return true;
1036 }
1037 return false; // No other 'aarch64.sve.bf*.lane'.
1038 }
1039 return false; // No other 'aarch64.sve.bf*'.
1040 }
1041
1042 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1043 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1044 NewFn = nullptr;
1045 return true;
1046 }
1047
1048 if (Name.consume_front(Prefix: "addqv")) {
1049 // 'aarch64.sve.addqv'.
1050 if (!F->getReturnType()->isFPOrFPVectorTy())
1051 return false;
1052
1053 auto Args = F->getFunctionType()->params();
1054 Type *Tys[] = {F->getReturnType(), Args[1]};
1055 NewFn = Intrinsic::getOrInsertDeclaration(
1056 M: F->getParent(), id: Intrinsic::aarch64_sve_faddqv, OverloadTys: Tys);
1057 return true;
1058 }
1059
1060 if (Name.consume_front(Prefix: "ld")) {
1061 // 'aarch64.sve.ld*'.
1062 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1063 if (LdRegex.match(String: Name)) {
1064 Type *ScalarTy =
1065 cast<VectorType>(Val: F->getReturnType())->getElementType();
1066 ElementCount EC =
1067 cast<VectorType>(Val: F->arg_begin()->getType())->getElementCount();
1068 assert(F->arg_size() == 2 &&
1069 "Expected 2 arguments for ld* intrinsic.");
1070 Type *PtrTy = F->getArg(i: 1)->getType();
1071 Type *Ty = VectorType::get(ElementType: ScalarTy, EC);
1072 static const Intrinsic::ID LoadIDs[] = {
1073 Intrinsic::aarch64_sve_ld2_sret,
1074 Intrinsic::aarch64_sve_ld3_sret,
1075 Intrinsic::aarch64_sve_ld4_sret,
1076 };
1077 NewFn = Intrinsic::getOrInsertDeclaration(
1078 M: F->getParent(), id: LoadIDs[Name[0] - '2'], OverloadTys: {Ty, PtrTy});
1079 return true;
1080 }
1081 return false; // No other 'aarch64.sve.ld*'.
1082 }
1083
1084 if (Name.consume_front(Prefix: "tuple.")) {
1085 // 'aarch64.sve.tuple.*'.
1086 if (Name.starts_with(Prefix: "get")) {
1087 // 'aarch64.sve.tuple.get*'.
1088 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1089 NewFn = Intrinsic::getOrInsertDeclaration(
1090 M: F->getParent(), id: Intrinsic::vector_extract, OverloadTys: Tys);
1091 return true;
1092 }
1093
1094 if (Name.starts_with(Prefix: "set")) {
1095 // 'aarch64.sve.tuple.set*'.
1096 auto Args = F->getFunctionType()->params();
1097 Type *Tys[] = {Args[0], Args[2], Args[1]};
1098 NewFn = Intrinsic::getOrInsertDeclaration(
1099 M: F->getParent(), id: Intrinsic::vector_insert, OverloadTys: Tys);
1100 return true;
1101 }
1102
1103 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1104 if (CreateTupleRegex.match(String: Name)) {
1105 // 'aarch64.sve.tuple.create*'.
1106 auto Args = F->getFunctionType()->params();
1107 Type *Tys[] = {F->getReturnType(), Args[1]};
1108 NewFn = Intrinsic::getOrInsertDeclaration(
1109 M: F->getParent(), id: Intrinsic::vector_insert, OverloadTys: Tys);
1110 return true;
1111 }
1112 return false; // No other 'aarch64.sve.tuple.*'.
1113 }
1114
1115 if (Name.starts_with(Prefix: "rev.nxv")) {
1116 // 'aarch64.sve.rev.<Ty>'
1117 NewFn = Intrinsic::getOrInsertDeclaration(
1118 M: F->getParent(), id: Intrinsic::vector_reverse, OverloadTys: F->getReturnType());
1119 return true;
1120 }
1121
1122 return false; // No other 'aarch64.sve.*'.
1123 }
1124 if (Name.consume_front(Prefix: "sme.")) {
1125 // 'aarch64.sme.*'.
1126 if (Name.consume_front(Prefix: "ftmopa.")) {
1127 // The FP8 FTMOPA intrinsics were split out from the non-FP8 FTMOPA
1128 // intrinsics to model their FPMR dependency.
1129 Intrinsic::ID ID =
1130 StringSwitch<Intrinsic::ID>(Name)
1131 .Case(S: "za16.nxv16i8", Value: Intrinsic::aarch64_sme_fp8_ftmopa_za16)
1132 .Case(S: "za32.nxv16i8", Value: Intrinsic::aarch64_sme_fp8_ftmopa_za32)
1133 .Default(Value: Intrinsic::not_intrinsic);
1134 if (ID != Intrinsic::not_intrinsic) {
1135 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1136 return true;
1137 }
1138 return false; // No other 'aarch64.sme.ftmopa.*'.
1139 }
1140
1141 return false; // No other 'aarch64.sme.*'.
1142 }
1143 }
1144 return false; // No other 'arm.*', 'aarch64.*'.
1145}
1146
1147static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
1148 StringRef Name) {
1149 if (Name.consume_front(Prefix: "cp.async.bulk.tensor.g2s.")) {
1150 Intrinsic::ID ID =
1151 StringSwitch<Intrinsic::ID>(Name)
1152 .Case(S: "im2col.3d",
1153 Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1154 .Case(S: "im2col.4d",
1155 Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1156 .Case(S: "im2col.5d",
1157 Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1158 .Case(S: "tile.1d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1159 .Case(S: "tile.2d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1160 .Case(S: "tile.3d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1161 .Case(S: "tile.4d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1162 .Case(S: "tile.5d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1163 .Default(Value: Intrinsic::not_intrinsic);
1164
1165 if (ID == Intrinsic::not_intrinsic)
1166 return ID;
1167
1168 // These intrinsics may need upgrade for two reasons:
1169 // (1) When the address-space of the first argument is shared[AS=3]
1170 // (and we upgrade it to use shared_cluster address-space[AS=7])
1171 if (F->getArg(i: 0)->getType()->getPointerAddressSpace() ==
1172 NVPTXAS::ADDRESS_SPACE_SHARED)
1173 return ID;
1174
1175 // (2) When there are only two boolean flag arguments at the end:
1176 //
1177 // The last three parameters of the older version of these
1178 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1179 //
1180 // The newer version reads as:
1181 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1182 //
1183 // So, when the type of the [N-3]rd argument is "not i1", then
1184 // it is the older version and we need to upgrade.
1185 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1186 Type *ArgType = F->getFunctionType()->getParamType(i: FlagStartIndex);
1187 if (!ArgType->isIntegerTy(BitWidth: 1))
1188 return ID;
1189 }
1190
1191 return Intrinsic::not_intrinsic;
1192}
1193
1194static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
1195 StringRef Name) {
1196 if (Name.consume_front(Prefix: "mapa.shared.cluster"))
1197 if (F->getReturnType()->getPointerAddressSpace() ==
1198 NVPTXAS::ADDRESS_SPACE_SHARED)
1199 return Intrinsic::nvvm_mapa_shared_cluster;
1200
1201 if (Name.consume_front(Prefix: "cp.async.bulk.")) {
1202 Intrinsic::ID ID =
1203 StringSwitch<Intrinsic::ID>(Name)
1204 .Case(S: "global.to.shared.cluster",
1205 Value: Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1206 .Case(S: "shared.cta.to.cluster",
1207 Value: Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1208 .Default(Value: Intrinsic::not_intrinsic);
1209
1210 if (ID != Intrinsic::not_intrinsic)
1211 if (F->getArg(i: 0)->getType()->getPointerAddressSpace() ==
1212 NVPTXAS::ADDRESS_SPACE_SHARED)
1213 return ID;
1214 }
1215
1216 return Intrinsic::not_intrinsic;
1217}
1218
1219static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
1220 if (Name.consume_front(Prefix: "fma.rn."))
1221 return StringSwitch<Intrinsic::ID>(Name)
1222 .Case(S: "bf16", Value: Intrinsic::nvvm_fma_rn_bf16)
1223 .Case(S: "bf16x2", Value: Intrinsic::nvvm_fma_rn_bf16x2)
1224 .Case(S: "relu.bf16", Value: Intrinsic::nvvm_fma_rn_relu_bf16)
1225 .Case(S: "relu.bf16x2", Value: Intrinsic::nvvm_fma_rn_relu_bf16x2)
1226 .Default(Value: Intrinsic::not_intrinsic);
1227
1228 if (Name.consume_front(Prefix: "fmax."))
1229 return StringSwitch<Intrinsic::ID>(Name)
1230 .Case(S: "bf16", Value: Intrinsic::nvvm_fmax_bf16)
1231 .Case(S: "bf16x2", Value: Intrinsic::nvvm_fmax_bf16x2)
1232 .Case(S: "ftz.bf16", Value: Intrinsic::nvvm_fmax_ftz_bf16)
1233 .Case(S: "ftz.bf16x2", Value: Intrinsic::nvvm_fmax_ftz_bf16x2)
1234 .Case(S: "ftz.nan.bf16", Value: Intrinsic::nvvm_fmax_ftz_nan_bf16)
1235 .Case(S: "ftz.nan.bf16x2", Value: Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1236 .Case(S: "ftz.nan.xorsign.abs.bf16",
1237 Value: Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1238 .Case(S: "ftz.nan.xorsign.abs.bf16x2",
1239 Value: Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1240 .Case(S: "ftz.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1241 .Case(S: "ftz.xorsign.abs.bf16x2",
1242 Value: Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1243 .Case(S: "nan.bf16", Value: Intrinsic::nvvm_fmax_nan_bf16)
1244 .Case(S: "nan.bf16x2", Value: Intrinsic::nvvm_fmax_nan_bf16x2)
1245 .Case(S: "nan.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1246 .Case(S: "nan.xorsign.abs.bf16x2",
1247 Value: Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1248 .Case(S: "xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1249 .Case(S: "xorsign.abs.bf16x2", Value: Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1250 .Default(Value: Intrinsic::not_intrinsic);
1251
1252 if (Name.consume_front(Prefix: "fmin."))
1253 return StringSwitch<Intrinsic::ID>(Name)
1254 .Case(S: "bf16", Value: Intrinsic::nvvm_fmin_bf16)
1255 .Case(S: "bf16x2", Value: Intrinsic::nvvm_fmin_bf16x2)
1256 .Case(S: "ftz.bf16", Value: Intrinsic::nvvm_fmin_ftz_bf16)
1257 .Case(S: "ftz.bf16x2", Value: Intrinsic::nvvm_fmin_ftz_bf16x2)
1258 .Case(S: "ftz.nan.bf16", Value: Intrinsic::nvvm_fmin_ftz_nan_bf16)
1259 .Case(S: "ftz.nan.bf16x2", Value: Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1260 .Case(S: "ftz.nan.xorsign.abs.bf16",
1261 Value: Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1262 .Case(S: "ftz.nan.xorsign.abs.bf16x2",
1263 Value: Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1264 .Case(S: "ftz.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1265 .Case(S: "ftz.xorsign.abs.bf16x2",
1266 Value: Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1267 .Case(S: "nan.bf16", Value: Intrinsic::nvvm_fmin_nan_bf16)
1268 .Case(S: "nan.bf16x2", Value: Intrinsic::nvvm_fmin_nan_bf16x2)
1269 .Case(S: "nan.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1270 .Case(S: "nan.xorsign.abs.bf16x2",
1271 Value: Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1272 .Case(S: "xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1273 .Case(S: "xorsign.abs.bf16x2", Value: Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1274 .Default(Value: Intrinsic::not_intrinsic);
1275
1276 if (Name.consume_front(Prefix: "neg."))
1277 return StringSwitch<Intrinsic::ID>(Name)
1278 .Case(S: "bf16", Value: Intrinsic::nvvm_neg_bf16)
1279 .Case(S: "bf16x2", Value: Intrinsic::nvvm_neg_bf16x2)
1280 .Default(Value: Intrinsic::not_intrinsic);
1281
1282 return Intrinsic::not_intrinsic;
1283}
1284
1285static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
1286 return Name.consume_front(Prefix: "local") || Name.consume_front(Prefix: "shared") ||
1287 Name.consume_front(Prefix: "global") || Name.consume_front(Prefix: "constant") ||
1288 Name.consume_front(Prefix: "param");
1289}
1290
1291static bool convertIntrinsicValidType(StringRef Name,
1292 const FunctionType *FuncTy) {
1293 Type *HalfTy = Type::getHalfTy(C&: FuncTy->getContext());
1294 if (Name.starts_with(Prefix: "to.fp16")) {
1295 return CastInst::castIsValid(op: Instruction::FPTrunc, SrcTy: FuncTy->getParamType(i: 0),
1296 DstTy: HalfTy) &&
1297 CastInst::castIsValid(op: Instruction::BitCast, SrcTy: HalfTy,
1298 DstTy: FuncTy->getReturnType());
1299 }
1300
1301 if (Name.starts_with(Prefix: "from.fp16")) {
1302 return CastInst::castIsValid(op: Instruction::BitCast, SrcTy: FuncTy->getParamType(i: 0),
1303 DstTy: HalfTy) &&
1304 CastInst::castIsValid(op: Instruction::FPExt, SrcTy: HalfTy,
1305 DstTy: FuncTy->getReturnType());
1306 }
1307
1308 return false;
1309}
1310
1311static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
1312 bool CanUpgradeDebugIntrinsicsToRecords) {
1313 assert(F && "Illegal to upgrade a non-existent Function.");
1314
1315 StringRef Name = F->getName();
1316
1317 // Quickly eliminate it, if it's not a candidate.
1318 if (!Name.consume_front(Prefix: "llvm.") || Name.empty())
1319 return false;
1320
1321 switch (Name[0]) {
1322 default: break;
1323 case 'a': {
1324 bool IsArm = Name.consume_front(Prefix: "arm.");
1325 if (IsArm || Name.consume_front(Prefix: "aarch64.")) {
1326 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1327 return true;
1328 break;
1329 }
1330
1331 if (Name.consume_front(Prefix: "amdgcn.")) {
1332 if (Name == "alignbit") {
1333 // Target specific intrinsic became redundant
1334 NewFn = Intrinsic::getOrInsertDeclaration(
1335 M: F->getParent(), id: Intrinsic::fshr, OverloadTys: {F->getReturnType()});
1336 return true;
1337 }
1338
1339 if (Name.consume_front(Prefix: "atomic.")) {
1340 if (Name.starts_with(Prefix: "inc") || Name.starts_with(Prefix: "dec") ||
1341 Name.starts_with(Prefix: "cond.sub") || Name.starts_with(Prefix: "csub")) {
1342 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1343 // and usub_sat so there's no new declaration.
1344 NewFn = nullptr;
1345 return true;
1346 }
1347 break; // No other 'amdgcn.atomic.*'
1348 }
1349
1350 switch (F->getIntrinsicID()) {
1351 default:
1352 break;
1353 // Legacy wmma iu intrinsics without the optional clamp operand.
1354 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1355 if (F->arg_size() == 7) {
1356 NewFn = nullptr;
1357 return true;
1358 }
1359 break;
1360 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1361 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1362 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1363 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1364 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1365 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1366 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1367 if (F->arg_size() == 8) {
1368 NewFn = nullptr;
1369 return true;
1370 }
1371 break;
1372 }
1373
1374 if (Name.consume_front(Prefix: "ds.") || Name.consume_front(Prefix: "global.atomic.") ||
1375 Name.consume_front(Prefix: "flat.atomic.")) {
1376 if (Name.starts_with(Prefix: "fadd") ||
1377 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1378 (Name.starts_with(Prefix: "fmin") && !Name.starts_with(Prefix: "fmin.num")) ||
1379 (Name.starts_with(Prefix: "fmax") && !Name.starts_with(Prefix: "fmax.num"))) {
1380 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1381 // declaration.
1382 NewFn = nullptr;
1383 return true;
1384 }
1385 }
1386
1387 if (Name.starts_with(Prefix: "ldexp.")) {
1388 // Target specific intrinsic became redundant
1389 NewFn = Intrinsic::getOrInsertDeclaration(
1390 M: F->getParent(), id: Intrinsic::ldexp,
1391 OverloadTys: {F->getReturnType(), F->getArg(i: 1)->getType()});
1392 return true;
1393 }
1394 break; // No other 'amdgcn.*'
1395 }
1396
1397 break;
1398 }
1399 case 'c': {
1400 if (F->arg_size() == 1) {
1401 if (Name.consume_front(Prefix: "convert.")) {
1402 if (convertIntrinsicValidType(Name, FuncTy: F->getFunctionType())) {
1403 NewFn = nullptr;
1404 return true;
1405 }
1406 }
1407
1408 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1409 .StartsWith(S: "ctlz.", Value: Intrinsic::ctlz)
1410 .StartsWith(S: "cttz.", Value: Intrinsic::cttz)
1411 .Default(Value: Intrinsic::not_intrinsic);
1412 if (ID != Intrinsic::not_intrinsic) {
1413 rename(GV: F);
1414 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1415 OverloadTys: F->arg_begin()->getType());
1416 return true;
1417 }
1418 }
1419
1420 if (F->arg_size() == 2 && Name == "coro.end") {
1421 rename(GV: F);
1422 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1423 id: Intrinsic::coro_end);
1424 return true;
1425 }
1426
1427 break;
1428 }
1429 case 'd':
1430 if (Name.consume_front(Prefix: "dbg.")) {
1431 // Mark debug intrinsics for upgrade to new debug format.
1432 if (CanUpgradeDebugIntrinsicsToRecords) {
1433 if (Name == "addr" || Name == "value" || Name == "assign" ||
1434 Name == "declare" || Name == "label") {
1435 // There's no function to replace these with.
1436 NewFn = nullptr;
1437 // But we do want these to get upgraded.
1438 return true;
1439 }
1440 }
1441 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1442 // converted to DbgVariableRecords later.
1443 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1444 rename(GV: F);
1445 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1446 id: Intrinsic::dbg_value);
1447 return true;
1448 }
1449 break; // No other 'dbg.*'.
1450 }
1451 break;
1452 case 'e':
1453 if (Name.consume_front(Prefix: "experimental.vector.")) {
1454 Intrinsic::ID ID =
1455 StringSwitch<Intrinsic::ID>(Name)
1456 // Skip over extract.last.active, otherwise it will be 'upgraded'
1457 // to a regular vector extract which is a different operation.
1458 .StartsWith(S: "extract.last.active.", Value: Intrinsic::not_intrinsic)
1459 .StartsWith(S: "extract.", Value: Intrinsic::vector_extract)
1460 .StartsWith(S: "insert.", Value: Intrinsic::vector_insert)
1461 .StartsWith(S: "reverse.", Value: Intrinsic::vector_reverse)
1462 .StartsWith(S: "interleave2.", Value: Intrinsic::vector_interleave2)
1463 .StartsWith(S: "deinterleave2.", Value: Intrinsic::vector_deinterleave2)
1464 .StartsWith(S: "partial.reduce.add",
1465 Value: Intrinsic::vector_partial_reduce_add)
1466 .Default(Value: Intrinsic::not_intrinsic);
1467 if (ID != Intrinsic::not_intrinsic) {
1468 const auto *FT = F->getFunctionType();
1469 SmallVector<Type *, 2> Tys;
1470 if (ID == Intrinsic::vector_extract ||
1471 ID == Intrinsic::vector_interleave2)
1472 // Extracting overloads the return type.
1473 Tys.push_back(Elt: FT->getReturnType());
1474 if (ID != Intrinsic::vector_interleave2)
1475 Tys.push_back(Elt: FT->getParamType(i: 0));
1476 if (ID == Intrinsic::vector_insert ||
1477 ID == Intrinsic::vector_partial_reduce_add)
1478 // Inserting overloads the inserted type.
1479 Tys.push_back(Elt: FT->getParamType(i: 1));
1480 rename(GV: F);
1481 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, OverloadTys: Tys);
1482 return true;
1483 }
1484
1485 if (Name.consume_front(Prefix: "reduce.")) {
1486 SmallVector<StringRef, 2> Groups;
1487 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1488 if (R.match(String: Name, Matches: &Groups))
1489 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1490 .Case(S: "add", Value: Intrinsic::vector_reduce_add)
1491 .Case(S: "mul", Value: Intrinsic::vector_reduce_mul)
1492 .Case(S: "and", Value: Intrinsic::vector_reduce_and)
1493 .Case(S: "or", Value: Intrinsic::vector_reduce_or)
1494 .Case(S: "xor", Value: Intrinsic::vector_reduce_xor)
1495 .Case(S: "smax", Value: Intrinsic::vector_reduce_smax)
1496 .Case(S: "smin", Value: Intrinsic::vector_reduce_smin)
1497 .Case(S: "umax", Value: Intrinsic::vector_reduce_umax)
1498 .Case(S: "umin", Value: Intrinsic::vector_reduce_umin)
1499 .Case(S: "fmax", Value: Intrinsic::vector_reduce_fmax)
1500 .Case(S: "fmin", Value: Intrinsic::vector_reduce_fmin)
1501 .Default(Value: Intrinsic::not_intrinsic);
1502
1503 bool V2 = false;
1504 if (ID == Intrinsic::not_intrinsic) {
1505 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1506 Groups.clear();
1507 V2 = true;
1508 if (R2.match(String: Name, Matches: &Groups))
1509 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1510 .Case(S: "fadd", Value: Intrinsic::vector_reduce_fadd)
1511 .Case(S: "fmul", Value: Intrinsic::vector_reduce_fmul)
1512 .Default(Value: Intrinsic::not_intrinsic);
1513 }
1514 if (ID != Intrinsic::not_intrinsic) {
1515 rename(GV: F);
1516 auto Args = F->getFunctionType()->params();
1517 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1518 OverloadTys: {Args[V2 ? 1 : 0]});
1519 return true;
1520 }
1521 break; // No other 'expermental.vector.reduce.*'.
1522 }
1523
1524 if (Name.consume_front(Prefix: "splice"))
1525 return true;
1526 break; // No other 'experimental.vector.*'.
1527 }
1528 if (Name.consume_front(Prefix: "experimental.stepvector.")) {
1529 Intrinsic::ID ID = Intrinsic::stepvector;
1530 rename(GV: F);
1531 NewFn = Intrinsic::getOrInsertDeclaration(
1532 M: F->getParent(), id: ID, OverloadTys: F->getFunctionType()->getReturnType());
1533 return true;
1534 }
1535 break; // No other 'e*'.
1536 case 'f':
1537 if (Name.starts_with(Prefix: "flt.rounds")) {
1538 rename(GV: F);
1539 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1540 id: Intrinsic::get_rounding);
1541 return true;
1542 }
1543 break;
1544 case 'i':
1545 if (Name.starts_with(Prefix: "invariant.group.barrier")) {
1546 // Rename invariant.group.barrier to launder.invariant.group
1547 auto Args = F->getFunctionType()->params();
1548 Type* ObjectPtr[1] = {Args[0]};
1549 rename(GV: F);
1550 NewFn = Intrinsic::getOrInsertDeclaration(
1551 M: F->getParent(), id: Intrinsic::launder_invariant_group, OverloadTys: ObjectPtr);
1552 return true;
1553 }
1554 break;
1555 case 'l':
1556 if ((Name.starts_with(Prefix: "lifetime.start") ||
1557 Name.starts_with(Prefix: "lifetime.end")) &&
1558 F->arg_size() == 2) {
1559 Intrinsic::ID IID = Name.starts_with(Prefix: "lifetime.start")
1560 ? Intrinsic::lifetime_start
1561 : Intrinsic::lifetime_end;
1562 rename(GV: F);
1563 // Old 2 argument form of these intrinsics have [Size, Ptr] as arguments.
1564 // Use the Ptr argument to create new declaration.
1565 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1566 OverloadTys: F->getArg(i: 1)->getType());
1567 return true;
1568 }
1569 break;
1570 case 'm': {
1571 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1572 // alignment parameter to embedding the alignment as an attribute of
1573 // the pointer args.
1574 if (unsigned ID = StringSwitch<unsigned>(Name)
1575 .StartsWith(S: "memcpy.", Value: Intrinsic::memcpy)
1576 .StartsWith(S: "memmove.", Value: Intrinsic::memmove)
1577 .Default(Value: 0)) {
1578 if (F->arg_size() == 5) {
1579 rename(GV: F);
1580 // Get the types of dest, src, and len
1581 ArrayRef<Type *> ParamTypes =
1582 F->getFunctionType()->params().slice(N: 0, M: 3);
1583 NewFn =
1584 Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, OverloadTys: ParamTypes);
1585 return true;
1586 }
1587 }
1588 if (Name.starts_with(Prefix: "memset.") && F->arg_size() == 5) {
1589 rename(GV: F);
1590 // Get the types of dest, and len
1591 const auto *FT = F->getFunctionType();
1592 Type *ParamTypes[2] = {
1593 FT->getParamType(i: 0), // Dest
1594 FT->getParamType(i: 2) // len
1595 };
1596 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1597 id: Intrinsic::memset, OverloadTys: ParamTypes);
1598 return true;
1599 }
1600
1601 unsigned MaskedID =
1602 StringSwitch<unsigned>(Name)
1603 .StartsWith(S: "masked.load", Value: Intrinsic::masked_load)
1604 .StartsWith(S: "masked.gather", Value: Intrinsic::masked_gather)
1605 .StartsWith(S: "masked.store", Value: Intrinsic::masked_store)
1606 .StartsWith(S: "masked.scatter", Value: Intrinsic::masked_scatter)
1607 .Default(Value: 0);
1608 if (MaskedID && F->arg_size() == 4) {
1609 rename(GV: F);
1610 if (MaskedID == Intrinsic::masked_load ||
1611 MaskedID == Intrinsic::masked_gather) {
1612 NewFn = Intrinsic::getOrInsertDeclaration(
1613 M: F->getParent(), id: MaskedID,
1614 OverloadTys: {F->getReturnType(), F->getArg(i: 0)->getType()});
1615 return true;
1616 }
1617 NewFn = Intrinsic::getOrInsertDeclaration(
1618 M: F->getParent(), id: MaskedID,
1619 OverloadTys: {F->getArg(i: 0)->getType(), F->getArg(i: 1)->getType()});
1620 return true;
1621 }
1622 break;
1623 }
1624 case 'n': {
1625 if (Name.consume_front(Prefix: "nvvm.")) {
1626 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1627 if (F->arg_size() == 1) {
1628 Intrinsic::ID IID =
1629 StringSwitch<Intrinsic::ID>(Name)
1630 .Cases(CaseStrings: {"brev32", "brev64"}, Value: Intrinsic::bitreverse)
1631 .Case(S: "clz.i", Value: Intrinsic::ctlz)
1632 .Case(S: "popc.i", Value: Intrinsic::ctpop)
1633 .Default(Value: Intrinsic::not_intrinsic);
1634 if (IID != Intrinsic::not_intrinsic) {
1635 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1636 OverloadTys: {F->getReturnType()});
1637 return true;
1638 }
1639 } else if (F->arg_size() == 2) {
1640 Intrinsic::ID IID =
1641 StringSwitch<Intrinsic::ID>(Name)
1642 .Cases(CaseStrings: {"max.s", "max.i", "max.ll"}, Value: Intrinsic::smax)
1643 .Cases(CaseStrings: {"min.s", "min.i", "min.ll"}, Value: Intrinsic::smin)
1644 .Cases(CaseStrings: {"max.us", "max.ui", "max.ull"}, Value: Intrinsic::umax)
1645 .Cases(CaseStrings: {"min.us", "min.ui", "min.ull"}, Value: Intrinsic::umin)
1646 .Default(Value: Intrinsic::not_intrinsic);
1647 if (IID != Intrinsic::not_intrinsic) {
1648 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1649 OverloadTys: {F->getReturnType()});
1650 return true;
1651 }
1652 }
1653
1654 // Check for nvvm intrinsics that need a return type adjustment.
1655 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1656 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1657 if (IID != Intrinsic::not_intrinsic) {
1658 NewFn = nullptr;
1659 return true;
1660 }
1661 }
1662
1663 // Upgrade Distributed Shared Memory Intrinsics
1664 Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
1665 if (IID != Intrinsic::not_intrinsic) {
1666 rename(GV: F);
1667 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1668 return true;
1669 }
1670
1671 // Upgrade TMA copy G2S Intrinsics
1672 IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
1673 if (IID != Intrinsic::not_intrinsic) {
1674 rename(GV: F);
1675 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1676 return true;
1677 }
1678
1679 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1680 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1681 //
1682 // TODO: We could add lohi.i2d.
1683 bool Expand = false;
1684 if (Name.consume_front(Prefix: "abs."))
1685 // nvvm.abs.{i,ii}
1686 Expand =
1687 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1688 else if (Name.consume_front(Prefix: "fabs."))
1689 // nvvm.fabs.{f,ftz.f,d}
1690 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1691 else if (Name.consume_front(Prefix: "ex2.approx."))
1692 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1693 Expand =
1694 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1695 else if (Name.consume_front(Prefix: "atomic.load."))
1696 // nvvm.atomic.load.add.{f32,f64}.p
1697 // nvvm.atomic.load.{inc,dec}.32.p
1698 Expand = StringSwitch<bool>(Name)
1699 .StartsWith(S: "add.f32.p", Value: true)
1700 .StartsWith(S: "add.f64.p", Value: true)
1701 .StartsWith(S: "inc.32.p", Value: true)
1702 .StartsWith(S: "dec.32.p", Value: true)
1703 .Default(Value: false);
1704 else if (Name.consume_front(Prefix: "atomic."))
1705 // nvvm.atomic.{add,exch,max,min,inc,dec,and,or,xor}.gen.{i,f}.{cta,sys}
1706 // nvvm.atomic.cas.gen.i.{cta,sys}
1707 Expand = StringSwitch<bool>(Name)
1708 .StartsWith(S: "add.gen.", Value: true)
1709 .StartsWith(S: "exch.gen.", Value: true)
1710 .StartsWith(S: "max.gen.", Value: true)
1711 .StartsWith(S: "min.gen.", Value: true)
1712 .StartsWith(S: "inc.gen.", Value: true)
1713 .StartsWith(S: "dec.gen.", Value: true)
1714 .StartsWith(S: "and.gen.", Value: true)
1715 .StartsWith(S: "or.gen.", Value: true)
1716 .StartsWith(S: "xor.gen.", Value: true)
1717 .StartsWith(S: "cas.gen.", Value: true)
1718 .Default(Value: false);
1719 else if (Name.consume_front(Prefix: "bitcast."))
1720 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1721 Expand =
1722 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1723 else if (Name.consume_front(Prefix: "rotate."))
1724 // nvvm.rotate.{b32,b64,right.b64}
1725 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1726 else if (Name.consume_front(Prefix: "ptr.gen.to."))
1727 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1728 Expand = consumeNVVMPtrAddrSpace(Name);
1729 else if (Name.consume_front(Prefix: "ptr."))
1730 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1731 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(Prefix: ".to.gen");
1732 else if (Name.consume_front(Prefix: "ldg.global."))
1733 // nvvm.ldg.global.{i,p,f}
1734 Expand = (Name.starts_with(Prefix: "i.") || Name.starts_with(Prefix: "f.") ||
1735 Name.starts_with(Prefix: "p."));
1736 else
1737 Expand = StringSwitch<bool>(Name)
1738 .Case(S: "barrier0", Value: true)
1739 .Case(S: "barrier.n", Value: true)
1740 .Case(S: "barrier.sync.cnt", Value: true)
1741 .Case(S: "barrier.sync", Value: true)
1742 .Case(S: "barrier", Value: true)
1743 .Case(S: "bar.sync", Value: true)
1744 .Case(S: "barrier0.popc", Value: true)
1745 .Case(S: "barrier0.and", Value: true)
1746 .Case(S: "barrier0.or", Value: true)
1747 .Case(S: "clz.ll", Value: true)
1748 .Case(S: "popc.ll", Value: true)
1749 .Case(S: "h2f", Value: true)
1750 .Case(S: "swap.lo.hi.b64", Value: true)
1751 .Case(S: "tanh.approx.f32", Value: true)
1752 .Default(Value: false);
1753
1754 if (Expand) {
1755 NewFn = nullptr;
1756 return true;
1757 }
1758 break; // No other 'nvvm.*'.
1759 }
1760 break;
1761 }
1762 case 'o':
1763 if (Name.starts_with(Prefix: "objectsize.")) {
1764 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1765 if (F->arg_size() == 2 || F->arg_size() == 3) {
1766 rename(GV: F);
1767 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1768 id: Intrinsic::objectsize, OverloadTys: Tys);
1769 return true;
1770 }
1771 }
1772 break;
1773
1774 case 'p':
1775 if (Name.starts_with(Prefix: "ptr.annotation.") && F->arg_size() == 4) {
1776 rename(GV: F);
1777 NewFn = Intrinsic::getOrInsertDeclaration(
1778 M: F->getParent(), id: Intrinsic::ptr_annotation,
1779 OverloadTys: {F->arg_begin()->getType(), F->getArg(i: 1)->getType()});
1780 return true;
1781 }
1782 break;
1783
1784 case 'r': {
1785 if (Name.consume_front(Prefix: "riscv.")) {
1786 Intrinsic::ID ID;
1787 ID = StringSwitch<Intrinsic::ID>(Name)
1788 .Case(S: "aes32dsi", Value: Intrinsic::riscv_aes32dsi)
1789 .Case(S: "aes32dsmi", Value: Intrinsic::riscv_aes32dsmi)
1790 .Case(S: "aes32esi", Value: Intrinsic::riscv_aes32esi)
1791 .Case(S: "aes32esmi", Value: Intrinsic::riscv_aes32esmi)
1792 .Default(Value: Intrinsic::not_intrinsic);
1793 if (ID != Intrinsic::not_intrinsic) {
1794 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(BitWidth: 32)) {
1795 rename(GV: F);
1796 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1797 return true;
1798 }
1799 break; // No other applicable upgrades.
1800 }
1801
1802 ID = StringSwitch<Intrinsic::ID>(Name)
1803 .StartsWith(S: "sm4ks", Value: Intrinsic::riscv_sm4ks)
1804 .StartsWith(S: "sm4ed", Value: Intrinsic::riscv_sm4ed)
1805 .Default(Value: Intrinsic::not_intrinsic);
1806 if (ID != Intrinsic::not_intrinsic) {
1807 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(BitWidth: 32) ||
1808 F->getFunctionType()->getReturnType()->isIntegerTy(BitWidth: 64)) {
1809 rename(GV: F);
1810 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1811 return true;
1812 }
1813 break; // No other applicable upgrades.
1814 }
1815
1816 ID = StringSwitch<Intrinsic::ID>(Name)
1817 .StartsWith(S: "sha256sig0", Value: Intrinsic::riscv_sha256sig0)
1818 .StartsWith(S: "sha256sig1", Value: Intrinsic::riscv_sha256sig1)
1819 .StartsWith(S: "sha256sum0", Value: Intrinsic::riscv_sha256sum0)
1820 .StartsWith(S: "sha256sum1", Value: Intrinsic::riscv_sha256sum1)
1821 .StartsWith(S: "sm3p0", Value: Intrinsic::riscv_sm3p0)
1822 .StartsWith(S: "sm3p1", Value: Intrinsic::riscv_sm3p1)
1823 .Default(Value: Intrinsic::not_intrinsic);
1824 if (ID != Intrinsic::not_intrinsic) {
1825 if (F->getFunctionType()->getReturnType()->isIntegerTy(BitWidth: 64)) {
1826 rename(GV: F);
1827 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1828 return true;
1829 }
1830 break; // No other applicable upgrades.
1831 }
1832
1833 // Replace llvm.riscv.clmul with llvm.clmul.
1834 if (Name == "clmul.i32" || Name == "clmul.i64") {
1835 NewFn = Intrinsic::getOrInsertDeclaration(
1836 M: F->getParent(), id: Intrinsic::clmul, OverloadTys: {F->getReturnType()});
1837 return true;
1838 }
1839
1840 break; // No other 'riscv.*' intrinsics
1841 }
1842 } break;
1843
1844 case 's':
1845 if (Name == "stackprotectorcheck") {
1846 NewFn = nullptr;
1847 return true;
1848 }
1849 break;
1850
1851 case 't':
1852 if (Name == "thread.pointer") {
1853 NewFn = Intrinsic::getOrInsertDeclaration(
1854 M: F->getParent(), id: Intrinsic::thread_pointer, OverloadTys: F->getReturnType());
1855 return true;
1856 }
1857 break;
1858
1859 case 'v': {
1860 if (Name == "var.annotation" && F->arg_size() == 4) {
1861 rename(GV: F);
1862 NewFn = Intrinsic::getOrInsertDeclaration(
1863 M: F->getParent(), id: Intrinsic::var_annotation,
1864 OverloadTys: {{F->arg_begin()->getType(), F->getArg(i: 1)->getType()}});
1865 return true;
1866 }
1867 if (Name.consume_front(Prefix: "vector.splice")) {
1868 if (Name.starts_with(Prefix: ".left") || Name.starts_with(Prefix: ".right"))
1869 break;
1870 return true;
1871 }
1872 break;
1873 }
1874
1875 case 'w':
1876 if (Name.consume_front(Prefix: "wasm.")) {
1877 Intrinsic::ID ID =
1878 StringSwitch<Intrinsic::ID>(Name)
1879 .StartsWith(S: "fma.", Value: Intrinsic::wasm_relaxed_madd)
1880 .StartsWith(S: "fms.", Value: Intrinsic::wasm_relaxed_nmadd)
1881 .StartsWith(S: "laneselect.", Value: Intrinsic::wasm_relaxed_laneselect)
1882 .Default(Value: Intrinsic::not_intrinsic);
1883 if (ID != Intrinsic::not_intrinsic) {
1884 rename(GV: F);
1885 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1886 OverloadTys: F->getReturnType());
1887 return true;
1888 }
1889
1890 if (Name.consume_front(Prefix: "dot.i8x16.i7x16.")) {
1891 ID = StringSwitch<Intrinsic::ID>(Name)
1892 .Case(S: "signed", Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1893 .Case(S: "add.signed",
1894 Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1895 .Default(Value: Intrinsic::not_intrinsic);
1896 if (ID != Intrinsic::not_intrinsic) {
1897 rename(GV: F);
1898 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1899 return true;
1900 }
1901 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1902 }
1903 break; // No other 'wasm.*'.
1904 }
1905 break;
1906
1907 case 'x':
1908 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1909 return true;
1910 }
1911
1912 auto *ST = dyn_cast<StructType>(Val: F->getReturnType());
1913 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1914 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1915 // Replace return type with literal non-packed struct. Only do this for
1916 // intrinsics declared to return a struct, not for intrinsics with
1917 // overloaded return type, in which case the exact struct type will be
1918 // mangled into the name.
1919 if (Intrinsic::hasStructReturnType(id: F->getIntrinsicID())) {
1920 FunctionType *FT = F->getFunctionType();
1921 auto *NewST = StructType::get(Context&: ST->getContext(), Elements: ST->elements());
1922 auto *NewFT = FunctionType::get(Result: NewST, Params: FT->params(), isVarArg: FT->isVarArg());
1923 std::string Name = F->getName().str();
1924 rename(GV: F);
1925 NewFn = Function::Create(Ty: NewFT, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(),
1926 N: Name, M: F->getParent());
1927
1928 // The new function may also need remangling.
1929 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F: NewFn))
1930 NewFn = *Result;
1931 return true;
1932 }
1933 }
1934
1935 // Remangle our intrinsic since we upgrade the mangling
1936 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1937 if (Result != std::nullopt) {
1938 NewFn = *Result;
1939 return true;
1940 }
1941
1942 // This may not belong here. This function is effectively being overloaded
1943 // to both detect an intrinsic which needs upgrading, and to provide the
1944 // upgraded form of the intrinsic. We should perhaps have two separate
1945 // functions for this.
1946 return false;
1947}
1948
1949bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1950 bool CanUpgradeDebugIntrinsicsToRecords) {
1951 NewFn = nullptr;
1952 bool Upgraded =
1953 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1954
1955 // Upgrade intrinsic attributes. This does not change the function.
1956 if (NewFn)
1957 F = NewFn;
1958 if (Intrinsic::ID id = F->getIntrinsicID()) {
1959 // Only do this if the intrinsic signature is valid.
1960 SmallVector<Type *> OverloadTys;
1961 if (Intrinsic::isSignatureValid(ID: id, FT: F->getFunctionType(), OverloadTys))
1962 F->setAttributes(
1963 Intrinsic::getAttributes(C&: F->getContext(), id, FT: F->getFunctionType()));
1964 }
1965 return Upgraded;
1966}
1967
1968GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1969 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1970 GV->getName() == "llvm.global_dtors")) ||
1971 !GV->hasInitializer())
1972 return nullptr;
1973 ArrayType *ATy = dyn_cast<ArrayType>(Val: GV->getValueType());
1974 if (!ATy)
1975 return nullptr;
1976 StructType *STy = dyn_cast<StructType>(Val: ATy->getElementType());
1977 if (!STy || STy->getNumElements() != 2)
1978 return nullptr;
1979
1980 LLVMContext &C = GV->getContext();
1981 IRBuilder<> IRB(C);
1982 auto EltTy = StructType::get(elt1: STy->getElementType(N: 0), elts: STy->getElementType(N: 1),
1983 elts: IRB.getPtrTy());
1984 Constant *Init = GV->getInitializer();
1985 unsigned N = Init->getNumOperands();
1986 std::vector<Constant *> NewCtors(N);
1987 for (unsigned i = 0; i != N; ++i) {
1988 auto Ctor = cast<Constant>(Val: Init->getOperand(i));
1989 NewCtors[i] = ConstantStruct::get(T: EltTy, Vs: Ctor->getAggregateElement(Elt: 0u),
1990 Vs: Ctor->getAggregateElement(Elt: 1),
1991 Vs: ConstantPointerNull::get(T: IRB.getPtrTy()));
1992 }
1993 Constant *NewInit = ConstantArray::get(T: ArrayType::get(ElementType: EltTy, NumElements: N), V: NewCtors);
1994
1995 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1996 NewInit, GV->getName());
1997}
1998
1999// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
2000// to byte shuffles.
2001static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
2002 unsigned Shift) {
2003 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
2004 unsigned NumElts = ResultTy->getNumElements() * 8;
2005
2006 // Bitcast from a 64-bit element type to a byte element type.
2007 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
2008 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
2009
2010 // We'll be shuffling in zeroes.
2011 Value *Res = Constant::getNullValue(Ty: VecTy);
2012
2013 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2014 // we'll just return the zero vector.
2015 if (Shift < 16) {
2016 int Idxs[64];
2017 // 256/512-bit version is split into 2/4 16-byte lanes.
2018 for (unsigned l = 0; l != NumElts; l += 16)
2019 for (unsigned i = 0; i != 16; ++i) {
2020 unsigned Idx = NumElts + i - Shift;
2021 if (Idx < NumElts)
2022 Idx -= NumElts - 16; // end of lane, switch operand.
2023 Idxs[l + i] = Idx + l;
2024 }
2025
2026 Res = Builder.CreateShuffleVector(V1: Res, V2: Op, Mask: ArrayRef(Idxs, NumElts));
2027 }
2028
2029 // Bitcast back to a 64-bit element type.
2030 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
2031}
2032
2033// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
2034// to byte shuffles.
2035static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
2036 unsigned Shift) {
2037 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
2038 unsigned NumElts = ResultTy->getNumElements() * 8;
2039
2040 // Bitcast from a 64-bit element type to a byte element type.
2041 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
2042 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
2043
2044 // We'll be shuffling in zeroes.
2045 Value *Res = Constant::getNullValue(Ty: VecTy);
2046
2047 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2048 // we'll just return the zero vector.
2049 if (Shift < 16) {
2050 int Idxs[64];
2051 // 256/512-bit version is split into 2/4 16-byte lanes.
2052 for (unsigned l = 0; l != NumElts; l += 16)
2053 for (unsigned i = 0; i != 16; ++i) {
2054 unsigned Idx = i + Shift;
2055 if (Idx >= 16)
2056 Idx += NumElts - 16; // end of lane, switch operand.
2057 Idxs[l + i] = Idx + l;
2058 }
2059
2060 Res = Builder.CreateShuffleVector(V1: Op, V2: Res, Mask: ArrayRef(Idxs, NumElts));
2061 }
2062
2063 // Bitcast back to a 64-bit element type.
2064 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
2065}
2066
2067static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2068 unsigned NumElts) {
2069 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2070 llvm::VectorType *MaskTy = FixedVectorType::get(
2071 ElementType: Builder.getInt1Ty(), NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth());
2072 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
2073
2074 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2075 // i8 and we need to extract down to the right number of elements.
2076 if (NumElts <= 4) {
2077 int Indices[4];
2078 for (unsigned i = 0; i != NumElts; ++i)
2079 Indices[i] = i;
2080 Mask = Builder.CreateShuffleVector(V1: Mask, V2: Mask, Mask: ArrayRef(Indices, NumElts),
2081 Name: "extract");
2082 }
2083
2084 return Mask;
2085}
2086
2087static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2088 Value *Op1) {
2089 // If the mask is all ones just emit the first operation.
2090 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2091 if (C->isAllOnesValue())
2092 return Op0;
2093
2094 Mask = getX86MaskVec(Builder, Mask,
2095 NumElts: cast<FixedVectorType>(Val: Op0->getType())->getNumElements());
2096 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
2097}
2098
2099static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2100 Value *Op1) {
2101 // If the mask is all ones just emit the first operation.
2102 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2103 if (C->isAllOnesValue())
2104 return Op0;
2105
2106 auto *MaskTy = FixedVectorType::get(ElementType: Builder.getInt1Ty(),
2107 NumElts: Mask->getType()->getIntegerBitWidth());
2108 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
2109 Mask = Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0);
2110 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
2111}
2112
2113// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2114// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2115// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2116static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
2117 Value *Op1, Value *Shift,
2118 Value *Passthru, Value *Mask,
2119 bool IsVALIGN) {
2120 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Shift)->getZExtValue();
2121
2122 unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
2123 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2124 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2125 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2126
2127 // Mask the immediate for VALIGN.
2128 if (IsVALIGN)
2129 ShiftVal &= (NumElts - 1);
2130
2131 // If palignr is shifting the pair of vectors more than the size of two
2132 // lanes, emit zero.
2133 if (ShiftVal >= 32)
2134 return llvm::Constant::getNullValue(Ty: Op0->getType());
2135
2136 // If palignr is shifting the pair of input vectors more than one lane,
2137 // but less than two lanes, convert to shifting in zeroes.
2138 if (ShiftVal > 16) {
2139 ShiftVal -= 16;
2140 Op1 = Op0;
2141 Op0 = llvm::Constant::getNullValue(Ty: Op0->getType());
2142 }
2143
2144 int Indices[64];
2145 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2146 for (unsigned l = 0; l < NumElts; l += 16) {
2147 for (unsigned i = 0; i != 16; ++i) {
2148 unsigned Idx = ShiftVal + i;
2149 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2150 Idx += NumElts - 16; // End of lane, switch operand.
2151 Indices[l + i] = Idx + l;
2152 }
2153 }
2154
2155 Value *Align = Builder.CreateShuffleVector(
2156 V1: Op1, V2: Op0, Mask: ArrayRef(Indices, NumElts), Name: "palignr");
2157
2158 return emitX86Select(Builder, Mask, Op0: Align, Op1: Passthru);
2159}
2160
2161static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2162 bool ZeroMask, bool IndexForm) {
2163 Type *Ty = CI.getType();
2164 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2165 unsigned EltWidth = Ty->getScalarSizeInBits();
2166 bool IsFloat = Ty->isFPOrFPVectorTy();
2167 Intrinsic::ID IID;
2168 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2169 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2170 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2171 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2172 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2173 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2174 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2175 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2176 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2177 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2178 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2179 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2180 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2181 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2182 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2183 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2184 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2185 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2186 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2187 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2188 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2189 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2190 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2191 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2192 else if (VecWidth == 128 && EltWidth == 16)
2193 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2194 else if (VecWidth == 256 && EltWidth == 16)
2195 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2196 else if (VecWidth == 512 && EltWidth == 16)
2197 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2198 else if (VecWidth == 128 && EltWidth == 8)
2199 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2200 else if (VecWidth == 256 && EltWidth == 8)
2201 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2202 else if (VecWidth == 512 && EltWidth == 8)
2203 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2204 else
2205 llvm_unreachable("Unexpected intrinsic");
2206
2207 Value *Args[] = { CI.getArgOperand(i: 0) , CI.getArgOperand(i: 1),
2208 CI.getArgOperand(i: 2) };
2209
2210 // If this isn't index form we need to swap operand 0 and 1.
2211 if (!IndexForm)
2212 std::swap(a&: Args[0], b&: Args[1]);
2213
2214 Value *V = Builder.CreateIntrinsic(ID: IID, Args);
2215 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2216 : Builder.CreateBitCast(V: CI.getArgOperand(i: 1),
2217 DestTy: Ty);
2218 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: V, Op1: PassThru);
2219}
2220
2221static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2222 Intrinsic::ID IID) {
2223 Type *Ty = CI.getType();
2224 Value *Op0 = CI.getOperand(i_nocapture: 0);
2225 Value *Op1 = CI.getOperand(i_nocapture: 1);
2226 Value *Res = Builder.CreateIntrinsic(ID: IID, OverloadTypes: Ty, Args: {Op0, Op1});
2227
2228 if (CI.arg_size() == 4) { // For masked intrinsics.
2229 Value *VecSrc = CI.getOperand(i_nocapture: 2);
2230 Value *Mask = CI.getOperand(i_nocapture: 3);
2231 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2232 }
2233 return Res;
2234}
2235
2236static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2237 bool IsRotateRight) {
2238 Type *Ty = CI.getType();
2239 Value *Src = CI.getArgOperand(i: 0);
2240 Value *Amt = CI.getArgOperand(i: 1);
2241
2242 // Amount may be scalar immediate, in which case create a splat vector.
2243 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2244 // we only care about the lowest log2 bits anyway.
2245 if (Amt->getType() != Ty) {
2246 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
2247 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
2248 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
2249 }
2250
2251 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2252 Value *Res = Builder.CreateIntrinsic(ID: IID, OverloadTypes: Ty, Args: {Src, Src, Amt});
2253
2254 if (CI.arg_size() == 4) { // For masked intrinsics.
2255 Value *VecSrc = CI.getOperand(i_nocapture: 2);
2256 Value *Mask = CI.getOperand(i_nocapture: 3);
2257 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2258 }
2259 return Res;
2260}
2261
2262static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2263 bool IsSigned) {
2264 Type *Ty = CI.getType();
2265 Value *LHS = CI.getArgOperand(i: 0);
2266 Value *RHS = CI.getArgOperand(i: 1);
2267
2268 CmpInst::Predicate Pred;
2269 switch (Imm) {
2270 case 0x0:
2271 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2272 break;
2273 case 0x1:
2274 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2275 break;
2276 case 0x2:
2277 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2278 break;
2279 case 0x3:
2280 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2281 break;
2282 case 0x4:
2283 Pred = ICmpInst::ICMP_EQ;
2284 break;
2285 case 0x5:
2286 Pred = ICmpInst::ICMP_NE;
2287 break;
2288 case 0x6:
2289 return Constant::getNullValue(Ty); // FALSE
2290 case 0x7:
2291 return Constant::getAllOnesValue(Ty); // TRUE
2292 default:
2293 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2294 }
2295
2296 Value *Cmp = Builder.CreateICmp(P: Pred, LHS, RHS);
2297 Value *Ext = Builder.CreateSExt(V: Cmp, DestTy: Ty);
2298 return Ext;
2299}
2300
2301static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2302 bool IsShiftRight, bool ZeroMask) {
2303 Type *Ty = CI.getType();
2304 Value *Op0 = CI.getArgOperand(i: 0);
2305 Value *Op1 = CI.getArgOperand(i: 1);
2306 Value *Amt = CI.getArgOperand(i: 2);
2307
2308 if (IsShiftRight)
2309 std::swap(a&: Op0, b&: Op1);
2310
2311 // Amount may be scalar immediate, in which case create a splat vector.
2312 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2313 // we only care about the lowest log2 bits anyway.
2314 if (Amt->getType() != Ty) {
2315 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
2316 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
2317 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
2318 }
2319
2320 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2321 Value *Res = Builder.CreateIntrinsic(ID: IID, OverloadTypes: Ty, Args: {Op0, Op1, Amt});
2322
2323 unsigned NumArgs = CI.arg_size();
2324 if (NumArgs >= 4) { // For masked intrinsics.
2325 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(i: 3) :
2326 ZeroMask ? ConstantAggregateZero::get(Ty: CI.getType()) :
2327 CI.getArgOperand(i: 0);
2328 Value *Mask = CI.getOperand(i_nocapture: NumArgs - 1);
2329 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2330 }
2331 return Res;
2332}
2333
2334static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2335 Value *Mask, bool Aligned) {
2336 const Align Alignment =
2337 Aligned
2338 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2339 : Align(1);
2340
2341 // If the mask is all ones just emit a regular store.
2342 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2343 if (C->isAllOnesValue())
2344 return Builder.CreateAlignedStore(Val: Data, Ptr, Align: Alignment);
2345
2346 // Convert the mask from an integer type to a vector of i1.
2347 unsigned NumElts = cast<FixedVectorType>(Val: Data->getType())->getNumElements();
2348 Mask = getX86MaskVec(Builder, Mask, NumElts);
2349 return Builder.CreateMaskedStore(Val: Data, Ptr, Alignment, Mask);
2350}
2351
2352static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2353 Value *Passthru, Value *Mask, bool Aligned) {
2354 Type *ValTy = Passthru->getType();
2355 const Align Alignment =
2356 Aligned
2357 ? Align(
2358 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2359 8)
2360 : Align(1);
2361
2362 // If the mask is all ones just emit a regular store.
2363 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2364 if (C->isAllOnesValue())
2365 return Builder.CreateAlignedLoad(Ty: ValTy, Ptr, Align: Alignment);
2366
2367 // Convert the mask from an integer type to a vector of i1.
2368 unsigned NumElts = cast<FixedVectorType>(Val: ValTy)->getNumElements();
2369 Mask = getX86MaskVec(Builder, Mask, NumElts);
2370 return Builder.CreateMaskedLoad(Ty: ValTy, Ptr, Alignment, Mask, PassThru: Passthru);
2371}
2372
2373static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2374 Type *Ty = CI.getType();
2375 Value *Op0 = CI.getArgOperand(i: 0);
2376 Value *Res = Builder.CreateIntrinsic(ID: Intrinsic::abs, OverloadTypes: Ty,
2377 Args: {Op0, Builder.getInt1(V: false)});
2378 if (CI.arg_size() == 3)
2379 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 2), Op0: Res, Op1: CI.getArgOperand(i: 1));
2380 return Res;
2381}
2382
2383static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2384 Type *Ty = CI.getType();
2385
2386 // Arguments have a vXi32 type so cast to vXi64.
2387 Value *LHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 0), DestTy: Ty);
2388 Value *RHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 1), DestTy: Ty);
2389
2390 if (IsSigned) {
2391 // Shift left then arithmetic shift right.
2392 Constant *ShiftAmt = ConstantInt::get(Ty, V: 32);
2393 LHS = Builder.CreateShl(LHS, RHS: ShiftAmt);
2394 LHS = Builder.CreateAShr(LHS, RHS: ShiftAmt);
2395 RHS = Builder.CreateShl(LHS: RHS, RHS: ShiftAmt);
2396 RHS = Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt);
2397 } else {
2398 // Clear the upper bits.
2399 Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff);
2400 LHS = Builder.CreateAnd(LHS, RHS: Mask);
2401 RHS = Builder.CreateAnd(LHS: RHS, RHS: Mask);
2402 }
2403
2404 Value *Res = Builder.CreateMul(LHS, RHS);
2405
2406 if (CI.arg_size() == 4)
2407 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Res, Op1: CI.getArgOperand(i: 2));
2408
2409 return Res;
2410}
2411
2412// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
2413static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2414 Value *Mask) {
2415 unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
2416 if (Mask) {
2417 const auto *C = dyn_cast<Constant>(Val: Mask);
2418 if (!C || !C->isAllOnesValue())
2419 Vec = Builder.CreateAnd(LHS: Vec, RHS: getX86MaskVec(Builder, Mask, NumElts));
2420 }
2421
2422 if (NumElts < 8) {
2423 int Indices[8];
2424 for (unsigned i = 0; i != NumElts; ++i)
2425 Indices[i] = i;
2426 for (unsigned i = NumElts; i != 8; ++i)
2427 Indices[i] = NumElts + i % NumElts;
2428 Vec = Builder.CreateShuffleVector(V1: Vec,
2429 V2: Constant::getNullValue(Ty: Vec->getType()),
2430 Mask: Indices);
2431 }
2432 return Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: std::max(a: NumElts, b: 8U)));
2433}
2434
2435static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2436 unsigned CC, bool Signed) {
2437 Value *Op0 = CI.getArgOperand(i: 0);
2438 unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
2439
2440 Value *Cmp;
2441 if (CC == 3) {
2442 Cmp = Constant::getNullValue(
2443 Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
2444 } else if (CC == 7) {
2445 Cmp = Constant::getAllOnesValue(
2446 Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
2447 } else {
2448 ICmpInst::Predicate Pred;
2449 switch (CC) {
2450 default: llvm_unreachable("Unknown condition code");
2451 case 0: Pred = ICmpInst::ICMP_EQ; break;
2452 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2453 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2454 case 4: Pred = ICmpInst::ICMP_NE; break;
2455 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2456 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2457 }
2458 Cmp = Builder.CreateICmp(P: Pred, LHS: Op0, RHS: CI.getArgOperand(i: 1));
2459 }
2460
2461 Value *Mask = CI.getArgOperand(i: CI.arg_size() - 1);
2462
2463 return applyX86MaskOn1BitsVec(Builder, Vec: Cmp, Mask);
2464}
2465
2466// Replace a masked intrinsic with an older unmasked intrinsic.
2467static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2468 Intrinsic::ID IID) {
2469 Value *Rep =
2470 Builder.CreateIntrinsic(ID: IID, Args: {CI.getArgOperand(i: 0), CI.getArgOperand(i: 1)});
2471 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Rep, Op1: CI.getArgOperand(i: 2));
2472}
2473
2474static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2475 Value* A = CI.getArgOperand(i: 0);
2476 Value* B = CI.getArgOperand(i: 1);
2477 Value* Src = CI.getArgOperand(i: 2);
2478 Value* Mask = CI.getArgOperand(i: 3);
2479
2480 Value* AndNode = Builder.CreateAnd(LHS: Mask, RHS: APInt(8, 1));
2481 Value* Cmp = Builder.CreateIsNotNull(Arg: AndNode);
2482 Value* Extract1 = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
2483 Value* Extract2 = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0);
2484 Value* Select = Builder.CreateSelect(C: Cmp, True: Extract1, False: Extract2);
2485 return Builder.CreateInsertElement(Vec: A, NewElt: Select, Idx: (uint64_t)0);
2486}
2487
2488static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2489 Value* Op = CI.getArgOperand(i: 0);
2490 Type* ReturnOp = CI.getType();
2491 unsigned NumElts = cast<FixedVectorType>(Val: CI.getType())->getNumElements();
2492 Value *Mask = getX86MaskVec(Builder, Mask: Op, NumElts);
2493 return Builder.CreateSExt(V: Mask, DestTy: ReturnOp, Name: "vpmovm2");
2494}
2495
2496// Replace intrinsic with unmasked version and a select.
2497static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2498 CallBase &CI, Value *&Rep) {
2499 Name = Name.substr(Start: 12); // Remove avx512.mask.
2500
2501 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2502 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2503 Intrinsic::ID IID;
2504 if (Name.starts_with(Prefix: "max.p")) {
2505 if (VecWidth == 128 && EltWidth == 32)
2506 IID = Intrinsic::x86_sse_max_ps;
2507 else if (VecWidth == 128 && EltWidth == 64)
2508 IID = Intrinsic::x86_sse2_max_pd;
2509 else if (VecWidth == 256 && EltWidth == 32)
2510 IID = Intrinsic::x86_avx_max_ps_256;
2511 else if (VecWidth == 256 && EltWidth == 64)
2512 IID = Intrinsic::x86_avx_max_pd_256;
2513 else
2514 llvm_unreachable("Unexpected intrinsic");
2515 } else if (Name.starts_with(Prefix: "min.p")) {
2516 if (VecWidth == 128 && EltWidth == 32)
2517 IID = Intrinsic::x86_sse_min_ps;
2518 else if (VecWidth == 128 && EltWidth == 64)
2519 IID = Intrinsic::x86_sse2_min_pd;
2520 else if (VecWidth == 256 && EltWidth == 32)
2521 IID = Intrinsic::x86_avx_min_ps_256;
2522 else if (VecWidth == 256 && EltWidth == 64)
2523 IID = Intrinsic::x86_avx_min_pd_256;
2524 else
2525 llvm_unreachable("Unexpected intrinsic");
2526 } else if (Name.starts_with(Prefix: "pshuf.b.")) {
2527 if (VecWidth == 128)
2528 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2529 else if (VecWidth == 256)
2530 IID = Intrinsic::x86_avx2_pshuf_b;
2531 else if (VecWidth == 512)
2532 IID = Intrinsic::x86_avx512_pshuf_b_512;
2533 else
2534 llvm_unreachable("Unexpected intrinsic");
2535 } else if (Name.starts_with(Prefix: "pmul.hr.sw.")) {
2536 if (VecWidth == 128)
2537 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2538 else if (VecWidth == 256)
2539 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2540 else if (VecWidth == 512)
2541 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2542 else
2543 llvm_unreachable("Unexpected intrinsic");
2544 } else if (Name.starts_with(Prefix: "pmulh.w.")) {
2545 if (VecWidth == 128)
2546 IID = Intrinsic::x86_sse2_pmulh_w;
2547 else if (VecWidth == 256)
2548 IID = Intrinsic::x86_avx2_pmulh_w;
2549 else if (VecWidth == 512)
2550 IID = Intrinsic::x86_avx512_pmulh_w_512;
2551 else
2552 llvm_unreachable("Unexpected intrinsic");
2553 } else if (Name.starts_with(Prefix: "pmulhu.w.")) {
2554 if (VecWidth == 128)
2555 IID = Intrinsic::x86_sse2_pmulhu_w;
2556 else if (VecWidth == 256)
2557 IID = Intrinsic::x86_avx2_pmulhu_w;
2558 else if (VecWidth == 512)
2559 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2560 else
2561 llvm_unreachable("Unexpected intrinsic");
2562 } else if (Name.starts_with(Prefix: "pmaddw.d.")) {
2563 if (VecWidth == 128)
2564 IID = Intrinsic::x86_sse2_pmadd_wd;
2565 else if (VecWidth == 256)
2566 IID = Intrinsic::x86_avx2_pmadd_wd;
2567 else if (VecWidth == 512)
2568 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2569 else
2570 llvm_unreachable("Unexpected intrinsic");
2571 } else if (Name.starts_with(Prefix: "pmaddubs.w.")) {
2572 if (VecWidth == 128)
2573 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2574 else if (VecWidth == 256)
2575 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2576 else if (VecWidth == 512)
2577 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2578 else
2579 llvm_unreachable("Unexpected intrinsic");
2580 } else if (Name.starts_with(Prefix: "packsswb.")) {
2581 if (VecWidth == 128)
2582 IID = Intrinsic::x86_sse2_packsswb_128;
2583 else if (VecWidth == 256)
2584 IID = Intrinsic::x86_avx2_packsswb;
2585 else if (VecWidth == 512)
2586 IID = Intrinsic::x86_avx512_packsswb_512;
2587 else
2588 llvm_unreachable("Unexpected intrinsic");
2589 } else if (Name.starts_with(Prefix: "packssdw.")) {
2590 if (VecWidth == 128)
2591 IID = Intrinsic::x86_sse2_packssdw_128;
2592 else if (VecWidth == 256)
2593 IID = Intrinsic::x86_avx2_packssdw;
2594 else if (VecWidth == 512)
2595 IID = Intrinsic::x86_avx512_packssdw_512;
2596 else
2597 llvm_unreachable("Unexpected intrinsic");
2598 } else if (Name.starts_with(Prefix: "packuswb.")) {
2599 if (VecWidth == 128)
2600 IID = Intrinsic::x86_sse2_packuswb_128;
2601 else if (VecWidth == 256)
2602 IID = Intrinsic::x86_avx2_packuswb;
2603 else if (VecWidth == 512)
2604 IID = Intrinsic::x86_avx512_packuswb_512;
2605 else
2606 llvm_unreachable("Unexpected intrinsic");
2607 } else if (Name.starts_with(Prefix: "packusdw.")) {
2608 if (VecWidth == 128)
2609 IID = Intrinsic::x86_sse41_packusdw;
2610 else if (VecWidth == 256)
2611 IID = Intrinsic::x86_avx2_packusdw;
2612 else if (VecWidth == 512)
2613 IID = Intrinsic::x86_avx512_packusdw_512;
2614 else
2615 llvm_unreachable("Unexpected intrinsic");
2616 } else if (Name.starts_with(Prefix: "vpermilvar.")) {
2617 if (VecWidth == 128 && EltWidth == 32)
2618 IID = Intrinsic::x86_avx_vpermilvar_ps;
2619 else if (VecWidth == 128 && EltWidth == 64)
2620 IID = Intrinsic::x86_avx_vpermilvar_pd;
2621 else if (VecWidth == 256 && EltWidth == 32)
2622 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2623 else if (VecWidth == 256 && EltWidth == 64)
2624 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2625 else if (VecWidth == 512 && EltWidth == 32)
2626 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2627 else if (VecWidth == 512 && EltWidth == 64)
2628 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2629 else
2630 llvm_unreachable("Unexpected intrinsic");
2631 } else if (Name == "cvtpd2dq.256") {
2632 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2633 } else if (Name == "cvtpd2ps.256") {
2634 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2635 } else if (Name == "cvttpd2dq.256") {
2636 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2637 } else if (Name == "cvttps2dq.128") {
2638 IID = Intrinsic::x86_sse2_cvttps2dq;
2639 } else if (Name == "cvttps2dq.256") {
2640 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2641 } else if (Name.starts_with(Prefix: "permvar.")) {
2642 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2643 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2644 IID = Intrinsic::x86_avx2_permps;
2645 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2646 IID = Intrinsic::x86_avx2_permd;
2647 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2648 IID = Intrinsic::x86_avx512_permvar_df_256;
2649 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2650 IID = Intrinsic::x86_avx512_permvar_di_256;
2651 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2652 IID = Intrinsic::x86_avx512_permvar_sf_512;
2653 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2654 IID = Intrinsic::x86_avx512_permvar_si_512;
2655 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2656 IID = Intrinsic::x86_avx512_permvar_df_512;
2657 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2658 IID = Intrinsic::x86_avx512_permvar_di_512;
2659 else if (VecWidth == 128 && EltWidth == 16)
2660 IID = Intrinsic::x86_avx512_permvar_hi_128;
2661 else if (VecWidth == 256 && EltWidth == 16)
2662 IID = Intrinsic::x86_avx512_permvar_hi_256;
2663 else if (VecWidth == 512 && EltWidth == 16)
2664 IID = Intrinsic::x86_avx512_permvar_hi_512;
2665 else if (VecWidth == 128 && EltWidth == 8)
2666 IID = Intrinsic::x86_avx512_permvar_qi_128;
2667 else if (VecWidth == 256 && EltWidth == 8)
2668 IID = Intrinsic::x86_avx512_permvar_qi_256;
2669 else if (VecWidth == 512 && EltWidth == 8)
2670 IID = Intrinsic::x86_avx512_permvar_qi_512;
2671 else
2672 llvm_unreachable("Unexpected intrinsic");
2673 } else if (Name.starts_with(Prefix: "dbpsadbw.")) {
2674 if (VecWidth == 128)
2675 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2676 else if (VecWidth == 256)
2677 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2678 else if (VecWidth == 512)
2679 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2680 else
2681 llvm_unreachable("Unexpected intrinsic");
2682 } else if (Name.starts_with(Prefix: "pmultishift.qb.")) {
2683 if (VecWidth == 128)
2684 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2685 else if (VecWidth == 256)
2686 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2687 else if (VecWidth == 512)
2688 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2689 else
2690 llvm_unreachable("Unexpected intrinsic");
2691 } else if (Name.starts_with(Prefix: "conflict.")) {
2692 if (Name[9] == 'd' && VecWidth == 128)
2693 IID = Intrinsic::x86_avx512_conflict_d_128;
2694 else if (Name[9] == 'd' && VecWidth == 256)
2695 IID = Intrinsic::x86_avx512_conflict_d_256;
2696 else if (Name[9] == 'd' && VecWidth == 512)
2697 IID = Intrinsic::x86_avx512_conflict_d_512;
2698 else if (Name[9] == 'q' && VecWidth == 128)
2699 IID = Intrinsic::x86_avx512_conflict_q_128;
2700 else if (Name[9] == 'q' && VecWidth == 256)
2701 IID = Intrinsic::x86_avx512_conflict_q_256;
2702 else if (Name[9] == 'q' && VecWidth == 512)
2703 IID = Intrinsic::x86_avx512_conflict_q_512;
2704 else
2705 llvm_unreachable("Unexpected intrinsic");
2706 } else if (Name.starts_with(Prefix: "pavg.")) {
2707 if (Name[5] == 'b' && VecWidth == 128)
2708 IID = Intrinsic::x86_sse2_pavg_b;
2709 else if (Name[5] == 'b' && VecWidth == 256)
2710 IID = Intrinsic::x86_avx2_pavg_b;
2711 else if (Name[5] == 'b' && VecWidth == 512)
2712 IID = Intrinsic::x86_avx512_pavg_b_512;
2713 else if (Name[5] == 'w' && VecWidth == 128)
2714 IID = Intrinsic::x86_sse2_pavg_w;
2715 else if (Name[5] == 'w' && VecWidth == 256)
2716 IID = Intrinsic::x86_avx2_pavg_w;
2717 else if (Name[5] == 'w' && VecWidth == 512)
2718 IID = Intrinsic::x86_avx512_pavg_w_512;
2719 else
2720 llvm_unreachable("Unexpected intrinsic");
2721 } else
2722 return false;
2723
2724 SmallVector<Value *, 4> Args(CI.args());
2725 Args.pop_back();
2726 Args.pop_back();
2727 Rep = Builder.CreateIntrinsic(ID: IID, Args);
2728 unsigned NumArgs = CI.arg_size();
2729 Rep = emitX86Select(Builder, Mask: CI.getArgOperand(i: NumArgs - 1), Op0: Rep,
2730 Op1: CI.getArgOperand(i: NumArgs - 2));
2731 return true;
2732}
2733
/// Upgrade comment in call to inline asm that represents an objc retain release
/// marker.
///
/// When \p AsmStr begins with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' of the
/// first "# marker" is rewritten in place to ';'. Any other string is left
/// untouched.
///
/// \param AsmStr Inline asm string to update in place; must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind() anchored at index 0 can only match at the very start of the
  // string, so this is a pure prefix test. find(...) == 0 gives the same
  // answer but scans the entire string whenever the prefix is absent.
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;

  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
2744
2745static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2746 Function *F, IRBuilder<> &Builder) {
2747 Value *Rep = nullptr;
2748
2749 if (Name == "abs.i" || Name == "abs.ll") {
2750 Value *Arg = CI->getArgOperand(i: 0);
2751 Rep = Builder.CreateIntrinsic(ID: Intrinsic::abs, OverloadTypes: {Arg->getType()},
2752 Args: {Arg, Builder.getTrue()},
2753 /*FMFSource=*/nullptr, Name: "abs");
2754 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2755 Type *Ty = (Name == "abs.bf16")
2756 ? Builder.getBFloatTy()
2757 : FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 2);
2758 Value *Arg = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Ty);
2759 Value *Abs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::nvvm_fabs, Op: Arg);
2760 Rep = Builder.CreateBitCast(V: Abs, DestTy: CI->getType());
2761 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2762 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2763 : Intrinsic::nvvm_fabs;
2764 Rep = Builder.CreateUnaryIntrinsic(ID: IID, Op: CI->getArgOperand(i: 0));
2765 } else if (Name.consume_front(Prefix: "ex2.approx.")) {
2766 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2767 Intrinsic::ID IID = Name.starts_with(Prefix: "ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2768 : Intrinsic::nvvm_ex2_approx;
2769 Rep = Builder.CreateUnaryIntrinsic(ID: IID, Op: CI->getArgOperand(i: 0));
2770 } else if (Name.starts_with(Prefix: "atomic.load.add.f32.p") ||
2771 Name.starts_with(Prefix: "atomic.load.add.f64.p")) {
2772 Value *Ptr = CI->getArgOperand(i: 0);
2773 Value *Val = CI->getArgOperand(i: 1);
2774 Rep = Builder.CreateAtomicRMW(
2775 Op: AtomicRMWInst::FAdd, Ptr, Val, Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic,
2776 SSID: CI->getContext().getOrInsertSyncScopeID(SSN: "device"));
2777 // The default scope for atomic.load.* intrinsics is device
2778 // (= gpu scope in ptx), but the default LLVM atomic scope is
2779 // "system"
2780 } else if (Name.starts_with(Prefix: "atomic.load.inc.32.p") ||
2781 Name.starts_with(Prefix: "atomic.load.dec.32.p")) {
2782 Value *Ptr = CI->getArgOperand(i: 0);
2783 Value *Val = CI->getArgOperand(i: 1);
2784 auto Op = Name.starts_with(Prefix: "atomic.load.inc") ? AtomicRMWInst::UIncWrap
2785 : AtomicRMWInst::UDecWrap;
2786 Rep = Builder.CreateAtomicRMW(
2787 Op, Ptr, Val, Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic,
2788 SSID: CI->getContext().getOrInsertSyncScopeID(SSN: "device"));
2789 // See comment above.
2790 } else if (Name.starts_with(Prefix: "atomic.") && Name.contains(Other: ".gen.")) {
2791 // nvvm.atomic.{op}.gen.{i,f}.{cta,sys} -> atomicrmw / cmpxchg.
2792 StringRef Op = Name.substr(Start: StringRef("atomic.").size());
2793 Value *Ptr = CI->getArgOperand(i: 0);
2794 Value *Val = CI->getArgOperand(i: 1);
2795 SyncScope::ID SSID = CI->getContext().getOrInsertSyncScopeID(
2796 SSN: Op.contains(Other: ".cta.") ? "block" : "");
2797 if (Op.starts_with(Prefix: "cas.")) {
2798 Value *New = CI->getArgOperand(i: 2);
2799 Value *Pair = Builder.CreateAtomicCmpXchg(
2800 Ptr, Cmp: Val, New, Align: MaybeAlign(), SuccessOrdering: AtomicOrdering::Monotonic,
2801 FailureOrdering: AtomicOrdering::Monotonic, SSID);
2802 Rep = Builder.CreateExtractValue(Agg: Pair, Idxs: 0);
2803 } else {
2804 // Note we don't upgrade anything to AtomicRMWInst::UMin/UMax. This is
2805 // because we were actually missing those intrinsics!
2806 AtomicRMWInst::BinOp BinOp =
2807 StringSwitch<AtomicRMWInst::BinOp>(Op)
2808 .StartsWith(S: "add.gen.f", Value: AtomicRMWInst::FAdd)
2809 .StartsWith(S: "add.gen.i", Value: AtomicRMWInst::Add)
2810 .StartsWith(S: "exch.", Value: AtomicRMWInst::Xchg)
2811 .StartsWith(S: "max.", Value: AtomicRMWInst::Max)
2812 .StartsWith(S: "min.", Value: AtomicRMWInst::Min)
2813 .StartsWith(S: "inc.", Value: AtomicRMWInst::UIncWrap)
2814 .StartsWith(S: "dec.", Value: AtomicRMWInst::UDecWrap)
2815 .StartsWith(S: "and.", Value: AtomicRMWInst::And)
2816 .StartsWith(S: "or.", Value: AtomicRMWInst::Or)
2817 .StartsWith(S: "xor.", Value: AtomicRMWInst::Xor)
2818 .Default(Value: AtomicRMWInst::BAD_BINOP);
2819 assert(BinOp != AtomicRMWInst::BAD_BINOP &&
2820 "unexpected nvvm scoped atomic intrinsic");
2821 Rep = Builder.CreateAtomicRMW(Op: BinOp, Ptr, Val, Align: MaybeAlign(),
2822 Ordering: AtomicOrdering::Monotonic, SSID);
2823 }
2824 } else if (Name == "clz.ll") {
2825 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2826 Value *Arg = CI->getArgOperand(i: 0);
2827 Value *Ctlz = Builder.CreateIntrinsic(ID: Intrinsic::ctlz, OverloadTypes: {Arg->getType()},
2828 Args: {Arg, Builder.getFalse()},
2829 /*FMFSource=*/nullptr, Name: "ctlz");
2830 Rep = Builder.CreateTrunc(V: Ctlz, DestTy: Builder.getInt32Ty(), Name: "ctlz.trunc");
2831 } else if (Name == "popc.ll") {
2832 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2833 // i64.
2834 Value *Arg = CI->getArgOperand(i: 0);
2835 Value *Popc = Builder.CreateIntrinsic(ID: Intrinsic::ctpop, OverloadTypes: {Arg->getType()},
2836 Args: Arg, /*FMFSource=*/nullptr, Name: "ctpop");
2837 Rep = Builder.CreateTrunc(V: Popc, DestTy: Builder.getInt32Ty(), Name: "ctpop.trunc");
2838 } else if (Name == "h2f") {
2839 Value *Cast =
2840 Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
2841 Rep = Builder.CreateFPExt(V: Cast, DestTy: Builder.getFloatTy());
2842 } else if (Name.consume_front(Prefix: "bitcast.") &&
2843 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2844 Name == "d2ll")) {
2845 Rep = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
2846 } else if (Name == "rotate.b32") {
2847 Value *Arg = CI->getOperand(i_nocapture: 0);
2848 Value *ShiftAmt = CI->getOperand(i_nocapture: 1);
2849 Rep = Builder.CreateIntrinsic(RetTy: Builder.getInt32Ty(), ID: Intrinsic::fshl,
2850 Args: {Arg, Arg, ShiftAmt});
2851 } else if (Name == "rotate.b64") {
2852 Type *Int64Ty = Builder.getInt64Ty();
2853 Value *Arg = CI->getOperand(i_nocapture: 0);
2854 Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
2855 Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
2856 Args: {Arg, Arg, ZExtShiftAmt});
2857 } else if (Name == "rotate.right.b64") {
2858 Type *Int64Ty = Builder.getInt64Ty();
2859 Value *Arg = CI->getOperand(i_nocapture: 0);
2860 Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
2861 Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshr,
2862 Args: {Arg, Arg, ZExtShiftAmt});
2863 } else if (Name == "swap.lo.hi.b64") {
2864 Type *Int64Ty = Builder.getInt64Ty();
2865 Value *Arg = CI->getOperand(i_nocapture: 0);
2866 Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
2867 Args: {Arg, Arg, Builder.getInt64(C: 32)});
2868 } else if ((Name.consume_front(Prefix: "ptr.gen.to.") &&
2869 consumeNVVMPtrAddrSpace(Name)) ||
2870 (Name.consume_front(Prefix: "ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2871 Name.starts_with(Prefix: ".to.gen"))) {
2872 Rep = Builder.CreateAddrSpaceCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
2873 } else if (Name.consume_front(Prefix: "ldg.global")) {
2874 Value *Ptr = CI->getArgOperand(i: 0);
2875 Align PtrAlign = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getAlignValue();
2876 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2877 Value *ASC = Builder.CreateAddrSpaceCast(V: Ptr, DestTy: Builder.getPtrTy(AddrSpace: 1));
2878 Instruction *LD = Builder.CreateAlignedLoad(Ty: CI->getType(), Ptr: ASC, Align: PtrAlign);
2879 MDNode *MD = MDNode::get(Context&: Builder.getContext(), MDs: {});
2880 LD->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD);
2881 return LD;
2882 } else if (Name == "tanh.approx.f32") {
2883 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2884 FastMathFlags FMF;
2885 FMF.setApproxFunc();
2886 Rep = Builder.CreateUnaryIntrinsic(ID: Intrinsic::tanh, Op: CI->getArgOperand(i: 0),
2887 FMFSource: FMF);
2888 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2889 Value *Arg =
2890 Name.ends_with(Suffix: '0') ? Builder.getInt32(C: 0) : CI->getArgOperand(i: 0);
2891 Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2892 OverloadTypes: {}, Args: {Arg});
2893 } else if (Name == "barrier") {
2894 Rep = Builder.CreateIntrinsic(
2895 ID: Intrinsic::nvvm_barrier_cta_sync_aligned_count, OverloadTypes: {},
2896 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
2897 } else if (Name == "barrier.sync") {
2898 Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_all, OverloadTypes: {},
2899 Args: {CI->getArgOperand(i: 0)});
2900 } else if (Name == "barrier.sync.cnt") {
2901 Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_count, OverloadTypes: {},
2902 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
2903 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2904 Name == "barrier0.or") {
2905 Value *C = CI->getArgOperand(i: 0);
2906 C = Builder.CreateICmpNE(LHS: C, RHS: Builder.getInt32(C: 0));
2907
2908 Intrinsic::ID IID =
2909 StringSwitch<Intrinsic::ID>(Name)
2910 .Case(S: "barrier0.popc",
2911 Value: Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2912 .Case(S: "barrier0.and",
2913 Value: Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2914 .Case(S: "barrier0.or",
2915 Value: Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2916 Value *Bar = Builder.CreateIntrinsic(ID: IID, OverloadTypes: {}, Args: {Builder.getInt32(C: 0), C});
2917 Rep = Builder.CreateZExt(V: Bar, DestTy: CI->getType());
2918 } else {
2919 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2920 if (IID != Intrinsic::not_intrinsic &&
2921 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2922 rename(GV: F);
2923 Function *NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
2924 SmallVector<Value *, 2> Args;
2925 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2926 Value *Arg = CI->getArgOperand(i: I);
2927 Type *OldType = Arg->getType();
2928 Type *NewType = NewFn->getArg(i: I)->getType();
2929 Args.push_back(
2930 Elt: (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2931 ? Builder.CreateBitCast(V: Arg, DestTy: NewType)
2932 : Arg);
2933 }
2934 Rep = Builder.CreateCall(Callee: NewFn, Args);
2935 if (F->getReturnType()->isIntegerTy())
2936 Rep = Builder.CreateBitCast(V: Rep, DestTy: F->getReturnType());
2937 }
2938 }
2939
2940 return Rep;
2941}
2942
2943static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2944 IRBuilder<> &Builder) {
2945 LLVMContext &C = F->getContext();
2946 Value *Rep = nullptr;
2947
2948 if (Name.starts_with(Prefix: "sse4a.movnt.")) {
2949 SmallVector<Metadata *, 1> Elts;
2950 Elts.push_back(
2951 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2952 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2953
2954 Value *Arg0 = CI->getArgOperand(i: 0);
2955 Value *Arg1 = CI->getArgOperand(i: 1);
2956
2957 // Nontemporal (unaligned) store of the 0'th element of the float/double
2958 // vector.
2959 Value *Extract =
2960 Builder.CreateExtractElement(Vec: Arg1, Idx: (uint64_t)0, Name: "extractelement");
2961
2962 StoreInst *SI = Builder.CreateAlignedStore(Val: Extract, Ptr: Arg0, Align: Align(1));
2963 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2964 } else if (Name.starts_with(Prefix: "avx.movnt.") ||
2965 Name.starts_with(Prefix: "avx512.storent.")) {
2966 SmallVector<Metadata *, 1> Elts;
2967 Elts.push_back(
2968 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2969 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2970
2971 Value *Arg0 = CI->getArgOperand(i: 0);
2972 Value *Arg1 = CI->getArgOperand(i: 1);
2973
2974 StoreInst *SI = Builder.CreateAlignedStore(
2975 Val: Arg1, Ptr: Arg0,
2976 Align: Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2977 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2978 } else if (Name == "sse2.storel.dq") {
2979 Value *Arg0 = CI->getArgOperand(i: 0);
2980 Value *Arg1 = CI->getArgOperand(i: 1);
2981
2982 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
2983 Value *BC0 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
2984 Value *Elt = Builder.CreateExtractElement(Vec: BC0, Idx: (uint64_t)0);
2985 Builder.CreateAlignedStore(Val: Elt, Ptr: Arg0, Align: Align(1));
2986 } else if (Name.starts_with(Prefix: "sse.storeu.") ||
2987 Name.starts_with(Prefix: "sse2.storeu.") ||
2988 Name.starts_with(Prefix: "avx.storeu.")) {
2989 Value *Arg0 = CI->getArgOperand(i: 0);
2990 Value *Arg1 = CI->getArgOperand(i: 1);
2991 Builder.CreateAlignedStore(Val: Arg1, Ptr: Arg0, Align: Align(1));
2992 } else if (Name == "avx512.mask.store.ss") {
2993 Value *Mask = Builder.CreateAnd(LHS: CI->getArgOperand(i: 2), RHS: Builder.getInt8(C: 1));
2994 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2995 Mask, Aligned: false);
2996 } else if (Name.starts_with(Prefix: "avx512.mask.store")) {
2997 // "avx512.mask.storeu." or "avx512.mask.store."
2998 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2999 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
3000 Mask: CI->getArgOperand(i: 2), Aligned);
3001 } else if (Name.starts_with(Prefix: "sse2.pcmp") || Name.starts_with(Prefix: "avx2.pcmp")) {
3002 // Upgrade packed integer vector compare intrinsics to compare instructions.
3003 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
3004 bool CmpEq = Name[9] == 'e';
3005 Rep = Builder.CreateICmp(P: CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
3006 LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3007 Rep = Builder.CreateSExt(V: Rep, DestTy: CI->getType(), Name: "");
3008 } else if (Name.starts_with(Prefix: "avx512.broadcastm")) {
3009 Type *ExtTy = Type::getInt32Ty(C);
3010 if (CI->getOperand(i_nocapture: 0)->getType()->isIntegerTy(BitWidth: 8))
3011 ExtTy = Type::getInt64Ty(C);
3012 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
3013 ExtTy->getPrimitiveSizeInBits();
3014 Rep = Builder.CreateZExt(V: CI->getArgOperand(i: 0), DestTy: ExtTy);
3015 Rep = Builder.CreateVectorSplat(NumElts, V: Rep);
3016 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
3017 Value *Vec = CI->getArgOperand(i: 0);
3018 Value *Elt0 = Builder.CreateExtractElement(Vec, Idx: (uint64_t)0);
3019 Elt0 = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, OverloadTypes: Elt0->getType(), Args: Elt0);
3020 Rep = Builder.CreateInsertElement(Vec, NewElt: Elt0, Idx: (uint64_t)0);
3021 } else if (Name.starts_with(Prefix: "avx.sqrt.p") ||
3022 Name.starts_with(Prefix: "sse2.sqrt.p") ||
3023 Name.starts_with(Prefix: "sse.sqrt.p")) {
3024 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, OverloadTypes: CI->getType(),
3025 Args: {CI->getArgOperand(i: 0)});
3026 } else if (Name.starts_with(Prefix: "avx512.mask.sqrt.p")) {
3027 if (CI->arg_size() == 4 &&
3028 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
3029 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
3030 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
3031 : Intrinsic::x86_avx512_sqrt_pd_512;
3032
3033 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 3)};
3034 Rep = Builder.CreateIntrinsic(ID: IID, Args);
3035 } else {
3036 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, OverloadTypes: CI->getType(),
3037 Args: {CI->getArgOperand(i: 0)});
3038 }
3039 Rep =
3040 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3041 } else if (Name.starts_with(Prefix: "avx512.ptestm") ||
3042 Name.starts_with(Prefix: "avx512.ptestnm")) {
3043 Value *Op0 = CI->getArgOperand(i: 0);
3044 Value *Op1 = CI->getArgOperand(i: 1);
3045 Value *Mask = CI->getArgOperand(i: 2);
3046 Rep = Builder.CreateAnd(LHS: Op0, RHS: Op1);
3047 llvm::Type *Ty = Op0->getType();
3048 Value *Zero = llvm::Constant::getNullValue(Ty);
3049 ICmpInst::Predicate Pred = Name.starts_with(Prefix: "avx512.ptestm")
3050 ? ICmpInst::ICMP_NE
3051 : ICmpInst::ICMP_EQ;
3052 Rep = Builder.CreateICmp(P: Pred, LHS: Rep, RHS: Zero);
3053 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask);
3054 } else if (Name.starts_with(Prefix: "avx512.mask.pbroadcast")) {
3055 unsigned NumElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 1)->getType())
3056 ->getNumElements();
3057 Rep = Builder.CreateVectorSplat(NumElts, V: CI->getArgOperand(i: 0));
3058 Rep =
3059 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3060 } else if (Name.starts_with(Prefix: "avx512.kunpck")) {
3061 unsigned NumElts = CI->getType()->getScalarSizeInBits();
3062 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts);
3063 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts);
3064 int Indices[64];
3065 for (unsigned i = 0; i != NumElts; ++i)
3066 Indices[i] = i;
3067
3068 // First extract half of each vector. This gives better codegen than
3069 // doing it in a single shuffle.
3070 LHS = Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2));
3071 RHS = Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2));
3072 // Concat the vectors.
3073 // NOTE: Operands have to be swapped to match intrinsic definition.
3074 Rep = Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts));
3075 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3076 } else if (Name == "avx512.kand.w") {
3077 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3078 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3079 Rep = Builder.CreateAnd(LHS, RHS);
3080 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3081 } else if (Name == "avx512.kandn.w") {
3082 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3083 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3084 LHS = Builder.CreateNot(V: LHS);
3085 Rep = Builder.CreateAnd(LHS, RHS);
3086 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3087 } else if (Name == "avx512.kor.w") {
3088 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3089 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3090 Rep = Builder.CreateOr(LHS, RHS);
3091 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3092 } else if (Name == "avx512.kxor.w") {
3093 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3094 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3095 Rep = Builder.CreateXor(LHS, RHS);
3096 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3097 } else if (Name == "avx512.kxnor.w") {
3098 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3099 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3100 LHS = Builder.CreateNot(V: LHS);
3101 Rep = Builder.CreateXor(LHS, RHS);
3102 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3103 } else if (Name == "avx512.knot.w") {
3104 Rep = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3105 Rep = Builder.CreateNot(V: Rep);
3106 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3107 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3108 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3109 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3110 Rep = Builder.CreateOr(LHS, RHS);
3111 Rep = Builder.CreateBitCast(V: Rep, DestTy: Builder.getInt16Ty());
3112 Value *C;
3113 if (Name[14] == 'c')
3114 C = ConstantInt::getAllOnesValue(Ty: Builder.getInt16Ty());
3115 else
3116 C = ConstantInt::getNullValue(Ty: Builder.getInt16Ty());
3117 Rep = Builder.CreateICmpEQ(LHS: Rep, RHS: C);
3118 Rep = Builder.CreateZExt(V: Rep, DestTy: Builder.getInt32Ty());
3119 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3120 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3121 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3122 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3123 Type *I32Ty = Type::getInt32Ty(C);
3124 Value *Elt0 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 0),
3125 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3126 Value *Elt1 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1),
3127 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3128 Value *EltOp;
3129 if (Name.contains(Other: ".add."))
3130 EltOp = Builder.CreateFAdd(L: Elt0, R: Elt1);
3131 else if (Name.contains(Other: ".sub."))
3132 EltOp = Builder.CreateFSub(L: Elt0, R: Elt1);
3133 else if (Name.contains(Other: ".mul."))
3134 EltOp = Builder.CreateFMul(L: Elt0, R: Elt1);
3135 else
3136 EltOp = Builder.CreateFDiv(L: Elt0, R: Elt1);
3137 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: EltOp,
3138 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3139 } else if (Name.starts_with(Prefix: "avx512.mask.pcmp")) {
3140 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3141 bool CmpEq = Name[16] == 'e';
3142 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: CmpEq ? 0 : 6, Signed: true);
3143 } else if (Name.starts_with(Prefix: "avx512.mask.vpshufbitqmb.")) {
3144 Type *OpTy = CI->getArgOperand(i: 0)->getType();
3145 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3146 Intrinsic::ID IID;
3147 switch (VecWidth) {
3148 default:
3149 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
3150 break;
3151 case 128:
3152 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3153 break;
3154 case 256:
3155 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3156 break;
3157 case 512:
3158 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3159 break;
3160 }
3161
3162 Rep =
3163 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
3164 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
3165 } else if (Name.starts_with(Prefix: "avx512.mask.fpclass.p")) {
3166 Type *OpTy = CI->getArgOperand(i: 0)->getType();
3167 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3168 unsigned EltWidth = OpTy->getScalarSizeInBits();
3169 Intrinsic::ID IID;
3170 if (VecWidth == 128 && EltWidth == 32)
3171 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3172 else if (VecWidth == 256 && EltWidth == 32)
3173 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3174 else if (VecWidth == 512 && EltWidth == 32)
3175 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3176 else if (VecWidth == 128 && EltWidth == 64)
3177 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3178 else if (VecWidth == 256 && EltWidth == 64)
3179 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3180 else if (VecWidth == 512 && EltWidth == 64)
3181 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3182 else
3183 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
3184
3185 Rep =
3186 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
3187 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
3188 } else if (Name.starts_with(Prefix: "avx512.cmp.p")) {
3189 SmallVector<Value *, 4> Args(CI->args());
3190 Type *OpTy = Args[0]->getType();
3191 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3192 unsigned EltWidth = OpTy->getScalarSizeInBits();
3193 Intrinsic::ID IID;
3194 if (VecWidth == 128 && EltWidth == 32)
3195 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3196 else if (VecWidth == 256 && EltWidth == 32)
3197 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3198 else if (VecWidth == 512 && EltWidth == 32)
3199 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3200 else if (VecWidth == 128 && EltWidth == 64)
3201 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3202 else if (VecWidth == 256 && EltWidth == 64)
3203 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3204 else if (VecWidth == 512 && EltWidth == 64)
3205 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3206 else
3207 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
3208
3209 Value *Mask = Constant::getAllOnesValue(Ty: CI->getType());
3210 if (VecWidth == 512)
3211 std::swap(a&: Mask, b&: Args.back());
3212 Args.push_back(Elt: Mask);
3213
3214 Rep = Builder.CreateIntrinsic(ID: IID, Args);
3215 } else if (Name.starts_with(Prefix: "avx512.mask.cmp.")) {
3216 // Integer compare intrinsics.
3217 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3218 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: true);
3219 } else if (Name.starts_with(Prefix: "avx512.mask.ucmp.")) {
3220 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3221 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: false);
3222 } else if (Name.starts_with(Prefix: "avx512.cvtb2mask.") ||
3223 Name.starts_with(Prefix: "avx512.cvtw2mask.") ||
3224 Name.starts_with(Prefix: "avx512.cvtd2mask.") ||
3225 Name.starts_with(Prefix: "avx512.cvtq2mask.")) {
3226 Value *Op = CI->getArgOperand(i: 0);
3227 Value *Zero = llvm::Constant::getNullValue(Ty: Op->getType());
3228 Rep = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: Op, RHS: Zero);
3229 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: nullptr);
3230 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3231 Name == "ssse3.pabs.d.128" || Name.starts_with(Prefix: "avx2.pabs") ||
3232 Name.starts_with(Prefix: "avx512.mask.pabs")) {
3233 Rep = upgradeAbs(Builder, CI&: *CI);
3234 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3235 Name == "sse41.pmaxsd" || Name.starts_with(Prefix: "avx2.pmaxs") ||
3236 Name.starts_with(Prefix: "avx512.mask.pmaxs")) {
3237 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smax);
3238 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3239 Name == "sse41.pmaxud" || Name.starts_with(Prefix: "avx2.pmaxu") ||
3240 Name.starts_with(Prefix: "avx512.mask.pmaxu")) {
3241 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umax);
3242 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3243 Name == "sse41.pminsd" || Name.starts_with(Prefix: "avx2.pmins") ||
3244 Name.starts_with(Prefix: "avx512.mask.pmins")) {
3245 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smin);
3246 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3247 Name == "sse41.pminud" || Name.starts_with(Prefix: "avx2.pminu") ||
3248 Name.starts_with(Prefix: "avx512.mask.pminu")) {
3249 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umin);
3250 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3251 Name == "avx512.pmulu.dq.512" ||
3252 Name.starts_with(Prefix: "avx512.mask.pmulu.dq.")) {
3253 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: false);
3254 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3255 Name == "avx512.pmul.dq.512" ||
3256 Name.starts_with(Prefix: "avx512.mask.pmul.dq.")) {
3257 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: true);
3258 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3259 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3260 Rep =
3261 Builder.CreateSIToFP(V: CI->getArgOperand(i: 1),
3262 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3263 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3264 } else if (Name == "avx512.cvtusi2sd") {
3265 Rep =
3266 Builder.CreateUIToFP(V: CI->getArgOperand(i: 1),
3267 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3268 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3269 } else if (Name == "sse2.cvtss2sd") {
3270 Rep = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), Idx: (uint64_t)0);
3271 Rep = Builder.CreateFPExt(
3272 V: Rep, DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3273 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3274 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3275 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3276 Name.starts_with(Prefix: "avx512.mask.cvtdq2pd.") ||
3277 Name.starts_with(Prefix: "avx512.mask.cvtudq2pd.") ||
3278 Name.starts_with(Prefix: "avx512.mask.cvtdq2ps.") ||
3279 Name.starts_with(Prefix: "avx512.mask.cvtudq2ps.") ||
3280 Name.starts_with(Prefix: "avx512.mask.cvtqq2pd.") ||
3281 Name.starts_with(Prefix: "avx512.mask.cvtuqq2pd.") ||
3282 Name == "avx512.mask.cvtqq2ps.256" ||
3283 Name == "avx512.mask.cvtqq2ps.512" ||
3284 Name == "avx512.mask.cvtuqq2ps.256" ||
3285 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3286 Name == "avx.cvt.ps2.pd.256" ||
3287 Name == "avx512.mask.cvtps2pd.128" ||
3288 Name == "avx512.mask.cvtps2pd.256") {
3289 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3290 Rep = CI->getArgOperand(i: 0);
3291 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
3292
3293 unsigned NumDstElts = DstTy->getNumElements();
3294 if (NumDstElts < SrcTy->getNumElements()) {
3295 assert(NumDstElts == 2 && "Unexpected vector size");
3296 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1});
3297 }
3298
3299 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3300 bool IsUnsigned = Name.contains(Other: "cvtu");
3301 if (IsPS2PD)
3302 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtps2pd");
3303 else if (CI->arg_size() == 4 &&
3304 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
3305 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
3306 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3307 : Intrinsic::x86_avx512_sitofp_round;
3308 Rep = Builder.CreateIntrinsic(ID: IID, OverloadTypes: {DstTy, SrcTy},
3309 Args: {Rep, CI->getArgOperand(i: 3)});
3310 } else {
3311 Rep = IsUnsigned ? Builder.CreateUIToFP(V: Rep, DestTy: DstTy, Name: "cvt")
3312 : Builder.CreateSIToFP(V: Rep, DestTy: DstTy, Name: "cvt");
3313 }
3314
3315 if (CI->arg_size() >= 3)
3316 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3317 Op1: CI->getArgOperand(i: 1));
3318 } else if (Name.starts_with(Prefix: "avx512.mask.vcvtph2ps.") ||
3319 Name.starts_with(Prefix: "vcvtph2ps.")) {
3320 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3321 Rep = CI->getArgOperand(i: 0);
3322 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
3323 unsigned NumDstElts = DstTy->getNumElements();
3324 if (NumDstElts != SrcTy->getNumElements()) {
3325 assert(NumDstElts == 4 && "Unexpected vector size");
3326 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1, 2, 3});
3327 }
3328 Rep = Builder.CreateBitCast(
3329 V: Rep, DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C), NumElts: NumDstElts));
3330 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtph2ps");
3331 if (CI->arg_size() >= 3)
3332 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3333 Op1: CI->getArgOperand(i: 1));
3334 } else if (Name.starts_with(Prefix: "avx512.mask.load")) {
3335 // "avx512.mask.loadu." or "avx512.mask.load."
3336 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3337 Rep = upgradeMaskedLoad(Builder, Ptr: CI->getArgOperand(i: 0), Passthru: CI->getArgOperand(i: 1),
3338 Mask: CI->getArgOperand(i: 2), Aligned);
3339 } else if (Name.starts_with(Prefix: "avx512.mask.expand.load.")) {
3340 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
3341 auto *PtrTy = CI->getOperand(i_nocapture: 0)->getType();
3342 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3343 NumElts: ResultTy->getNumElements());
3344 Rep = Builder.CreateIntrinsic(
3345 ID: Intrinsic::masked_expandload, OverloadTypes: {ResultTy, PtrTy},
3346 Args: {CI->getOperand(i_nocapture: 0), MaskVec, CI->getOperand(i_nocapture: 1)});
3347 } else if (Name.starts_with(Prefix: "avx512.mask.compress.store.")) {
3348 auto *ResultTy = cast<VectorType>(Val: CI->getArgOperand(i: 1)->getType());
3349 auto *PtrTy = CI->getArgOperand(i: 0)->getType();
3350 Value *MaskVec =
3351 getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3352 NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements());
3353 Rep = Builder.CreateIntrinsic(
3354 ID: Intrinsic::masked_compressstore, OverloadTypes: {ResultTy, PtrTy},
3355 Args: {CI->getArgOperand(i: 1), CI->getArgOperand(i: 0), MaskVec});
3356 } else if (Name.starts_with(Prefix: "avx512.mask.compress.") ||
3357 Name.starts_with(Prefix: "avx512.mask.expand.")) {
3358 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
3359
3360 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3361 NumElts: ResultTy->getNumElements());
3362
3363 bool IsCompress = Name[12] == 'c';
3364 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3365 : Intrinsic::x86_avx512_mask_expand;
3366 Rep = Builder.CreateIntrinsic(
3367 ID: IID, OverloadTypes: ResultTy, Args: {CI->getOperand(i_nocapture: 0), CI->getOperand(i_nocapture: 1), MaskVec});
3368 } else if (Name.starts_with(Prefix: "xop.vpcom")) {
3369 bool IsSigned;
3370 if (Name.ends_with(Suffix: "ub") || Name.ends_with(Suffix: "uw") || Name.ends_with(Suffix: "ud") ||
3371 Name.ends_with(Suffix: "uq"))
3372 IsSigned = false;
3373 else if (Name.ends_with(Suffix: "b") || Name.ends_with(Suffix: "w") ||
3374 Name.ends_with(Suffix: "d") || Name.ends_with(Suffix: "q"))
3375 IsSigned = true;
3376 else
3377 reportFatalUsageErrorWithCI(reason: "Intrinsic has unknown suffix", CI);
3378
3379 unsigned Imm;
3380 if (CI->arg_size() == 3) {
3381 Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3382 } else {
3383 Name = Name.substr(Start: 9); // strip off "xop.vpcom"
3384 if (Name.starts_with(Prefix: "lt"))
3385 Imm = 0;
3386 else if (Name.starts_with(Prefix: "le"))
3387 Imm = 1;
3388 else if (Name.starts_with(Prefix: "gt"))
3389 Imm = 2;
3390 else if (Name.starts_with(Prefix: "ge"))
3391 Imm = 3;
3392 else if (Name.starts_with(Prefix: "eq"))
3393 Imm = 4;
3394 else if (Name.starts_with(Prefix: "ne"))
3395 Imm = 5;
3396 else if (Name.starts_with(Prefix: "false"))
3397 Imm = 6;
3398 else if (Name.starts_with(Prefix: "true"))
3399 Imm = 7;
3400 else
3401 llvm_unreachable("Unknown condition");
3402 }
3403
3404 Rep = upgradeX86vpcom(Builder, CI&: *CI, Imm, IsSigned);
3405 } else if (Name.starts_with(Prefix: "xop.vpcmov")) {
3406 Value *Sel = CI->getArgOperand(i: 2);
3407 Value *NotSel = Builder.CreateNot(V: Sel);
3408 Value *Sel0 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 0), RHS: Sel);
3409 Value *Sel1 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 1), RHS: NotSel);
3410 Rep = Builder.CreateOr(LHS: Sel0, RHS: Sel1);
3411 } else if (Name.starts_with(Prefix: "xop.vprot") || Name.starts_with(Prefix: "avx512.prol") ||
3412 Name.starts_with(Prefix: "avx512.mask.prol")) {
3413 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: false);
3414 } else if (Name.starts_with(Prefix: "avx512.pror") ||
3415 Name.starts_with(Prefix: "avx512.mask.pror")) {
3416 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: true);
3417 } else if (Name.starts_with(Prefix: "avx512.vpshld.") ||
3418 Name.starts_with(Prefix: "avx512.mask.vpshld") ||
3419 Name.starts_with(Prefix: "avx512.maskz.vpshld")) {
3420 bool ZeroMask = Name[11] == 'z';
3421 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: false, ZeroMask);
3422 } else if (Name.starts_with(Prefix: "avx512.vpshrd.") ||
3423 Name.starts_with(Prefix: "avx512.mask.vpshrd") ||
3424 Name.starts_with(Prefix: "avx512.maskz.vpshrd")) {
3425 bool ZeroMask = Name[11] == 'z';
3426 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: true, ZeroMask);
3427 } else if (Name == "sse42.crc32.64.8") {
3428 Value *Trunc0 =
3429 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Type::getInt32Ty(C));
3430 Rep = Builder.CreateIntrinsic(ID: Intrinsic::x86_sse42_crc32_32_8,
3431 Args: {Trunc0, CI->getArgOperand(i: 1)});
3432 Rep = Builder.CreateZExt(V: Rep, DestTy: CI->getType(), Name: "");
3433 } else if (Name.starts_with(Prefix: "avx.vbroadcast.s") ||
3434 Name.starts_with(Prefix: "avx512.vbroadcast.s")) {
3435 // Replace broadcasts with a series of insertelements.
3436 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3437 Type *EltTy = VecTy->getElementType();
3438 unsigned EltNum = VecTy->getNumElements();
3439 Value *Load = Builder.CreateLoad(Ty: EltTy, Ptr: CI->getArgOperand(i: 0));
3440 Type *I32Ty = Type::getInt32Ty(C);
3441 Rep = PoisonValue::get(T: VecTy);
3442 for (unsigned I = 0; I < EltNum; ++I)
3443 Rep = Builder.CreateInsertElement(Vec: Rep, NewElt: Load, Idx: ConstantInt::get(Ty: I32Ty, V: I));
3444 } else if (Name.starts_with(Prefix: "sse41.pmovsx") ||
3445 Name.starts_with(Prefix: "sse41.pmovzx") ||
3446 Name.starts_with(Prefix: "avx2.pmovsx") ||
3447 Name.starts_with(Prefix: "avx2.pmovzx") ||
3448 Name.starts_with(Prefix: "avx512.mask.pmovsx") ||
3449 Name.starts_with(Prefix: "avx512.mask.pmovzx")) {
3450 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3451 unsigned NumDstElts = DstTy->getNumElements();
3452
3453 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3454 SmallVector<int, 8> ShuffleMask(NumDstElts);
3455 for (unsigned i = 0; i != NumDstElts; ++i)
3456 ShuffleMask[i] = i;
3457
3458 Value *SV = Builder.CreateShuffleVector(V: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3459
3460 bool DoSext = Name.contains(Other: "pmovsx");
3461 Rep =
3462 DoSext ? Builder.CreateSExt(V: SV, DestTy: DstTy) : Builder.CreateZExt(V: SV, DestTy: DstTy);
3463 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3464 if (CI->arg_size() == 3)
3465 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3466 Op1: CI->getArgOperand(i: 1));
3467 } else if (Name == "avx512.mask.pmov.qd.256" ||
3468 Name == "avx512.mask.pmov.qd.512" ||
3469 Name == "avx512.mask.pmov.wb.256" ||
3470 Name == "avx512.mask.pmov.wb.512") {
3471 Type *Ty = CI->getArgOperand(i: 1)->getType();
3472 Rep = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Ty);
3473 Rep =
3474 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3475 } else if (Name.starts_with(Prefix: "avx.vbroadcastf128") ||
3476 Name == "avx2.vbroadcasti128") {
3477 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3478 Type *EltTy = cast<VectorType>(Val: CI->getType())->getElementType();
3479 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3480 auto *VT = FixedVectorType::get(ElementType: EltTy, NumElts: NumSrcElts);
3481 Value *Load = Builder.CreateAlignedLoad(Ty: VT, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
3482 if (NumSrcElts == 2)
3483 Rep = Builder.CreateShuffleVector(V: Load, Mask: ArrayRef<int>{0, 1, 0, 1});
3484 else
3485 Rep = Builder.CreateShuffleVector(V: Load,
3486 Mask: ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3487 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.i") ||
3488 Name.starts_with(Prefix: "avx512.mask.shuf.f")) {
3489 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3490 Type *VT = CI->getType();
3491 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3492 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3493 unsigned ControlBitsMask = NumLanes - 1;
3494 unsigned NumControlBits = NumLanes / 2;
3495 SmallVector<int, 8> ShuffleMask(0);
3496
3497 for (unsigned l = 0; l != NumLanes; ++l) {
3498 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3499 // We actually need the other source.
3500 if (l >= NumLanes / 2)
3501 LaneMask += NumLanes;
3502 for (unsigned i = 0; i != NumElementsInLane; ++i)
3503 ShuffleMask.push_back(Elt: LaneMask * NumElementsInLane + i);
3504 }
3505 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3506 V2: CI->getArgOperand(i: 1), Mask: ShuffleMask);
3507 Rep =
3508 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3509 } else if (Name.starts_with(Prefix: "avx512.mask.broadcastf") ||
3510 Name.starts_with(Prefix: "avx512.mask.broadcasti")) {
3511 unsigned NumSrcElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 0)->getType())
3512 ->getNumElements();
3513 unsigned NumDstElts =
3514 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3515
3516 SmallVector<int, 8> ShuffleMask(NumDstElts);
3517 for (unsigned i = 0; i != NumDstElts; ++i)
3518 ShuffleMask[i] = i % NumSrcElts;
3519
3520 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3521 V2: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3522 Rep =
3523 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3524 } else if (Name.starts_with(Prefix: "avx2.pbroadcast") ||
3525 Name.starts_with(Prefix: "avx2.vbroadcast") ||
3526 Name.starts_with(Prefix: "avx512.pbroadcast") ||
3527 Name.starts_with(Prefix: "avx512.mask.broadcast.s")) {
3528 // Replace vp?broadcasts with a vector shuffle.
3529 Value *Op = CI->getArgOperand(i: 0);
3530 ElementCount EC = cast<VectorType>(Val: CI->getType())->getElementCount();
3531 Type *MaskTy = VectorType::get(ElementType: Type::getInt32Ty(C), EC);
3532 SmallVector<int, 8> M;
3533 ShuffleVectorInst::getShuffleMask(Mask: Constant::getNullValue(Ty: MaskTy), Result&: M);
3534 Rep = Builder.CreateShuffleVector(V: Op, Mask: M);
3535
3536 if (CI->arg_size() == 3)
3537 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3538 Op1: CI->getArgOperand(i: 1));
3539 } else if (Name.starts_with(Prefix: "sse2.padds.") ||
3540 Name.starts_with(Prefix: "avx2.padds.") ||
3541 Name.starts_with(Prefix: "avx512.padds.") ||
3542 Name.starts_with(Prefix: "avx512.mask.padds.")) {
3543 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::sadd_sat);
3544 } else if (Name.starts_with(Prefix: "sse2.psubs.") ||
3545 Name.starts_with(Prefix: "avx2.psubs.") ||
3546 Name.starts_with(Prefix: "avx512.psubs.") ||
3547 Name.starts_with(Prefix: "avx512.mask.psubs.")) {
3548 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::ssub_sat);
3549 } else if (Name.starts_with(Prefix: "sse2.paddus.") ||
3550 Name.starts_with(Prefix: "avx2.paddus.") ||
3551 Name.starts_with(Prefix: "avx512.mask.paddus.")) {
3552 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::uadd_sat);
3553 } else if (Name.starts_with(Prefix: "sse2.psubus.") ||
3554 Name.starts_with(Prefix: "avx2.psubus.") ||
3555 Name.starts_with(Prefix: "avx512.mask.psubus.")) {
3556 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::usub_sat);
3557 } else if (Name.starts_with(Prefix: "avx512.mask.palignr.")) {
3558 Rep = upgradeX86ALIGNIntrinsics(Builder, Op0: CI->getArgOperand(i: 0),
3559 Op1: CI->getArgOperand(i: 1), Shift: CI->getArgOperand(i: 2),
3560 Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4),
3561 IsVALIGN: false);
3562 } else if (Name.starts_with(Prefix: "avx512.mask.valign.")) {
3563 Rep = upgradeX86ALIGNIntrinsics(
3564 Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1),
3565 Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), IsVALIGN: true);
3566 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3567 // 128/256-bit shift left specified in bits.
3568 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3569 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3570 Shift: Shift / 8); // Shift is in bits.
3571 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3572 // 128/256-bit shift right specified in bits.
3573 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3574 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3575 Shift: Shift / 8); // Shift is in bits.
3576 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3577 Name == "avx512.psll.dq.512") {
3578 // 128/256/512-bit shift left specified in bytes.
3579 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3580 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3581 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3582 Name == "avx512.psrl.dq.512") {
3583 // 128/256/512-bit shift right specified in bytes.
3584 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3585 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3586 } else if (Name == "sse41.pblendw" || Name.starts_with(Prefix: "sse41.blendp") ||
3587 Name.starts_with(Prefix: "avx.blend.p") || Name == "avx2.pblendw" ||
3588 Name.starts_with(Prefix: "avx2.pblendd.")) {
3589 Value *Op0 = CI->getArgOperand(i: 0);
3590 Value *Op1 = CI->getArgOperand(i: 1);
3591 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3592 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3593 unsigned NumElts = VecTy->getNumElements();
3594
3595 SmallVector<int, 16> Idxs(NumElts);
3596 for (unsigned i = 0; i != NumElts; ++i)
3597 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3598
3599 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3600 } else if (Name.starts_with(Prefix: "avx.vinsertf128.") ||
3601 Name == "avx2.vinserti128" ||
3602 Name.starts_with(Prefix: "avx512.mask.insert")) {
3603 Value *Op0 = CI->getArgOperand(i: 0);
3604 Value *Op1 = CI->getArgOperand(i: 1);
3605 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3606 unsigned DstNumElts =
3607 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3608 unsigned SrcNumElts =
3609 cast<FixedVectorType>(Val: Op1->getType())->getNumElements();
3610 unsigned Scale = DstNumElts / SrcNumElts;
3611
3612 // Mask off the high bits of the immediate value; hardware ignores those.
3613 Imm = Imm % Scale;
3614
3615 // Extend the second operand into a vector the size of the destination.
3616 SmallVector<int, 8> Idxs(DstNumElts);
3617 for (unsigned i = 0; i != SrcNumElts; ++i)
3618 Idxs[i] = i;
3619 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3620 Idxs[i] = SrcNumElts;
3621 Rep = Builder.CreateShuffleVector(V: Op1, Mask: Idxs);
3622
3623 // Insert the second operand into the first operand.
3624
3625 // Note that there is no guarantee that instruction lowering will actually
3626 // produce a vinsertf128 instruction for the created shuffles. In
3627 // particular, the 0 immediate case involves no lane changes, so it can
3628 // be handled as a blend.
3629
3630 // Example of shuffle mask for 32-bit elements:
3631 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3632 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3633
3634 // First fill with identity mask.
3635 for (unsigned i = 0; i != DstNumElts; ++i)
3636 Idxs[i] = i;
3637 // Then replace the elements where we need to insert.
3638 for (unsigned i = 0; i != SrcNumElts; ++i)
3639 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3640 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Rep, Mask: Idxs);
3641
3642 // If the intrinsic has a mask operand, handle that.
3643 if (CI->arg_size() == 5)
3644 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep,
3645 Op1: CI->getArgOperand(i: 3));
3646 } else if (Name.starts_with(Prefix: "avx.vextractf128.") ||
3647 Name == "avx2.vextracti128" ||
3648 Name.starts_with(Prefix: "avx512.mask.vextract")) {
3649 Value *Op0 = CI->getArgOperand(i: 0);
3650 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3651 unsigned DstNumElts =
3652 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3653 unsigned SrcNumElts =
3654 cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
3655 unsigned Scale = SrcNumElts / DstNumElts;
3656
3657 // Mask off the high bits of the immediate value; hardware ignores those.
3658 Imm = Imm % Scale;
3659
3660 // Get indexes for the subvector of the input vector.
3661 SmallVector<int, 8> Idxs(DstNumElts);
3662 for (unsigned i = 0; i != DstNumElts; ++i) {
3663 Idxs[i] = i + (Imm * DstNumElts);
3664 }
3665 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3666
3667 // If the intrinsic has a mask operand, handle that.
3668 if (CI->arg_size() == 4)
3669 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3670 Op1: CI->getArgOperand(i: 2));
3671 } else if (Name.starts_with(Prefix: "avx512.mask.perm.df.") ||
3672 Name.starts_with(Prefix: "avx512.mask.perm.di.")) {
3673 Value *Op0 = CI->getArgOperand(i: 0);
3674 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3675 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3676 unsigned NumElts = VecTy->getNumElements();
3677
3678 SmallVector<int, 8> Idxs(NumElts);
3679 for (unsigned i = 0; i != NumElts; ++i)
3680 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3681
3682 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3683
3684 if (CI->arg_size() == 4)
3685 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3686 Op1: CI->getArgOperand(i: 2));
3687 } else if (Name.starts_with(Prefix: "avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3688 // The immediate permute control byte looks like this:
3689 // [1:0] - select 128 bits from sources for low half of destination
3690 // [2] - ignore
3691 // [3] - zero low half of destination
3692 // [5:4] - select 128 bits from sources for high half of destination
3693 // [6] - ignore
3694 // [7] - zero high half of destination
3695
3696 uint8_t Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3697
3698 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3699 unsigned HalfSize = NumElts / 2;
3700 SmallVector<int, 8> ShuffleMask(NumElts);
3701
3702 // Determine which operand(s) are actually in use for this instruction.
3703 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3704 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3705
3706 // If needed, replace operands based on zero mask.
3707 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(Ty: CI->getType()) : V0;
3708 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(Ty: CI->getType()) : V1;
3709
3710 // Permute low half of result.
3711 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3712 for (unsigned i = 0; i < HalfSize; ++i)
3713 ShuffleMask[i] = StartIndex + i;
3714
3715 // Permute high half of result.
3716 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3717 for (unsigned i = 0; i < HalfSize; ++i)
3718 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3719
3720 Rep = Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: ShuffleMask);
3721
3722 } else if (Name.starts_with(Prefix: "avx.vpermil.") || Name == "sse2.pshuf.d" ||
3723 Name.starts_with(Prefix: "avx512.mask.vpermil.p") ||
3724 Name.starts_with(Prefix: "avx512.mask.pshuf.d.")) {
3725 Value *Op0 = CI->getArgOperand(i: 0);
3726 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3727 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3728 unsigned NumElts = VecTy->getNumElements();
3729 // Calculate the size of each index in the immediate.
3730 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3731 unsigned IdxMask = ((1 << IdxSize) - 1);
3732
3733 SmallVector<int, 8> Idxs(NumElts);
3734 // Look up the bits for this element, wrapping around the immediate every
3735 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3736 // to offset by the first index of each group.
3737 for (unsigned i = 0; i != NumElts; ++i)
3738 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3739
3740 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3741
3742 if (CI->arg_size() == 4)
3743 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3744 Op1: CI->getArgOperand(i: 2));
3745 } else if (Name == "sse2.pshufl.w" ||
3746 Name.starts_with(Prefix: "avx512.mask.pshufl.w.")) {
3747 Value *Op0 = CI->getArgOperand(i: 0);
3748 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3749 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3750
3751 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3752 reportFatalUsageErrorWithCI(reason: "Intrinsic has invalid signature", CI);
3753
3754 SmallVector<int, 16> Idxs(NumElts);
3755 for (unsigned l = 0; l != NumElts; l += 8) {
3756 for (unsigned i = 0; i != 4; ++i)
3757 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3758 for (unsigned i = 4; i != 8; ++i)
3759 Idxs[i + l] = i + l;
3760 }
3761
3762 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3763
3764 if (CI->arg_size() == 4)
3765 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3766 Op1: CI->getArgOperand(i: 2));
3767 } else if (Name == "sse2.pshufh.w" ||
3768 Name.starts_with(Prefix: "avx512.mask.pshufh.w.")) {
3769 Value *Op0 = CI->getArgOperand(i: 0);
3770 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3771 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3772
3773 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3774 reportFatalUsageErrorWithCI(reason: "Intrinsic has invalid signature", CI);
3775
3776 SmallVector<int, 16> Idxs(NumElts);
3777 for (unsigned l = 0; l != NumElts; l += 8) {
3778 for (unsigned i = 0; i != 4; ++i)
3779 Idxs[i + l] = i + l;
3780 for (unsigned i = 0; i != 4; ++i)
3781 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3782 }
3783
3784 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3785
3786 if (CI->arg_size() == 4)
3787 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3788 Op1: CI->getArgOperand(i: 2));
3789 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.p")) {
3790 Value *Op0 = CI->getArgOperand(i: 0);
3791 Value *Op1 = CI->getArgOperand(i: 1);
3792 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3793 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3794
3795 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3796 unsigned HalfLaneElts = NumLaneElts / 2;
3797
3798 SmallVector<int, 16> Idxs(NumElts);
3799 for (unsigned i = 0; i != NumElts; ++i) {
3800 // Base index is the starting element of the lane.
3801 Idxs[i] = i - (i % NumLaneElts);
3802 // If we are half way through the lane switch to the other source.
3803 if ((i % NumLaneElts) >= HalfLaneElts)
3804 Idxs[i] += NumElts;
3805 // Now select the specific element by adding HalfLaneElts bits from
3806 // the immediate, wrapping around the immediate every 8 bits.
3807 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3808 }
3809
3810 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3811
3812 Rep =
3813 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3814 } else if (Name.starts_with(Prefix: "avx512.mask.movddup") ||
3815 Name.starts_with(Prefix: "avx512.mask.movshdup") ||
3816 Name.starts_with(Prefix: "avx512.mask.movsldup")) {
3817 Value *Op0 = CI->getArgOperand(i: 0);
3818 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3819 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3820
3821 unsigned Offset = 0;
3822 if (Name.starts_with(Prefix: "avx512.mask.movshdup."))
3823 Offset = 1;
3824
3825 SmallVector<int, 16> Idxs(NumElts);
3826 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3827 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3828 Idxs[i + l + 0] = i + l + Offset;
3829 Idxs[i + l + 1] = i + l + Offset;
3830 }
3831
3832 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3833
3834 Rep =
3835 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3836 } else if (Name.starts_with(Prefix: "avx512.mask.punpckl") ||
3837 Name.starts_with(Prefix: "avx512.mask.unpckl.")) {
3838 Value *Op0 = CI->getArgOperand(i: 0);
3839 Value *Op1 = CI->getArgOperand(i: 1);
3840 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3841 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3842
3843 SmallVector<int, 64> Idxs(NumElts);
3844 for (int l = 0; l != NumElts; l += NumLaneElts)
3845 for (int i = 0; i != NumLaneElts; ++i)
3846 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3847
3848 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3849
3850 Rep =
3851 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3852 } else if (Name.starts_with(Prefix: "avx512.mask.punpckh") ||
3853 Name.starts_with(Prefix: "avx512.mask.unpckh.")) {
3854 Value *Op0 = CI->getArgOperand(i: 0);
3855 Value *Op1 = CI->getArgOperand(i: 1);
3856 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3857 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3858
3859 SmallVector<int, 64> Idxs(NumElts);
3860 for (int l = 0; l != NumElts; l += NumLaneElts)
3861 for (int i = 0; i != NumLaneElts; ++i)
3862 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3863
3864 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3865
3866 Rep =
3867 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3868 } else if (Name.starts_with(Prefix: "avx512.mask.and.") ||
3869 Name.starts_with(Prefix: "avx512.mask.pand.")) {
3870 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3871 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3872 Rep = Builder.CreateAnd(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3873 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3874 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3875 Rep =
3876 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3877 } else if (Name.starts_with(Prefix: "avx512.mask.andn.") ||
3878 Name.starts_with(Prefix: "avx512.mask.pandn.")) {
3879 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3880 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3881 Rep = Builder.CreateNot(V: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy));
3882 Rep = Builder.CreateAnd(LHS: Rep,
3883 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3884 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3885 Rep =
3886 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3887 } else if (Name.starts_with(Prefix: "avx512.mask.or.") ||
3888 Name.starts_with(Prefix: "avx512.mask.por.")) {
3889 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3890 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3891 Rep = Builder.CreateOr(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3892 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3893 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3894 Rep =
3895 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3896 } else if (Name.starts_with(Prefix: "avx512.mask.xor.") ||
3897 Name.starts_with(Prefix: "avx512.mask.pxor.")) {
3898 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3899 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3900 Rep = Builder.CreateXor(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3901 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3902 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3903 Rep =
3904 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3905 } else if (Name.starts_with(Prefix: "avx512.mask.padd.")) {
3906 Rep = Builder.CreateAdd(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3907 Rep =
3908 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3909 } else if (Name.starts_with(Prefix: "avx512.mask.psub.")) {
3910 Rep = Builder.CreateSub(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3911 Rep =
3912 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3913 } else if (Name.starts_with(Prefix: "avx512.mask.pmull.")) {
3914 Rep = Builder.CreateMul(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3915 Rep =
3916 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3917 } else if (Name.starts_with(Prefix: "avx512.mask.add.p")) {
3918 if (Name.ends_with(Suffix: ".512")) {
3919 Intrinsic::ID IID;
3920 if (Name[17] == 's')
3921 IID = Intrinsic::x86_avx512_add_ps_512;
3922 else
3923 IID = Intrinsic::x86_avx512_add_pd_512;
3924
3925 Rep = Builder.CreateIntrinsic(
3926 ID: IID,
3927 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3928 } else {
3929 Rep = Builder.CreateFAdd(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3930 }
3931 Rep =
3932 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3933 } else if (Name.starts_with(Prefix: "avx512.mask.div.p")) {
3934 if (Name.ends_with(Suffix: ".512")) {
3935 Intrinsic::ID IID;
3936 if (Name[17] == 's')
3937 IID = Intrinsic::x86_avx512_div_ps_512;
3938 else
3939 IID = Intrinsic::x86_avx512_div_pd_512;
3940
3941 Rep = Builder.CreateIntrinsic(
3942 ID: IID,
3943 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3944 } else {
3945 Rep = Builder.CreateFDiv(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3946 }
3947 Rep =
3948 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3949 } else if (Name.starts_with(Prefix: "avx512.mask.mul.p")) {
3950 if (Name.ends_with(Suffix: ".512")) {
3951 Intrinsic::ID IID;
3952 if (Name[17] == 's')
3953 IID = Intrinsic::x86_avx512_mul_ps_512;
3954 else
3955 IID = Intrinsic::x86_avx512_mul_pd_512;
3956
3957 Rep = Builder.CreateIntrinsic(
3958 ID: IID,
3959 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3960 } else {
3961 Rep = Builder.CreateFMul(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3962 }
3963 Rep =
3964 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3965 } else if (Name.starts_with(Prefix: "avx512.mask.sub.p")) {
3966 if (Name.ends_with(Suffix: ".512")) {
3967 Intrinsic::ID IID;
3968 if (Name[17] == 's')
3969 IID = Intrinsic::x86_avx512_sub_ps_512;
3970 else
3971 IID = Intrinsic::x86_avx512_sub_pd_512;
3972
3973 Rep = Builder.CreateIntrinsic(
3974 ID: IID,
3975 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3976 } else {
3977 Rep = Builder.CreateFSub(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3978 }
3979 Rep =
3980 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3981 } else if ((Name.starts_with(Prefix: "avx512.mask.max.p") ||
3982 Name.starts_with(Prefix: "avx512.mask.min.p")) &&
3983 Name.drop_front(N: 18) == ".512") {
3984 bool IsDouble = Name[17] == 'd';
3985 bool IsMin = Name[13] == 'i';
3986 static const Intrinsic::ID MinMaxTbl[2][2] = {
3987 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3988 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3989 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3990
3991 Rep = Builder.CreateIntrinsic(
3992 ID: IID,
3993 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3994 Rep =
3995 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3996 } else if (Name.starts_with(Prefix: "avx512.mask.lzcnt.")) {
3997 Rep =
3998 Builder.CreateIntrinsic(ID: Intrinsic::ctlz, OverloadTypes: CI->getType(),
3999 Args: {CI->getArgOperand(i: 0), Builder.getInt1(V: false)});
4000 Rep =
4001 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
4002 } else if (Name.starts_with(Prefix: "avx512.mask.psll")) {
4003 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4004 bool IsVariable = Name[16] == 'v';
4005 char Size = Name[16] == '.' ? Name[17]
4006 : Name[17] == '.' ? Name[18]
4007 : Name[18] == '.' ? Name[19]
4008 : Name[20];
4009
4010 Intrinsic::ID IID;
4011 if (IsVariable && Name[17] != '.') {
4012 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
4013 IID = Intrinsic::x86_avx2_psllv_q;
4014 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
4015 IID = Intrinsic::x86_avx2_psllv_q_256;
4016 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
4017 IID = Intrinsic::x86_avx2_psllv_d;
4018 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
4019 IID = Intrinsic::x86_avx2_psllv_d_256;
4020 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
4021 IID = Intrinsic::x86_avx512_psllv_w_128;
4022 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
4023 IID = Intrinsic::x86_avx512_psllv_w_256;
4024 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
4025 IID = Intrinsic::x86_avx512_psllv_w_512;
4026 else
4027 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4028 } else if (Name.ends_with(Suffix: ".128")) {
4029 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
4030 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
4031 : Intrinsic::x86_sse2_psll_d;
4032 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
4033 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
4034 : Intrinsic::x86_sse2_psll_q;
4035 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
4036 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
4037 : Intrinsic::x86_sse2_psll_w;
4038 else
4039 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4040 } else if (Name.ends_with(Suffix: ".256")) {
4041 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
4042 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
4043 : Intrinsic::x86_avx2_psll_d;
4044 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
4045 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
4046 : Intrinsic::x86_avx2_psll_q;
4047 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
4048 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
4049 : Intrinsic::x86_avx2_psll_w;
4050 else
4051 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4052 } else {
4053 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
4054 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
4055 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
4056 : Intrinsic::x86_avx512_psll_d_512;
4057 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
4058 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
4059 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
4060 : Intrinsic::x86_avx512_psll_q_512;
4061 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
4062 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
4063 : Intrinsic::x86_avx512_psll_w_512;
4064 else
4065 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4066 }
4067
4068 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
4069 } else if (Name.starts_with(Prefix: "avx512.mask.psrl")) {
4070 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4071 bool IsVariable = Name[16] == 'v';
4072 char Size = Name[16] == '.' ? Name[17]
4073 : Name[17] == '.' ? Name[18]
4074 : Name[18] == '.' ? Name[19]
4075 : Name[20];
4076
4077 Intrinsic::ID IID;
4078 if (IsVariable && Name[17] != '.') {
4079 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
4080 IID = Intrinsic::x86_avx2_psrlv_q;
4081 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
4082 IID = Intrinsic::x86_avx2_psrlv_q_256;
4083 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
4084 IID = Intrinsic::x86_avx2_psrlv_d;
4085 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
4086 IID = Intrinsic::x86_avx2_psrlv_d_256;
4087 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
4088 IID = Intrinsic::x86_avx512_psrlv_w_128;
4089 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
4090 IID = Intrinsic::x86_avx512_psrlv_w_256;
4091 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4092 IID = Intrinsic::x86_avx512_psrlv_w_512;
4093 else
4094 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4095 } else if (Name.ends_with(Suffix: ".128")) {
4096 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4097 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4098 : Intrinsic::x86_sse2_psrl_d;
4099 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4100 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4101 : Intrinsic::x86_sse2_psrl_q;
4102 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4103 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4104 : Intrinsic::x86_sse2_psrl_w;
4105 else
4106 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4107 } else if (Name.ends_with(Suffix: ".256")) {
4108 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4109 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4110 : Intrinsic::x86_avx2_psrl_d;
4111 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4112 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4113 : Intrinsic::x86_avx2_psrl_q;
4114 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4115 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4116 : Intrinsic::x86_avx2_psrl_w;
4117 else
4118 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4119 } else {
4120 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4121 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4122 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4123 : Intrinsic::x86_avx512_psrl_d_512;
4124 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4125 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4126 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4127 : Intrinsic::x86_avx512_psrl_q_512;
4128 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
4129 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4130 : Intrinsic::x86_avx512_psrl_w_512;
4131 else
4132 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4133 }
4134
4135 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
4136 } else if (Name.starts_with(Prefix: "avx512.mask.psra")) {
4137 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4138 bool IsVariable = Name[16] == 'v';
4139 char Size = Name[16] == '.' ? Name[17]
4140 : Name[17] == '.' ? Name[18]
4141 : Name[18] == '.' ? Name[19]
4142 : Name[20];
4143
4144 Intrinsic::ID IID;
4145 if (IsVariable && Name[17] != '.') {
4146 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4147 IID = Intrinsic::x86_avx2_psrav_d;
4148 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4149 IID = Intrinsic::x86_avx2_psrav_d_256;
4150 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4151 IID = Intrinsic::x86_avx512_psrav_w_128;
4152 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4153 IID = Intrinsic::x86_avx512_psrav_w_256;
4154 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4155 IID = Intrinsic::x86_avx512_psrav_w_512;
4156 else
4157 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4158 } else if (Name.ends_with(Suffix: ".128")) {
4159 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4160 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4161 : Intrinsic::x86_sse2_psra_d;
4162 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4163 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4164 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4165 : Intrinsic::x86_avx512_psra_q_128;
4166 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4167 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4168 : Intrinsic::x86_sse2_psra_w;
4169 else
4170 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4171 } else if (Name.ends_with(Suffix: ".256")) {
4172 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4173 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4174 : Intrinsic::x86_avx2_psra_d;
4175 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4176 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4177 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4178 : Intrinsic::x86_avx512_psra_q_256;
4179 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4180 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4181 : Intrinsic::x86_avx2_psra_w;
4182 else
4183 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4184 } else {
4185 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4186 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4187 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4188 : Intrinsic::x86_avx512_psra_d_512;
4189 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4190 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4191 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4192 : Intrinsic::x86_avx512_psra_q_512;
4193 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4194 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4195 : Intrinsic::x86_avx512_psra_w_512;
4196 else
4197 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4198 }
4199
4200 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
4201 } else if (Name.starts_with(Prefix: "avx512.mask.move.s")) {
4202 Rep = upgradeMaskedMove(Builder, CI&: *CI);
4203 } else if (Name.starts_with(Prefix: "avx512.cvtmask2")) {
4204 Rep = upgradeMaskToInt(Builder, CI&: *CI);
4205 } else if (Name.ends_with(Suffix: ".movntdqa")) {
4206 MDNode *Node = MDNode::get(
4207 Context&: C, MDs: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
4208
4209 LoadInst *LI = Builder.CreateAlignedLoad(
4210 Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0),
4211 Align: Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
4212 LI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
4213 Rep = LI;
4214 } else if (Name.starts_with(Prefix: "fma.vfmadd.") ||
4215 Name.starts_with(Prefix: "fma.vfmsub.") ||
4216 Name.starts_with(Prefix: "fma.vfnmadd.") ||
4217 Name.starts_with(Prefix: "fma.vfnmsub.")) {
4218 bool NegMul = Name[6] == 'n';
4219 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4220 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4221
4222 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4223 CI->getArgOperand(i: 2)};
4224
4225 if (IsScalar) {
4226 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
4227 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
4228 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
4229 }
4230
4231 if (NegMul && !IsScalar)
4232 Ops[0] = Builder.CreateFNeg(V: Ops[0]);
4233 if (NegMul && IsScalar)
4234 Ops[1] = Builder.CreateFNeg(V: Ops[1]);
4235 if (NegAcc)
4236 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4237
4238 Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, OverloadTypes: Ops[0]->getType(), Args: Ops);
4239
4240 if (IsScalar)
4241 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
4242 } else if (Name.starts_with(Prefix: "fma4.vfmadd.s")) {
4243 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4244 CI->getArgOperand(i: 2)};
4245
4246 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
4247 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
4248 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
4249
4250 Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, OverloadTypes: Ops[0]->getType(), Args: Ops);
4251
4252 Rep = Builder.CreateInsertElement(Vec: Constant::getNullValue(Ty: CI->getType()),
4253 NewElt: Rep, Idx: (uint64_t)0);
4254 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.s") ||
4255 Name.starts_with(Prefix: "avx512.maskz.vfmadd.s") ||
4256 Name.starts_with(Prefix: "avx512.mask3.vfmadd.s") ||
4257 Name.starts_with(Prefix: "avx512.mask3.vfmsub.s") ||
4258 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.s")) {
4259 bool IsMask3 = Name[11] == '3';
4260 bool IsMaskZ = Name[11] == 'z';
4261 // Drop the "avx512.mask." to make it easier.
4262 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4263 bool NegMul = Name[2] == 'n';
4264 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4265
4266 Value *A = CI->getArgOperand(i: 0);
4267 Value *B = CI->getArgOperand(i: 1);
4268 Value *C = CI->getArgOperand(i: 2);
4269
4270 if (NegMul && (IsMask3 || IsMaskZ))
4271 A = Builder.CreateFNeg(V: A);
4272 if (NegMul && !(IsMask3 || IsMaskZ))
4273 B = Builder.CreateFNeg(V: B);
4274 if (NegAcc)
4275 C = Builder.CreateFNeg(V: C);
4276
4277 A = Builder.CreateExtractElement(Vec: A, Idx: (uint64_t)0);
4278 B = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
4279 C = Builder.CreateExtractElement(Vec: C, Idx: (uint64_t)0);
4280
4281 if (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
4282 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4) {
4283 Value *Ops[] = {A, B, C, CI->getArgOperand(i: 4)};
4284
4285 Intrinsic::ID IID;
4286 if (Name.back() == 'd')
4287 IID = Intrinsic::x86_avx512_vfmadd_f64;
4288 else
4289 IID = Intrinsic::x86_avx512_vfmadd_f32;
4290 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4291 } else {
4292 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
4293 }
4294
4295 Value *PassThru = IsMaskZ ? Constant::getNullValue(Ty: Rep->getType())
4296 : IsMask3 ? C
4297 : A;
4298
4299 // For Mask3 with NegAcc, we need to create a new extractelement that
4300 // avoids the negation above.
4301 if (NegAcc && IsMask3)
4302 PassThru =
4303 Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 2), Idx: (uint64_t)0);
4304
4305 Rep = emitX86ScalarSelect(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4306 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: IsMask3 ? 2 : 0), NewElt: Rep,
4307 Idx: (uint64_t)0);
4308 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.p") ||
4309 Name.starts_with(Prefix: "avx512.mask.vfnmadd.p") ||
4310 Name.starts_with(Prefix: "avx512.mask.vfnmsub.p") ||
4311 Name.starts_with(Prefix: "avx512.mask3.vfmadd.p") ||
4312 Name.starts_with(Prefix: "avx512.mask3.vfmsub.p") ||
4313 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.p") ||
4314 Name.starts_with(Prefix: "avx512.maskz.vfmadd.p")) {
4315 bool IsMask3 = Name[11] == '3';
4316 bool IsMaskZ = Name[11] == 'z';
4317 // Drop the "avx512.mask." to make it easier.
4318 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4319 bool NegMul = Name[2] == 'n';
4320 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4321
4322 Value *A = CI->getArgOperand(i: 0);
4323 Value *B = CI->getArgOperand(i: 1);
4324 Value *C = CI->getArgOperand(i: 2);
4325
4326 if (NegMul && (IsMask3 || IsMaskZ))
4327 A = Builder.CreateFNeg(V: A);
4328 if (NegMul && !(IsMask3 || IsMaskZ))
4329 B = Builder.CreateFNeg(V: B);
4330 if (NegAcc)
4331 C = Builder.CreateFNeg(V: C);
4332
4333 if (CI->arg_size() == 5 &&
4334 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
4335 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4)) {
4336 Intrinsic::ID IID;
4337 // Check the character before ".512" in string.
4338 if (Name[Name.size() - 5] == 's')
4339 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4340 else
4341 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4342
4343 Rep = Builder.CreateIntrinsic(ID: IID, Args: {A, B, C, CI->getArgOperand(i: 4)});
4344 } else {
4345 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
4346 }
4347
4348 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
4349 : IsMask3 ? CI->getArgOperand(i: 2)
4350 : CI->getArgOperand(i: 0);
4351
4352 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4353 } else if (Name.starts_with(Prefix: "fma.vfmsubadd.p")) {
4354 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4355 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4356 Intrinsic::ID IID;
4357 if (VecWidth == 128 && EltWidth == 32)
4358 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4359 else if (VecWidth == 256 && EltWidth == 32)
4360 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4361 else if (VecWidth == 128 && EltWidth == 64)
4362 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4363 else if (VecWidth == 256 && EltWidth == 64)
4364 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4365 else
4366 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4367
4368 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4369 CI->getArgOperand(i: 2)};
4370 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4371 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4372 } else if (Name.starts_with(Prefix: "avx512.mask.vfmaddsub.p") ||
4373 Name.starts_with(Prefix: "avx512.mask3.vfmaddsub.p") ||
4374 Name.starts_with(Prefix: "avx512.maskz.vfmaddsub.p") ||
4375 Name.starts_with(Prefix: "avx512.mask3.vfmsubadd.p")) {
4376 bool IsMask3 = Name[11] == '3';
4377 bool IsMaskZ = Name[11] == 'z';
4378 // Drop the "avx512.mask." to make it easier.
4379 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4380 bool IsSubAdd = Name[3] == 's';
4381 if (CI->arg_size() == 5) {
4382 Intrinsic::ID IID;
4383 // Check the character before ".512" in string.
4384 if (Name[Name.size() - 5] == 's')
4385 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4386 else
4387 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4388
4389 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4390 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
4391 if (IsSubAdd)
4392 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4393
4394 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4395 } else {
4396 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
4397
4398 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4399 CI->getArgOperand(i: 2)};
4400
4401 Function *FMA = Intrinsic::getOrInsertDeclaration(
4402 M: CI->getModule(), id: Intrinsic::fma, OverloadTys: Ops[0]->getType());
4403 Value *Odd = Builder.CreateCall(Callee: FMA, Args: Ops);
4404 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4405 Value *Even = Builder.CreateCall(Callee: FMA, Args: Ops);
4406
4407 if (IsSubAdd)
4408 std::swap(a&: Even, b&: Odd);
4409
4410 SmallVector<int, 32> Idxs(NumElts);
4411 for (int i = 0; i != NumElts; ++i)
4412 Idxs[i] = i + (i % 2) * NumElts;
4413
4414 Rep = Builder.CreateShuffleVector(V1: Even, V2: Odd, Mask: Idxs);
4415 }
4416
4417 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
4418 : IsMask3 ? CI->getArgOperand(i: 2)
4419 : CI->getArgOperand(i: 0);
4420
4421 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4422 } else if (Name.starts_with(Prefix: "avx512.mask.pternlog.") ||
4423 Name.starts_with(Prefix: "avx512.maskz.pternlog.")) {
4424 bool ZeroMask = Name[11] == 'z';
4425 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4426 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4427 Intrinsic::ID IID;
4428 if (VecWidth == 128 && EltWidth == 32)
4429 IID = Intrinsic::x86_avx512_pternlog_d_128;
4430 else if (VecWidth == 256 && EltWidth == 32)
4431 IID = Intrinsic::x86_avx512_pternlog_d_256;
4432 else if (VecWidth == 512 && EltWidth == 32)
4433 IID = Intrinsic::x86_avx512_pternlog_d_512;
4434 else if (VecWidth == 128 && EltWidth == 64)
4435 IID = Intrinsic::x86_avx512_pternlog_q_128;
4436 else if (VecWidth == 256 && EltWidth == 64)
4437 IID = Intrinsic::x86_avx512_pternlog_q_256;
4438 else if (VecWidth == 512 && EltWidth == 64)
4439 IID = Intrinsic::x86_avx512_pternlog_q_512;
4440 else
4441 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4442
4443 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4444 CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)};
4445 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4446 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4447 : CI->getArgOperand(i: 0);
4448 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: PassThru);
4449 } else if (Name.starts_with(Prefix: "avx512.mask.vpmadd52") ||
4450 Name.starts_with(Prefix: "avx512.maskz.vpmadd52")) {
4451 bool ZeroMask = Name[11] == 'z';
4452 bool High = Name[20] == 'h' || Name[21] == 'h';
4453 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4454 Intrinsic::ID IID;
4455 if (VecWidth == 128 && !High)
4456 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4457 else if (VecWidth == 256 && !High)
4458 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4459 else if (VecWidth == 512 && !High)
4460 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4461 else if (VecWidth == 128 && High)
4462 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4463 else if (VecWidth == 256 && High)
4464 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4465 else if (VecWidth == 512 && High)
4466 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4467 else
4468 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4469
4470 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4471 CI->getArgOperand(i: 2)};
4472 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4473 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4474 : CI->getArgOperand(i: 0);
4475 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4476 } else if (Name.starts_with(Prefix: "avx512.mask.vpermi2var.") ||
4477 Name.starts_with(Prefix: "avx512.mask.vpermt2var.") ||
4478 Name.starts_with(Prefix: "avx512.maskz.vpermt2var.")) {
4479 bool ZeroMask = Name[11] == 'z';
4480 bool IndexForm = Name[17] == 'i';
4481 Rep = upgradeX86VPERMT2Intrinsics(Builder, CI&: *CI, ZeroMask, IndexForm);
4482 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpbusd.") ||
4483 Name.starts_with(Prefix: "avx512.maskz.vpdpbusd.") ||
4484 Name.starts_with(Prefix: "avx512.mask.vpdpbusds.") ||
4485 Name.starts_with(Prefix: "avx512.maskz.vpdpbusds.")) {
4486 bool ZeroMask = Name[11] == 'z';
4487 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4488 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4489 Intrinsic::ID IID;
4490 if (VecWidth == 128 && !IsSaturating)
4491 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4492 else if (VecWidth == 256 && !IsSaturating)
4493 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4494 else if (VecWidth == 512 && !IsSaturating)
4495 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4496 else if (VecWidth == 128 && IsSaturating)
4497 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4498 else if (VecWidth == 256 && IsSaturating)
4499 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4500 else if (VecWidth == 512 && IsSaturating)
4501 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4502 else
4503 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4504
4505 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4506 CI->getArgOperand(i: 2)};
4507
4508 // Input arguments types were incorrectly set to vectors of i32 before but
4509 // they should be vectors of i8. Insert bit cast when encountering the old
4510 // types
4511 if (Args[1]->getType()->isVectorTy() &&
4512 cast<VectorType>(Val: Args[1]->getType())
4513 ->getElementType()
4514 ->isIntegerTy(BitWidth: 32) &&
4515 Args[2]->getType()->isVectorTy() &&
4516 cast<VectorType>(Val: Args[2]->getType())
4517 ->getElementType()
4518 ->isIntegerTy(BitWidth: 32)) {
4519 Type *NewArgType = nullptr;
4520 if (VecWidth == 128)
4521 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 16, Scalable: false);
4522 else if (VecWidth == 256)
4523 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 32, Scalable: false);
4524 else if (VecWidth == 512)
4525 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 64, Scalable: false);
4526 else
4527 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected vector bit width",
4528 CI);
4529
4530 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
4531 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
4532 }
4533
4534 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4535 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4536 : CI->getArgOperand(i: 0);
4537 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4538 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpwssd.") ||
4539 Name.starts_with(Prefix: "avx512.maskz.vpdpwssd.") ||
4540 Name.starts_with(Prefix: "avx512.mask.vpdpwssds.") ||
4541 Name.starts_with(Prefix: "avx512.maskz.vpdpwssds.")) {
4542 bool ZeroMask = Name[11] == 'z';
4543 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4544 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4545 Intrinsic::ID IID;
4546 if (VecWidth == 128 && !IsSaturating)
4547 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4548 else if (VecWidth == 256 && !IsSaturating)
4549 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4550 else if (VecWidth == 512 && !IsSaturating)
4551 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4552 else if (VecWidth == 128 && IsSaturating)
4553 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4554 else if (VecWidth == 256 && IsSaturating)
4555 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4556 else if (VecWidth == 512 && IsSaturating)
4557 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4558 else
4559 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4560
4561 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4562 CI->getArgOperand(i: 2)};
4563
4564 // Input arguments types were incorrectly set to vectors of i32 before but
4565 // they should be vectors of i16. Insert bit cast when encountering the old
4566 // types
4567 if (Args[1]->getType()->isVectorTy() &&
4568 cast<VectorType>(Val: Args[1]->getType())
4569 ->getElementType()
4570 ->isIntegerTy(BitWidth: 32) &&
4571 Args[2]->getType()->isVectorTy() &&
4572 cast<VectorType>(Val: Args[2]->getType())
4573 ->getElementType()
4574 ->isIntegerTy(BitWidth: 32)) {
4575 Type *NewArgType = nullptr;
4576 if (VecWidth == 128)
4577 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 8, Scalable: false);
4578 else if (VecWidth == 256)
4579 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 16, Scalable: false);
4580 else if (VecWidth == 512)
4581 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 32, Scalable: false);
4582 else
4583 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected vector bit width",
4584 CI);
4585
4586 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
4587 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
4588 }
4589
4590 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4591 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4592 : CI->getArgOperand(i: 0);
4593 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4594 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4595 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4596 Name == "subborrow.u32" || Name == "subborrow.u64") {
4597 Intrinsic::ID IID;
4598 if (Name[0] == 'a' && Name.back() == '2')
4599 IID = Intrinsic::x86_addcarry_32;
4600 else if (Name[0] == 'a' && Name.back() == '4')
4601 IID = Intrinsic::x86_addcarry_64;
4602 else if (Name[0] == 's' && Name.back() == '2')
4603 IID = Intrinsic::x86_subborrow_32;
4604 else if (Name[0] == 's' && Name.back() == '4')
4605 IID = Intrinsic::x86_subborrow_64;
4606 else
4607 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4608
4609 // Make a call with 3 operands.
4610 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4611 CI->getArgOperand(i: 2)};
4612 Value *NewCall = Builder.CreateIntrinsic(ID: IID, Args);
4613
4614 // Extract the second result and store it.
4615 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
4616 Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 3), Align: Align(1));
4617 // Replace the original call result with the first result of the new call.
4618 Value *CF = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
4619
4620 CI->replaceAllUsesWith(V: CF);
4621 Rep = nullptr;
4622 } else if (Name.starts_with(Prefix: "avx512.mask.") &&
4623 upgradeAVX512MaskToSelect(Name, Builder, CI&: *CI, Rep)) {
4624 // Rep will be updated by the call in the condition.
4625 } else if (Name.starts_with(Prefix: "bmi.pdep.")) {
4626 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::pdep);
4627 } else if (Name.starts_with(Prefix: "bmi.pext.")) {
4628 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::pext);
4629 } else
4630 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4631
4632 return Rep;
4633}
4634
4635static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4636 Function *F, IRBuilder<> &Builder) {
4637 if (Name.starts_with(Prefix: "neon.bfcvt")) {
4638 if (Name.starts_with(Prefix: "neon.bfcvtn2")) {
4639 SmallVector<int, 32> LoMask(4);
4640 std::iota(first: LoMask.begin(), last: LoMask.end(), value: 0);
4641 SmallVector<int, 32> ConcatMask(8);
4642 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4643 Value *Inactive = Builder.CreateShuffleVector(V: CI->getOperand(i_nocapture: 0), Mask: LoMask);
4644 Value *Trunc =
4645 Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 1), DestTy: Inactive->getType());
4646 return Builder.CreateShuffleVector(V1: Inactive, V2: Trunc, Mask: ConcatMask);
4647 } else if (Name.starts_with(Prefix: "neon.bfcvtn")) {
4648 SmallVector<int, 32> ConcatMask(8);
4649 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4650 Type *V4BF16 =
4651 FixedVectorType::get(ElementType: Type::getBFloatTy(C&: F->getContext()), NumElts: 4);
4652 Value *Trunc = Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0), DestTy: V4BF16);
4653 dbgs() << "Trunc: " << *Trunc << "\n";
4654 return Builder.CreateShuffleVector(
4655 V1: Trunc, V2: ConstantAggregateZero::get(Ty: V4BF16), Mask: ConcatMask);
4656 } else {
4657 return Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0),
4658 DestTy: Type::getBFloatTy(C&: F->getContext()));
4659 }
4660 } else if (Name.starts_with(Prefix: "sve.fcvt")) {
4661 Intrinsic::ID NewID =
4662 StringSwitch<Intrinsic::ID>(Name)
4663 .Case(S: "sve.fcvt.bf16f32", Value: Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4664 .Case(S: "sve.fcvtnt.bf16f32",
4665 Value: Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4666 .Default(Value: Intrinsic::not_intrinsic);
4667 if (NewID == Intrinsic::not_intrinsic)
4668 llvm_unreachable("Unhandled Intrinsic!");
4669
4670 SmallVector<Value *, 3> Args(CI->args());
4671
4672 // The original intrinsics incorrectly used a predicate based on the
4673 // smallest element type rather than the largest.
4674 Type *BadPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
4675 Type *GoodPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
4676
4677 if (Args[1]->getType() != BadPredTy)
4678 llvm_unreachable("Unexpected predicate type!");
4679
4680 Args[1] = Builder.CreateIntrinsic(ID: Intrinsic::aarch64_sve_convert_to_svbool,
4681 OverloadTypes: BadPredTy, Args: Args[1]);
4682 Args[1] = Builder.CreateIntrinsic(
4683 ID: Intrinsic::aarch64_sve_convert_from_svbool, OverloadTypes: GoodPredTy, Args: Args[1]);
4684
4685 return Builder.CreateIntrinsic(ID: NewID, Args, /*FMFSource=*/nullptr,
4686 Name: CI->getName());
4687 }
4688
4689 if (Name == "neon.vcvtfp2hf")
4690 return Builder.CreateBitCast(
4691 V: Builder.CreateFPTrunc(
4692 V: CI->getOperand(i_nocapture: 0),
4693 DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C&: F->getContext()), NumElts: 4)),
4694 DestTy: FixedVectorType::get(ElementType: Type::getInt16Ty(C&: F->getContext()), NumElts: 4));
4695 if (Name == "neon.vcvthf2fp")
4696 return Builder.CreateFPExt(
4697 V: Builder.CreateBitCast(
4698 V: CI->getOperand(i_nocapture: 0),
4699 DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C&: F->getContext()), NumElts: 4)),
4700 DestTy: FixedVectorType::get(ElementType: Type::getFloatTy(C&: F->getContext()), NumElts: 4));
4701
4702 llvm_unreachable("Unhandled Intrinsic!");
4703}
4704
4705static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4706 IRBuilder<> &Builder) {
4707 if (Name == "mve.vctp64.old") {
4708 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4709 // correct type.
4710 Value *VCTP = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_vctp64, OverloadTypes: {},
4711 Args: CI->getArgOperand(i: 0),
4712 /*FMFSource=*/nullptr, Name: CI->getName());
4713 Value *C1 = Builder.CreateIntrinsic(
4714 ID: Intrinsic::arm_mve_pred_v2i,
4715 OverloadTypes: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 2, Scalable: false)}, Args: VCTP);
4716 return Builder.CreateIntrinsic(
4717 ID: Intrinsic::arm_mve_pred_i2v,
4718 OverloadTypes: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: C1);
4719 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4720 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4721 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4722 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4723 Name ==
4724 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4725 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4726 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4727 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4728 Name ==
4729 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4730 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4731 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4732 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4733 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4734 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4735 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4736 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4737 std::vector<Type *> Tys;
4738 unsigned ID = CI->getIntrinsicID();
4739 Type *V2I1Ty = FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts: 2);
4740 switch (ID) {
4741 case Intrinsic::arm_mve_mull_int_predicated:
4742 case Intrinsic::arm_mve_vqdmull_predicated:
4743 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4744 Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), V2I1Ty};
4745 break;
4746 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4747 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4748 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4749 Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 0)->getType(),
4750 V2I1Ty};
4751 break;
4752 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4753 Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(),
4754 CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
4755 break;
4756 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4757 Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 1)->getType(),
4758 CI->getOperand(i_nocapture: 2)->getType(), V2I1Ty};
4759 break;
4760 case Intrinsic::arm_cde_vcx1q_predicated:
4761 case Intrinsic::arm_cde_vcx1qa_predicated:
4762 case Intrinsic::arm_cde_vcx2q_predicated:
4763 case Intrinsic::arm_cde_vcx2qa_predicated:
4764 case Intrinsic::arm_cde_vcx3q_predicated:
4765 case Intrinsic::arm_cde_vcx3qa_predicated:
4766 Tys = {CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
4767 break;
4768 default:
4769 llvm_unreachable("Unhandled Intrinsic!");
4770 }
4771
4772 std::vector<Value *> Ops;
4773 for (Value *Op : CI->args()) {
4774 Type *Ty = Op->getType();
4775 if (Ty->getScalarSizeInBits() == 1) {
4776 Value *C1 = Builder.CreateIntrinsic(
4777 ID: Intrinsic::arm_mve_pred_v2i,
4778 OverloadTypes: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: Op);
4779 Op = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_pred_i2v, OverloadTypes: {V2I1Ty}, Args: C1);
4780 }
4781 Ops.push_back(x: Op);
4782 }
4783
4784 return Builder.CreateIntrinsic(ID, OverloadTypes: Tys, Args: Ops, /*FMFSource=*/nullptr,
4785 Name: CI->getName());
4786 }
4787 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4788}
4789
4790// These are expected to have the arguments:
4791// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4792//
4793// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4794//
4795static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4796 Function *F, IRBuilder<> &Builder) {
4797 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4798 // for compatibility.
4799 auto UpgradeLegacyWMMAIUIntrinsicCall =
4800 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4801 ArrayRef<Type *> OverloadTys) -> Value * {
4802 // Prepare arguments, append clamp=0 for compatibility
4803 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4804 Args.push_back(Elt: Builder.getFalse());
4805
4806 // Insert the declaration for the right overload types
4807 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4808 M: F->getParent(), id: F->getIntrinsicID(), OverloadTys);
4809
4810 // Copy operand bundles if any
4811 SmallVector<OperandBundleDef, 1> Bundles;
4812 CI->getOperandBundlesAsDefs(Defs&: Bundles);
4813
4814 // Create the new call and copy calling properties
4815 auto *NewCall = cast<CallInst>(Val: Builder.CreateCall(Callee: NewDecl, Args, OpBundles: Bundles));
4816 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
4817 NewCall->setCallingConv(CI->getCallingConv());
4818 NewCall->setAttributes(CI->getAttributes());
4819 NewCall->setDebugLoc(CI->getDebugLoc());
4820 NewCall->copyMetadata(SrcInst: *CI);
4821 return NewCall;
4822 };
4823
4824 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4825 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4826 "intrinsic should have 7 arguments");
4827 Type *T1 = CI->getArgOperand(i: 4)->getType();
4828 Type *T2 = CI->getArgOperand(i: 1)->getType();
4829 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4830 }
4831 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4832 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4833 "intrinsic should have 8 arguments");
4834 Type *T1 = CI->getArgOperand(i: 4)->getType();
4835 Type *T2 = CI->getArgOperand(i: 1)->getType();
4836 Type *T3 = CI->getArgOperand(i: 3)->getType();
4837 Type *T4 = CI->getArgOperand(i: 5)->getType();
4838 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4839 }
4840
4841 switch (F->getIntrinsicID()) {
4842 default:
4843 break;
4844 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4845 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4846 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4847 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4848 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4849 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4850 // Drop src0 and src1 modifiers.
4851 const Value *Op0 = CI->getArgOperand(i: 0);
4852 const Value *Op2 = CI->getArgOperand(i: 2);
4853 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
4854 const ConstantInt *ModA = dyn_cast<ConstantInt>(Val: Op0);
4855 const ConstantInt *ModB = dyn_cast<ConstantInt>(Val: Op2);
4856 if (!ModA->isZero() || !ModB->isZero())
4857 reportFatalUsageError(reason: Name + " matrix A and B modifiers shall be zero");
4858
4859 SmallVector<Value *, 8> Args{CI->getArgOperand(i: 1), CI->getArgOperand(i: 3)};
4860 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4861 Args.push_back(Elt: CI->getArgOperand(i: I));
4862
4863 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4864 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4865 Overloads.push_back(Elt: Args[3]->getType());
4866 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4867 M: F->getParent(), id: F->getIntrinsicID(), OverloadTys: Overloads);
4868
4869 SmallVector<OperandBundleDef, 1> Bundles;
4870 CI->getOperandBundlesAsDefs(Defs&: Bundles);
4871
4872 auto *NewCall = cast<CallInst>(Val: Builder.CreateCall(Callee: NewDecl, Args, OpBundles: Bundles));
4873 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
4874 NewCall->setCallingConv(CI->getCallingConv());
4875 NewCall->setAttributes(CI->getAttributes());
4876 NewCall->setDebugLoc(CI->getDebugLoc());
4877 NewCall->copyMetadata(SrcInst: *CI);
4878 NewCall->takeName(V: CI);
4879 return NewCall;
4880 }
4881 }
4882
4883 AtomicRMWInst::BinOp RMWOp =
4884 StringSwitch<AtomicRMWInst::BinOp>(Name)
4885 .StartsWith(S: "ds.fadd", Value: AtomicRMWInst::FAdd)
4886 .StartsWith(S: "ds.fmin", Value: AtomicRMWInst::FMin)
4887 .StartsWith(S: "ds.fmax", Value: AtomicRMWInst::FMax)
4888 .StartsWith(S: "atomic.inc.", Value: AtomicRMWInst::UIncWrap)
4889 .StartsWith(S: "atomic.dec.", Value: AtomicRMWInst::UDecWrap)
4890 .StartsWith(S: "global.atomic.fadd", Value: AtomicRMWInst::FAdd)
4891 .StartsWith(S: "flat.atomic.fadd", Value: AtomicRMWInst::FAdd)
4892 .StartsWith(S: "global.atomic.fmin", Value: AtomicRMWInst::FMin)
4893 .StartsWith(S: "flat.atomic.fmin", Value: AtomicRMWInst::FMin)
4894 .StartsWith(S: "global.atomic.fmax", Value: AtomicRMWInst::FMax)
4895 .StartsWith(S: "flat.atomic.fmax", Value: AtomicRMWInst::FMax)
4896 .StartsWith(S: "atomic.cond.sub", Value: AtomicRMWInst::USubCond)
4897 .StartsWith(S: "atomic.csub", Value: AtomicRMWInst::USubSat);
4898
4899 unsigned NumOperands = CI->getNumOperands();
4900 if (NumOperands < 3) // Malformed bitcode.
4901 return nullptr;
4902
4903 Value *Ptr = CI->getArgOperand(i: 0);
4904 PointerType *PtrTy = dyn_cast<PointerType>(Val: Ptr->getType());
4905 if (!PtrTy) // Malformed.
4906 return nullptr;
4907
4908 Value *Val = CI->getArgOperand(i: 1);
4909 if (Val->getType() != CI->getType()) // Malformed.
4910 return nullptr;
4911
4912 ConstantInt *OrderArg = nullptr;
4913 bool IsVolatile = false;
4914
4915 // These should have 5 arguments (plus the callee). A separate version of the
4916 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4917 if (NumOperands > 3)
4918 OrderArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
4919
4920 // Ignore scope argument at 3
4921
4922 if (NumOperands > 5) {
4923 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 4));
4924 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4925 }
4926
4927 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4928 if (OrderArg && isValidAtomicOrdering(I: OrderArg->getZExtValue()))
4929 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4930 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4931 Order = AtomicOrdering::SequentiallyConsistent;
4932
4933 LLVMContext &Ctx = F->getContext();
4934
4935 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4936 Type *RetTy = CI->getType();
4937 if (VectorType *VT = dyn_cast<VectorType>(Val: RetTy)) {
4938 if (VT->getElementType()->isIntegerTy(BitWidth: 16)) {
4939 VectorType *AsBF16 =
4940 VectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), EC: VT->getElementCount());
4941 Val = Builder.CreateBitCast(V: Val, DestTy: AsBF16);
4942 }
4943 }
4944
4945 // The scope argument never really worked correctly. Use agent as the most
4946 // conservative option which should still always produce the instruction.
4947 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID(SSN: "agent");
4948 AtomicRMWInst *RMW =
4949 Builder.CreateAtomicRMW(Op: RMWOp, Ptr, Val, Align: std::nullopt, Ordering: Order, SSID);
4950
4951 unsigned AddrSpace = PtrTy->getAddressSpace();
4952 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4953 MDNode *EmptyMD = MDNode::get(Context&: F->getContext(), MDs: {});
4954 RMW->setMetadata(Kind: "amdgpu.no.fine.grained.memory", Node: EmptyMD);
4955 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4956 RMW->setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: EmptyMD);
4957 }
4958
4959 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4960 MDBuilder MDB(F->getContext());
4961 MDNode *RangeNotPrivate =
4962 MDB.createRange(Lo: APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4963 Hi: APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4964 RMW->setMetadata(KindID: LLVMContext::MD_noalias_addrspace, Node: RangeNotPrivate);
4965 }
4966
4967 if (IsVolatile)
4968 RMW->setVolatile(true);
4969
4970 return Builder.CreateBitCast(V: RMW, DestTy: RetTy);
4971}
4972
4973/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4974/// plain MDNode, as it's the verifier's job to check these are the correct
4975/// types later.
4976static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4977 if (Op < CI->arg_size()) {
4978 if (MetadataAsValue *MAV =
4979 dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op))) {
4980 Metadata *MD = MAV->getMetadata();
4981 return dyn_cast_if_present<MDNode>(Val: MD);
4982 }
4983 }
4984 return nullptr;
4985}
4986
4987/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4988static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4989 if (Op < CI->arg_size())
4990 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op)))
4991 return MAV->getMetadata();
4992 return nullptr;
4993}
4994
4995/// Convert debug intrinsic calls to non-instruction debug records.
4996/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4997/// \p CI - The debug intrinsic call.
4998static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4999 DbgRecord *DR = nullptr;
5000 if (Name == "label") {
5001 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(Label: unwrapMAVOp(CI, Op: 0));
5002 } else if (Name == "assign") {
5003 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
5004 Type: DbgVariableRecord::LocationType::Assign, Val: unwrapMAVMetadataOp(CI, Op: 0),
5005 Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: unwrapMAVOp(CI, Op: 3),
5006 Address: unwrapMAVMetadataOp(CI, Op: 4),
5007 /*The address is a Value ref, it will be stored as a Metadata */
5008 AddressExpression: unwrapMAVOp(CI, Op: 5));
5009 } else if (Name == "declare") {
5010 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
5011 Type: DbgVariableRecord::LocationType::Declare, Val: unwrapMAVMetadataOp(CI, Op: 0),
5012 Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: nullptr, Address: nullptr, AddressExpression: nullptr);
5013 } else if (Name == "addr") {
5014 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
5015 MDNode *ExprNode = unwrapMAVOp(CI, Op: 2);
5016 // Don't try to add something to the expression if it's not an expression.
5017 // Instead, allow the verifier to fail later.
5018 if (DIExpression *Expr = dyn_cast<DIExpression>(Val: ExprNode)) {
5019 ExprNode = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
5020 }
5021 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
5022 Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
5023 Variable: unwrapMAVOp(CI, Op: 1), Expression: ExprNode, AssignID: nullptr, Address: nullptr, AddressExpression: nullptr);
5024 } else if (Name == "value") {
5025 // An old version of dbg.value had an extra offset argument.
5026 unsigned VarOp = 1;
5027 unsigned ExprOp = 2;
5028 if (CI->arg_size() == 4) {
5029 auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1));
5030 // Nonzero offset dbg.values get dropped without a replacement.
5031 if (!Offset || !Offset->isNullValue())
5032 return;
5033 VarOp = 2;
5034 ExprOp = 3;
5035 }
5036 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
5037 Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
5038 Variable: unwrapMAVOp(CI, Op: VarOp), Expression: unwrapMAVOp(CI, Op: ExprOp), AssignID: nullptr, Address: nullptr,
5039 AddressExpression: nullptr);
5040 }
5041 DR->setDebugLoc(CI->getDebugLoc());
5042 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
5043 CI->getParent()->insertDbgRecordBefore(DR, Here: CI->getIterator());
5044}
5045
5046static Value *upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder) {
5047 auto *Offset = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
5048 if (!Offset)
5049 reportFatalUsageError(reason: "Invalid llvm.vector.splice offset argument");
5050 int64_t OffsetVal = Offset->getSExtValue();
5051 return Builder.CreateIntrinsic(ID: OffsetVal >= 0
5052 ? Intrinsic::vector_splice_left
5053 : Intrinsic::vector_splice_right,
5054 OverloadTypes: CI->getType(),
5055 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5056 Builder.getInt32(C: std::abs(i: OffsetVal))});
5057}
5058
5059static Value *upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI,
5060 Function *F, IRBuilder<> &Builder) {
5061 if (Name.starts_with(Prefix: "to.fp16")) {
5062 Value *Cast =
5063 Builder.CreateFPTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
5064 return Builder.CreateBitCast(V: Cast, DestTy: CI->getType());
5065 }
5066
5067 if (Name.starts_with(Prefix: "from.fp16")) {
5068 Value *Cast =
5069 Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
5070 return Builder.CreateFPExt(V: Cast, DestTy: CI->getType());
5071 }
5072
5073 return nullptr;
5074}
5075
5076/// Upgrade a call to an old intrinsic. All argument and return casting must be
5077/// provided to seamlessly integrate with existing context.
5078void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
5079 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
5080 // checks the callee's function type matches. It's likely we need to handle
5081 // type changes here.
5082 Function *F = dyn_cast<Function>(Val: CI->getCalledOperand());
5083 if (!F)
5084 return;
5085
5086 LLVMContext &C = CI->getContext();
5087 IRBuilder<> Builder(C);
5088 if (isa<FPMathOperator>(Val: CI))
5089 Builder.setFastMathFlags(CI->getFastMathFlags());
5090 Builder.SetInsertPoint(TheBB: CI->getParent(), IP: CI->getIterator());
5091
5092 if (!NewFn) {
5093 // Get the Function's name.
5094 StringRef Name = F->getName();
5095 if (!Name.consume_front(Prefix: "llvm."))
5096 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
5097
5098 bool IsX86 = Name.consume_front(Prefix: "x86.");
5099 bool IsNVVM = Name.consume_front(Prefix: "nvvm.");
5100 bool IsAArch64 = Name.consume_front(Prefix: "aarch64.");
5101 bool IsARM = Name.consume_front(Prefix: "arm.");
5102 bool IsAMDGCN = Name.consume_front(Prefix: "amdgcn.");
5103 bool IsDbg = Name.consume_front(Prefix: "dbg.");
5104 bool IsOldSplice =
5105 (Name.consume_front(Prefix: "experimental.vector.splice") ||
5106 Name.consume_front(Prefix: "vector.splice")) &&
5107 !(Name.starts_with(Prefix: ".left") || Name.starts_with(Prefix: ".right"));
5108 Value *Rep = nullptr;
5109
5110 if (!IsX86 && Name == "stackprotectorcheck") {
5111 Rep = nullptr;
5112 } else if (IsNVVM) {
5113 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5114 } else if (IsX86) {
5115 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5116 } else if (IsAArch64) {
5117 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5118 } else if (IsARM) {
5119 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5120 } else if (IsAMDGCN) {
5121 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5122 } else if (IsDbg) {
5123 upgradeDbgIntrinsicToDbgRecord(Name, CI);
5124 } else if (IsOldSplice) {
5125 Rep = upgradeVectorSplice(CI, Builder);
5126 } else if (Name.consume_front(Prefix: "convert.")) {
5127 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5128 } else {
5129 llvm_unreachable("Unknown function for CallBase upgrade.");
5130 }
5131
5132 if (Rep)
5133 CI->replaceAllUsesWith(V: Rep);
5134 CI->eraseFromParent();
5135 return;
5136 }
5137
5138 const auto &DefaultCase = [&]() -> void {
5139 if (F == NewFn)
5140 return;
5141
5142 if (CI->getFunctionType() == NewFn->getFunctionType()) {
5143 // Handle generic mangling change.
5144 assert(
5145 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5146 "Unknown function for CallBase upgrade and isn't just a name change");
5147 CI->setCalledFunction(NewFn);
5148 return;
5149 }
5150
5151 // This must be an upgrade from a named to a literal struct.
5152 if (auto *OldST = dyn_cast<StructType>(Val: CI->getType())) {
5153 assert(OldST != NewFn->getReturnType() &&
5154 "Return type must have changed");
5155 assert(OldST->getNumElements() ==
5156 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5157 "Must have same number of elements");
5158
5159 SmallVector<Value *> Args(CI->args());
5160 CallInst *NewCI = Builder.CreateCall(Callee: NewFn, Args);
5161 NewCI->setAttributes(CI->getAttributes());
5162 Value *Res = PoisonValue::get(T: OldST);
5163 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5164 Value *Elem = Builder.CreateExtractValue(Agg: NewCI, Idxs: Idx);
5165 Res = Builder.CreateInsertValue(Agg: Res, Val: Elem, Idxs: Idx);
5166 }
5167 CI->replaceAllUsesWith(V: Res);
5168 CI->eraseFromParent();
5169 return;
5170 }
5171
5172 // We're probably about to produce something invalid. Let the verifier catch
5173 // it instead of dying here.
5174 CI->setCalledOperand(
5175 ConstantExpr::getPointerCast(C: NewFn, Ty: CI->getCalledOperand()->getType()));
5176 return;
5177 };
5178 CallInst *NewCall = nullptr;
5179 switch (NewFn->getIntrinsicID()) {
5180 default: {
5181 DefaultCase();
5182 return;
5183 }
5184 case Intrinsic::arm_neon_vst1:
5185 case Intrinsic::arm_neon_vst2:
5186 case Intrinsic::arm_neon_vst3:
5187 case Intrinsic::arm_neon_vst4:
5188 case Intrinsic::arm_neon_vst2lane:
5189 case Intrinsic::arm_neon_vst3lane:
5190 case Intrinsic::arm_neon_vst4lane: {
5191 SmallVector<Value *, 4> Args(CI->args());
5192 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5193 break;
5194 }
5195 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5196 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5197 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5198 LLVMContext &Ctx = F->getParent()->getContext();
5199 SmallVector<Value *, 4> Args(CI->args());
5200 Args[3] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx),
5201 V: cast<ConstantInt>(Val: Args[3])->getZExtValue());
5202 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5203 break;
5204 }
5205 case Intrinsic::aarch64_sve_ld3_sret:
5206 case Intrinsic::aarch64_sve_ld4_sret:
5207 case Intrinsic::aarch64_sve_ld2_sret: {
5208 // Is this a trivial remangle of the name to support ptr address spaces?
5209 if (isa<StructType>(Val: F->getReturnType())) {
5210 DefaultCase();
5211 return;
5212 }
5213
5214 StringRef Name = F->getName();
5215 Name = Name.substr(Start: 5);
5216 unsigned N = StringSwitch<unsigned>(Name)
5217 .StartsWith(S: "aarch64.sve.ld2", Value: 2)
5218 .StartsWith(S: "aarch64.sve.ld3", Value: 3)
5219 .StartsWith(S: "aarch64.sve.ld4", Value: 4)
5220 .Default(Value: 0);
5221 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
5222 unsigned MinElts = RetTy->getMinNumElements() / N;
5223 SmallVector<Value *, 2> Args(CI->args());
5224 Value *NewLdCall = Builder.CreateCall(Callee: NewFn, Args);
5225 Value *Ret = llvm::PoisonValue::get(T: RetTy);
5226 for (unsigned I = 0; I < N; I++) {
5227 Value *SRet = Builder.CreateExtractValue(Agg: NewLdCall, Idxs: I);
5228 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: SRet, Idx: I * MinElts);
5229 }
5230 NewCall = dyn_cast<CallInst>(Val: Ret);
5231 break;
5232 }
5233
5234 case Intrinsic::coro_end: {
5235 SmallVector<Value *, 3> Args(CI->args());
5236 Args.push_back(Elt: ConstantTokenNone::get(Context&: CI->getContext()));
5237 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5238 break;
5239 }
5240
5241 case Intrinsic::vector_extract: {
5242 StringRef Name = F->getName();
5243 Name = Name.substr(Start: 5); // Strip llvm
5244 if (!Name.starts_with(Prefix: "aarch64.sve.tuple.get")) {
5245 DefaultCase();
5246 return;
5247 }
5248 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
5249 unsigned MinElts = RetTy->getMinNumElements();
5250 unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
5251 Value *NewIdx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts);
5252 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), NewIdx});
5253 break;
5254 }
5255
5256 case Intrinsic::vector_insert: {
5257 StringRef Name = F->getName();
5258 Name = Name.substr(Start: 5);
5259 if (!Name.starts_with(Prefix: "aarch64.sve.tuple")) {
5260 DefaultCase();
5261 return;
5262 }
5263 if (Name.starts_with(Prefix: "aarch64.sve.tuple.set")) {
5264 unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
5265 auto *Ty = cast<ScalableVectorType>(Val: CI->getArgOperand(i: 2)->getType());
5266 Value *NewIdx =
5267 ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * Ty->getMinNumElements());
5268 NewCall = Builder.CreateCall(
5269 Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), NewIdx});
5270 break;
5271 }
5272 if (Name.starts_with(Prefix: "aarch64.sve.tuple.create")) {
5273 unsigned N = StringSwitch<unsigned>(Name)
5274 .StartsWith(S: "aarch64.sve.tuple.create2", Value: 2)
5275 .StartsWith(S: "aarch64.sve.tuple.create3", Value: 3)
5276 .StartsWith(S: "aarch64.sve.tuple.create4", Value: 4)
5277 .Default(Value: 0);
5278 assert(N > 1 && "Create is expected to be between 2-4");
5279 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
5280 Value *Ret = llvm::PoisonValue::get(T: RetTy);
5281 unsigned MinElts = RetTy->getMinNumElements() / N;
5282 for (unsigned I = 0; I < N; I++) {
5283 Value *V = CI->getArgOperand(i: I);
5284 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: V, Idx: I * MinElts);
5285 }
5286 NewCall = dyn_cast<CallInst>(Val: Ret);
5287 }
5288 break;
5289 }
5290
5291 case Intrinsic::arm_neon_bfdot:
5292 case Intrinsic::arm_neon_bfmmla:
5293 case Intrinsic::arm_neon_bfmlalb:
5294 case Intrinsic::arm_neon_bfmlalt:
5295 case Intrinsic::aarch64_neon_bfdot:
5296 case Intrinsic::aarch64_neon_bfmmla:
5297 case Intrinsic::aarch64_neon_bfmlalb:
5298 case Intrinsic::aarch64_neon_bfmlalt: {
5299 SmallVector<Value *, 3> Args;
5300 assert(CI->arg_size() == 3 &&
5301 "Mismatch between function args and call args");
5302 size_t OperandWidth =
5303 CI->getArgOperand(i: 1)->getType()->getPrimitiveSizeInBits();
5304 assert((OperandWidth == 64 || OperandWidth == 128) &&
5305 "Unexpected operand width");
5306 Type *NewTy = FixedVectorType::get(ElementType: Type::getBFloatTy(C), NumElts: OperandWidth / 16);
5307 auto Iter = CI->args().begin();
5308 Args.push_back(Elt: *Iter++);
5309 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
5310 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
5311 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5312 break;
5313 }
5314
5315 case Intrinsic::bitreverse:
5316 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
5317 break;
5318
5319 case Intrinsic::ctlz:
5320 case Intrinsic::cttz: {
5321 if (CI->arg_size() != 1) {
5322 DefaultCase();
5323 return;
5324 }
5325
5326 NewCall =
5327 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), Builder.getFalse()});
5328 break;
5329 }
5330
5331 case Intrinsic::objectsize: {
5332 Value *NullIsUnknownSize =
5333 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(i: 2);
5334 Value *Dynamic =
5335 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(i: 3);
5336 NewCall = Builder.CreateCall(
5337 Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), NullIsUnknownSize, Dynamic});
5338 break;
5339 }
5340
5341 case Intrinsic::ctpop:
5342 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
5343 break;
5344 case Intrinsic::dbg_value: {
5345 StringRef Name = F->getName();
5346 Name = Name.substr(Start: 5); // Strip llvm.
5347 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5348 if (Name.starts_with(Prefix: "dbg.addr")) {
5349 DIExpression *Expr = cast<DIExpression>(
5350 Val: cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2))->getMetadata());
5351 Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
5352 NewCall =
5353 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5354 MetadataAsValue::get(Context&: C, MD: Expr)});
5355 break;
5356 }
5357
5358 // Upgrade from the old version that had an extra offset argument.
5359 assert(CI->arg_size() == 4);
5360 // Drop nonzero offsets instead of attempting to upgrade them.
5361 if (auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1)))
5362 if (Offset->isNullValue()) {
5363 NewCall = Builder.CreateCall(
5364 Callee: NewFn,
5365 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)});
5366 break;
5367 }
5368 CI->eraseFromParent();
5369 return;
5370 }
5371
5372 case Intrinsic::ptr_annotation:
5373 // Upgrade from versions that lacked the annotation attribute argument.
5374 if (CI->arg_size() != 4) {
5375 DefaultCase();
5376 return;
5377 }
5378
5379 // Create a new call with an added null annotation attribute argument.
5380 NewCall = Builder.CreateCall(
5381 Callee: NewFn,
5382 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
5383 CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
5384 NewCall->takeName(V: CI);
5385 CI->replaceAllUsesWith(V: NewCall);
5386 CI->eraseFromParent();
5387 return;
5388
5389 case Intrinsic::var_annotation:
5390 // Upgrade from versions that lacked the annotation attribute argument.
5391 if (CI->arg_size() != 4) {
5392 DefaultCase();
5393 return;
5394 }
5395 // Create a new call with an added null annotation attribute argument.
5396 NewCall = Builder.CreateCall(
5397 Callee: NewFn,
5398 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
5399 CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
5400 NewCall->takeName(V: CI);
5401 CI->replaceAllUsesWith(V: NewCall);
5402 CI->eraseFromParent();
5403 return;
5404
5405 case Intrinsic::riscv_aes32dsi:
5406 case Intrinsic::riscv_aes32dsmi:
5407 case Intrinsic::riscv_aes32esi:
5408 case Intrinsic::riscv_aes32esmi:
5409 case Intrinsic::riscv_sm4ks:
5410 case Intrinsic::riscv_sm4ed: {
5411 // The last argument to these intrinsics used to be i8 and changed to i32.
5412 // The type overload for sm4ks and sm4ed was removed.
5413 Value *Arg2 = CI->getArgOperand(i: 2);
5414 if (Arg2->getType()->isIntegerTy(BitWidth: 32) && !CI->getType()->isIntegerTy(BitWidth: 64))
5415 return;
5416
5417 Value *Arg0 = CI->getArgOperand(i: 0);
5418 Value *Arg1 = CI->getArgOperand(i: 1);
5419 if (CI->getType()->isIntegerTy(BitWidth: 64)) {
5420 Arg0 = Builder.CreateTrunc(V: Arg0, DestTy: Builder.getInt32Ty());
5421 Arg1 = Builder.CreateTrunc(V: Arg1, DestTy: Builder.getInt32Ty());
5422 }
5423
5424 Arg2 = ConstantInt::get(Ty: Type::getInt32Ty(C),
5425 V: cast<ConstantInt>(Val: Arg2)->getZExtValue());
5426
5427 NewCall = Builder.CreateCall(Callee: NewFn, Args: {Arg0, Arg1, Arg2});
5428 Value *Res = NewCall;
5429 if (Res->getType() != CI->getType())
5430 Res = Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
5431 NewCall->takeName(V: CI);
5432 CI->replaceAllUsesWith(V: Res);
5433 CI->eraseFromParent();
5434 return;
5435 }
5436 case Intrinsic::nvvm_mapa_shared_cluster: {
5437 // Create a new call with the correct address space.
5438 NewCall =
5439 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
5440 Value *Res = NewCall;
5441 Res = Builder.CreateAddrSpaceCast(
5442 V: Res, DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED));
5443 NewCall->takeName(V: CI);
5444 CI->replaceAllUsesWith(V: Res);
5445 CI->eraseFromParent();
5446 return;
5447 }
5448 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5449 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5450 // Create a new call with the correct address space.
5451 SmallVector<Value *, 4> Args(CI->args());
5452 Args[0] = Builder.CreateAddrSpaceCast(
5453 V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5454
5455 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5456 NewCall->takeName(V: CI);
5457 CI->replaceAllUsesWith(V: NewCall);
5458 CI->eraseFromParent();
5459 return;
5460 }
5461 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5462 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5463 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5464 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5465 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5466 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5467 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5468 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5469 SmallVector<Value *, 16> Args(CI->args());
5470
5471 // Create AddrSpaceCast to shared_cluster if needed.
5472 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5473 unsigned AS = CI->getArgOperand(i: 0)->getType()->getPointerAddressSpace();
5474 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5475 Args[0] = Builder.CreateAddrSpaceCast(
5476 V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5477
5478 // Attach the flag argument for cta_group, with a
5479 // default value of 0. This handles case (2) in
5480 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5481 size_t NumArgs = CI->arg_size();
5482 Value *FlagArg = CI->getArgOperand(i: NumArgs - 3);
5483 if (!FlagArg->getType()->isIntegerTy(BitWidth: 1))
5484 Args.push_back(Elt: ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0));
5485
5486 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5487 NewCall->takeName(V: CI);
5488 CI->replaceAllUsesWith(V: NewCall);
5489 CI->eraseFromParent();
5490 return;
5491 }
5492 case Intrinsic::riscv_sha256sig0:
5493 case Intrinsic::riscv_sha256sig1:
5494 case Intrinsic::riscv_sha256sum0:
5495 case Intrinsic::riscv_sha256sum1:
5496 case Intrinsic::riscv_sm3p0:
5497 case Intrinsic::riscv_sm3p1: {
5498 // The last argument to these intrinsics used to be i8 and changed to i32.
5499 // The type overload for sm4ks and sm4ed was removed.
5500 if (!CI->getType()->isIntegerTy(BitWidth: 64))
5501 return;
5502
5503 Value *Arg =
5504 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getInt32Ty());
5505
5506 NewCall = Builder.CreateCall(Callee: NewFn, Args: Arg);
5507 Value *Res =
5508 Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
5509 NewCall->takeName(V: CI);
5510 CI->replaceAllUsesWith(V: Res);
5511 CI->eraseFromParent();
5512 return;
5513 }
5514
5515 case Intrinsic::x86_xop_vfrcz_ss:
5516 case Intrinsic::x86_xop_vfrcz_sd:
5517 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 1)});
5518 break;
5519
5520 case Intrinsic::x86_xop_vpermil2pd:
5521 case Intrinsic::x86_xop_vpermil2ps:
5522 case Intrinsic::x86_xop_vpermil2pd_256:
5523 case Intrinsic::x86_xop_vpermil2ps_256: {
5524 SmallVector<Value *, 4> Args(CI->args());
5525 VectorType *FltIdxTy = cast<VectorType>(Val: Args[2]->getType());
5526 VectorType *IntIdxTy = VectorType::getInteger(VTy: FltIdxTy);
5527 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: IntIdxTy);
5528 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5529 break;
5530 }
5531
5532 case Intrinsic::x86_sse41_ptestc:
5533 case Intrinsic::x86_sse41_ptestz:
5534 case Intrinsic::x86_sse41_ptestnzc: {
5535 // The arguments for these intrinsics used to be v4f32, and changed
5536 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5537 // So, the only thing required is a bitcast for both arguments.
5538 // First, check the arguments have the old type.
5539 Value *Arg0 = CI->getArgOperand(i: 0);
5540 if (Arg0->getType() != FixedVectorType::get(ElementType: Type::getFloatTy(C), NumElts: 4))
5541 return;
5542
5543 // Old intrinsic, add bitcasts
5544 Value *Arg1 = CI->getArgOperand(i: 1);
5545
5546 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
5547
5548 Value *BC0 = Builder.CreateBitCast(V: Arg0, DestTy: NewVecTy, Name: "cast");
5549 Value *BC1 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
5550
5551 NewCall = Builder.CreateCall(Callee: NewFn, Args: {BC0, BC1});
5552 break;
5553 }
5554
5555 case Intrinsic::x86_rdtscp: {
5556 // This used to take 1 arguments. If we have no arguments, it is already
5557 // upgraded.
5558 if (CI->getNumOperands() == 0)
5559 return;
5560
5561 NewCall = Builder.CreateCall(Callee: NewFn);
5562 // Extract the second result and store it.
5563 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
5564 Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
5565 // Replace the original call result with the first result of the new call.
5566 Value *TSC = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
5567
5568 NewCall->takeName(V: CI);
5569 CI->replaceAllUsesWith(V: TSC);
5570 CI->eraseFromParent();
5571 return;
5572 }
5573
5574 case Intrinsic::x86_sse41_insertps:
5575 case Intrinsic::x86_sse41_dppd:
5576 case Intrinsic::x86_sse41_dpps:
5577 case Intrinsic::x86_sse41_mpsadbw:
5578 case Intrinsic::x86_avx_dp_ps_256:
5579 case Intrinsic::x86_avx2_mpsadbw: {
5580 // Need to truncate the last argument from i32 to i8 -- this argument models
5581 // an inherently 8-bit immediate operand to these x86 instructions.
5582 SmallVector<Value *, 4> Args(CI->args());
5583
5584 // Replace the last argument with a trunc.
5585 Args.back() = Builder.CreateTrunc(V: Args.back(), DestTy: Type::getInt8Ty(C), Name: "trunc");
5586 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5587 break;
5588 }
5589
5590 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5591 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5592 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5593 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5594 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5595 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5596 SmallVector<Value *, 4> Args(CI->args());
5597 unsigned NumElts =
5598 cast<FixedVectorType>(Val: Args[0]->getType())->getNumElements();
5599 Args[3] = getX86MaskVec(Builder, Mask: Args[3], NumElts);
5600
5601 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5602 Value *Res = applyX86MaskOn1BitsVec(Builder, Vec: NewCall, Mask: nullptr);
5603
5604 NewCall->takeName(V: CI);
5605 CI->replaceAllUsesWith(V: Res);
5606 CI->eraseFromParent();
5607 return;
5608 }
5609
5610 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5611 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5612 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5613 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5614 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5615 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5616 SmallVector<Value *, 4> Args(CI->args());
5617 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
5618 if (NewFn->getIntrinsicID() ==
5619 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5620 Args[1] = Builder.CreateBitCast(
5621 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5622
5623 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5624 Value *Res = Builder.CreateBitCast(
5625 V: NewCall, DestTy: FixedVectorType::get(ElementType: Builder.getInt16Ty(), NumElts));
5626
5627 NewCall->takeName(V: CI);
5628 CI->replaceAllUsesWith(V: Res);
5629 CI->eraseFromParent();
5630 return;
5631 }
5632 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5633 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5634 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5635 SmallVector<Value *, 4> Args(CI->args());
5636 unsigned NumElts =
5637 cast<FixedVectorType>(Val: CI->getType())->getNumElements() * 2;
5638 Args[1] = Builder.CreateBitCast(
5639 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5640 Args[2] = Builder.CreateBitCast(
5641 V: Args[2], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5642
5643 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5644 break;
5645 }
5646
5647 case Intrinsic::thread_pointer: {
5648 NewCall = Builder.CreateCall(Callee: NewFn, Args: {});
5649 break;
5650 }
5651
5652 case Intrinsic::memcpy:
5653 case Intrinsic::memmove:
5654 case Intrinsic::memset: {
5655 // We have to make sure that the call signature is what we're expecting.
5656 // We only want to change the old signatures by removing the alignment arg:
5657 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5658 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5659 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5660 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5661 // Note: i8*'s in the above can be any pointer type
5662 if (CI->arg_size() != 5) {
5663 DefaultCase();
5664 return;
5665 }
5666 // Remove alignment argument (3), and add alignment attributes to the
5667 // dest/src pointers.
5668 Value *Args[4] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5669 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
5670 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5671 AttributeList OldAttrs = CI->getAttributes();
5672 AttributeList NewAttrs = AttributeList::get(
5673 C, FnAttrs: OldAttrs.getFnAttrs(), RetAttrs: OldAttrs.getRetAttrs(),
5674 ArgAttrs: {OldAttrs.getParamAttrs(ArgNo: 0), OldAttrs.getParamAttrs(ArgNo: 1),
5675 OldAttrs.getParamAttrs(ArgNo: 2), OldAttrs.getParamAttrs(ArgNo: 4)});
5676 NewCall->setAttributes(NewAttrs);
5677 auto *MemCI = cast<MemIntrinsic>(Val: NewCall);
5678 // All mem intrinsics support dest alignment.
5679 const ConstantInt *Align = cast<ConstantInt>(Val: CI->getArgOperand(i: 3));
5680 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5681 // Memcpy/Memmove also support source alignment.
5682 if (auto *MTI = dyn_cast<MemTransferInst>(Val: MemCI))
5683 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5684 break;
5685 }
5686
5687 case Intrinsic::masked_load:
5688 case Intrinsic::masked_gather:
5689 case Intrinsic::masked_store:
5690 case Intrinsic::masked_scatter: {
5691 if (CI->arg_size() != 4) {
5692 DefaultCase();
5693 return;
5694 }
5695
5696 auto GetMaybeAlign = [](Value *Op) {
5697 if (auto *CI = dyn_cast<ConstantInt>(Val: Op)) {
5698 uint64_t Val = CI->getZExtValue();
5699 if (Val == 0)
5700 return MaybeAlign();
5701 if (isPowerOf2_64(Value: Val))
5702 return MaybeAlign(Val);
5703 }
5704 reportFatalUsageError(reason: "Invalid alignment argument");
5705 };
5706 auto GetAlign = [&](Value *Op) {
5707 MaybeAlign Align = GetMaybeAlign(Op);
5708 if (Align)
5709 return *Align;
5710 reportFatalUsageError(reason: "Invalid zero alignment argument");
5711 };
5712
5713 const DataLayout &DL = CI->getDataLayout();
5714 switch (NewFn->getIntrinsicID()) {
5715 case Intrinsic::masked_load:
5716 NewCall = Builder.CreateMaskedLoad(
5717 Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0), Alignment: GetAlign(CI->getArgOperand(i: 1)),
5718 Mask: CI->getArgOperand(i: 2), PassThru: CI->getArgOperand(i: 3));
5719 break;
5720 case Intrinsic::masked_gather:
5721 NewCall = Builder.CreateMaskedGather(
5722 Ty: CI->getType(), Ptrs: CI->getArgOperand(i: 0),
5723 Alignment: DL.getValueOrABITypeAlignment(Alignment: GetMaybeAlign(CI->getArgOperand(i: 1)),
5724 Ty: CI->getType()->getScalarType()),
5725 Mask: CI->getArgOperand(i: 2), PassThru: CI->getArgOperand(i: 3));
5726 break;
5727 case Intrinsic::masked_store:
5728 NewCall = Builder.CreateMaskedStore(
5729 Val: CI->getArgOperand(i: 0), Ptr: CI->getArgOperand(i: 1),
5730 Alignment: GetAlign(CI->getArgOperand(i: 2)), Mask: CI->getArgOperand(i: 3));
5731 break;
5732 case Intrinsic::masked_scatter:
5733 NewCall = Builder.CreateMaskedScatter(
5734 Val: CI->getArgOperand(i: 0), Ptrs: CI->getArgOperand(i: 1),
5735 Alignment: DL.getValueOrABITypeAlignment(
5736 Alignment: GetMaybeAlign(CI->getArgOperand(i: 2)),
5737 Ty: CI->getArgOperand(i: 0)->getType()->getScalarType()),
5738 Mask: CI->getArgOperand(i: 3));
5739 break;
5740 default:
5741 llvm_unreachable("Unexpected intrinsic ID");
5742 }
5743 // Previous metadata is still valid.
5744 NewCall->copyMetadata(SrcInst: *CI);
5745 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
5746 break;
5747 }
5748
5749 case Intrinsic::lifetime_start:
5750 case Intrinsic::lifetime_end: {
5751 if (CI->arg_size() != 2) {
5752 DefaultCase();
5753 return;
5754 }
5755
5756 Value *Ptr = CI->getArgOperand(i: 1);
5757 // Try to strip pointer casts, such that the lifetime works on an alloca.
5758 Ptr = Ptr->stripPointerCasts();
5759 if (isa<AllocaInst>(Val: Ptr)) {
5760 // Don't use NewFn, as we might have looked through an addrspacecast.
5761 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5762 NewCall = Builder.CreateLifetimeStart(Ptr);
5763 else
5764 NewCall = Builder.CreateLifetimeEnd(Ptr);
5765 break;
5766 }
5767
5768 // Otherwise remove the lifetime marker.
5769 CI->eraseFromParent();
5770 return;
5771 }
5772
5773 case Intrinsic::x86_avx512_vpdpbusd_128:
5774 case Intrinsic::x86_avx512_vpdpbusd_256:
5775 case Intrinsic::x86_avx512_vpdpbusd_512:
5776 case Intrinsic::x86_avx512_vpdpbusds_128:
5777 case Intrinsic::x86_avx512_vpdpbusds_256:
5778 case Intrinsic::x86_avx512_vpdpbusds_512:
5779 case Intrinsic::x86_avx2_vpdpbssd_128:
5780 case Intrinsic::x86_avx2_vpdpbssd_256:
5781 case Intrinsic::x86_avx10_vpdpbssd_512:
5782 case Intrinsic::x86_avx2_vpdpbssds_128:
5783 case Intrinsic::x86_avx2_vpdpbssds_256:
5784 case Intrinsic::x86_avx10_vpdpbssds_512:
5785 case Intrinsic::x86_avx2_vpdpbsud_128:
5786 case Intrinsic::x86_avx2_vpdpbsud_256:
5787 case Intrinsic::x86_avx10_vpdpbsud_512:
5788 case Intrinsic::x86_avx2_vpdpbsuds_128:
5789 case Intrinsic::x86_avx2_vpdpbsuds_256:
5790 case Intrinsic::x86_avx10_vpdpbsuds_512:
5791 case Intrinsic::x86_avx2_vpdpbuud_128:
5792 case Intrinsic::x86_avx2_vpdpbuud_256:
5793 case Intrinsic::x86_avx10_vpdpbuud_512:
5794 case Intrinsic::x86_avx2_vpdpbuuds_128:
5795 case Intrinsic::x86_avx2_vpdpbuuds_256:
5796 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5797 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5798 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5799 CI->getArgOperand(i: 2)};
5800 Type *NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: NumElts, Scalable: false);
5801 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
5802 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
5803
5804 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5805 break;
5806 }
5807 case Intrinsic::x86_avx512_vpdpwssd_128:
5808 case Intrinsic::x86_avx512_vpdpwssd_256:
5809 case Intrinsic::x86_avx512_vpdpwssd_512:
5810 case Intrinsic::x86_avx512_vpdpwssds_128:
5811 case Intrinsic::x86_avx512_vpdpwssds_256:
5812 case Intrinsic::x86_avx512_vpdpwssds_512:
5813 case Intrinsic::x86_avx2_vpdpwsud_128:
5814 case Intrinsic::x86_avx2_vpdpwsud_256:
5815 case Intrinsic::x86_avx10_vpdpwsud_512:
5816 case Intrinsic::x86_avx2_vpdpwsuds_128:
5817 case Intrinsic::x86_avx2_vpdpwsuds_256:
5818 case Intrinsic::x86_avx10_vpdpwsuds_512:
5819 case Intrinsic::x86_avx2_vpdpwusd_128:
5820 case Intrinsic::x86_avx2_vpdpwusd_256:
5821 case Intrinsic::x86_avx10_vpdpwusd_512:
5822 case Intrinsic::x86_avx2_vpdpwusds_128:
5823 case Intrinsic::x86_avx2_vpdpwusds_256:
5824 case Intrinsic::x86_avx10_vpdpwusds_512:
5825 case Intrinsic::x86_avx2_vpdpwuud_128:
5826 case Intrinsic::x86_avx2_vpdpwuud_256:
5827 case Intrinsic::x86_avx10_vpdpwuud_512:
5828 case Intrinsic::x86_avx2_vpdpwuuds_128:
5829 case Intrinsic::x86_avx2_vpdpwuuds_256:
5830 case Intrinsic::x86_avx10_vpdpwuuds_512:
5831 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5832 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5833 CI->getArgOperand(i: 2)};
5834 Type *NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: NumElts, Scalable: false);
5835 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
5836 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
5837
5838 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5839 break;
5840 }
5841 assert(NewCall && "Should have either set this variable or returned through "
5842 "the default case");
5843 NewCall->takeName(V: CI);
5844 CI->replaceAllUsesWith(V: NewCall);
5845 CI->eraseFromParent();
5846}
5847
5848void llvm::UpgradeCallsToIntrinsic(Function *F) {
5849 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5850
5851 // Check if this function should be upgraded and get the replacement function
5852 // if there is one.
5853 Function *NewFn;
5854 if (UpgradeIntrinsicFunction(F, NewFn)) {
5855 // Replace all users of the old function with the new function or new
5856 // instructions. This is not a range loop because the call is deleted.
5857 for (User *U : make_early_inc_range(Range: F->users()))
5858 if (CallBase *CB = dyn_cast<CallBase>(Val: U))
5859 UpgradeIntrinsicCall(CI: CB, NewFn);
5860
5861 // Remove old function, no longer used, from the module.
5862 if (F != NewFn)
5863 F->eraseFromParent();
5864 }
5865}
5866
5867MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5868 const unsigned NumOperands = MD.getNumOperands();
5869 if (NumOperands == 0)
5870 return &MD; // Invalid, punt to a verifier error.
5871
5872 // Check if the tag uses struct-path aware TBAA format.
5873 if (isa<MDNode>(Val: MD.getOperand(I: 0)) && NumOperands >= 3)
5874 return &MD;
5875
5876 auto &Context = MD.getContext();
5877 if (NumOperands == 3) {
5878 Metadata *Elts[] = {MD.getOperand(I: 0), MD.getOperand(I: 1)};
5879 MDNode *ScalarType = MDNode::get(Context, MDs: Elts);
5880 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5881 Metadata *Elts2[] = {ScalarType, ScalarType,
5882 ConstantAsMetadata::get(
5883 C: Constant::getNullValue(Ty: Type::getInt64Ty(C&: Context))),
5884 MD.getOperand(I: 2)};
5885 return MDNode::get(Context, MDs: Elts2);
5886 }
5887 // Create a MDNode <MD, MD, offset 0>
5888 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(C: Constant::getNullValue(
5889 Ty: Type::getInt64Ty(C&: Context)))};
5890 return MDNode::get(Context, MDs: Elts);
5891}
5892
5893Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5894 Instruction *&Temp) {
5895 if (Opc != Instruction::BitCast)
5896 return nullptr;
5897
5898 Temp = nullptr;
5899 Type *SrcTy = V->getType();
5900 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5901 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5902 LLVMContext &Context = V->getContext();
5903
5904 // We have no information about target data layout, so we assume that
5905 // the maximum pointer size is 64bit.
5906 Type *MidTy = Type::getInt64Ty(C&: Context);
5907 Temp = CastInst::Create(Instruction::PtrToInt, S: V, Ty: MidTy);
5908
5909 return CastInst::Create(Instruction::IntToPtr, S: Temp, Ty: DestTy);
5910 }
5911
5912 return nullptr;
5913}
5914
5915Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5916 if (Opc != Instruction::BitCast)
5917 return nullptr;
5918
5919 Type *SrcTy = C->getType();
5920 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5921 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5922 LLVMContext &Context = C->getContext();
5923
5924 // We have no information about target data layout, so we assume that
5925 // the maximum pointer size is 64bit.
5926 Type *MidTy = Type::getInt64Ty(C&: Context);
5927
5928 return ConstantExpr::getIntToPtr(C: ConstantExpr::getPtrToInt(C, Ty: MidTy),
5929 Ty: DestTy);
5930 }
5931
5932 return nullptr;
5933}
5934
5935/// Check the debug info version number, if it is out-dated, drop the debug
5936/// info. Return true if module is modified.
5937bool llvm::UpgradeDebugInfo(Module &M) {
5938 if (DisableAutoUpgradeDebugInfo)
5939 return false;
5940
5941 llvm::TimeTraceScope timeScope("Upgrade debug info");
5942 // We need to get metadata before the module is verified (i.e., getModuleFlag
5943 // makes assumptions that we haven't verified yet). Carefully extract the flag
5944 // from the metadata.
5945 unsigned Version = 0;
5946 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5947 auto OpIt = find_if(Range: ModFlags->operands(), P: [](const MDNode *Flag) {
5948 if (Flag->getNumOperands() < 3)
5949 return false;
5950 if (MDString *K = dyn_cast_or_null<MDString>(Val: Flag->getOperand(I: 1)))
5951 return K->getString() == "Debug Info Version";
5952 return false;
5953 });
5954 if (OpIt != ModFlags->op_end()) {
5955 const MDOperand &ValOp = (*OpIt)->getOperand(I: 2);
5956 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD: ValOp))
5957 Version = CI->getZExtValue();
5958 }
5959 }
5960
5961 if (Version == DEBUG_METADATA_VERSION) {
5962 bool BrokenDebugInfo = false;
5963 if (verifyModule(M, OS: &llvm::errs(), BrokenDebugInfo: &BrokenDebugInfo))
5964 report_fatal_error(reason: "Broken module found, compilation aborted!");
5965 if (!BrokenDebugInfo)
5966 // Everything is ok.
5967 return false;
5968 else {
5969 // Diagnose malformed debug info.
5970 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5971 M.getContext().diagnose(DI: Diag);
5972 }
5973 }
5974 bool Modified = StripDebugInfo(M);
5975 if (Modified && Version != DEBUG_METADATA_VERSION) {
5976 // Diagnose a version mismatch.
5977 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5978 M.getContext().diagnose(DI: DiagVersion);
5979 }
5980 return Modified;
5981}
5982
5983static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5984 GlobalValue *GV, const Metadata *V) {
5985 Function *F = cast<Function>(Val: GV);
5986
5987 constexpr StringLiteral DefaultValue = "1";
5988 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5989 unsigned Length = 0;
5990
5991 if (F->hasFnAttribute(Kind: Attr)) {
5992 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5993 // parse these elements placing them into Vect3
5994 StringRef S = F->getFnAttribute(Kind: Attr).getValueAsString();
5995 for (; Length < 3 && !S.empty(); Length++) {
5996 auto [Part, Rest] = S.split(Separator: ',');
5997 Vect3[Length] = Part.trim();
5998 S = Rest;
5999 }
6000 }
6001
6002 const unsigned Dim = DimC - 'x';
6003 assert(Dim < 3 && "Unexpected dim char");
6004
6005 const uint64_t VInt = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
6006
6007 // local variable required for StringRef in Vect3 to point to.
6008 const std::string VStr = llvm::utostr(X: VInt);
6009 Vect3[Dim] = VStr;
6010 Length = std::max(a: Length, b: Dim + 1);
6011
6012 const std::string NewAttr = llvm::join(R: ArrayRef(Vect3, Length), Separator: ",");
6013 F->addFnAttr(Kind: Attr, Val: NewAttr);
6014}
6015
6016static inline bool isXYZ(StringRef S) {
6017 return S == "x" || S == "y" || S == "z";
6018}
6019
6020bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
6021 const Metadata *V) {
6022 if (K == "kernel") {
6023 if (!mdconst::extract<ConstantInt>(MD&: V)->isZero())
6024 cast<Function>(Val: GV)->setCallingConv(CallingConv::PTX_Kernel);
6025 return true;
6026 }
6027 if (K == "align") {
6028 // V is a bitfeild specifying two 16-bit values. The alignment value is
6029 // specfied in low 16-bits, The index is specified in the high bits. For the
6030 // index, 0 indicates the return value while higher values correspond to
6031 // each parameter (idx = param + 1).
6032 const uint64_t AlignIdxValuePair =
6033 mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
6034 const unsigned Idx = (AlignIdxValuePair >> 16);
6035 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
6036 cast<Function>(Val: GV)->addAttributeAtIndex(
6037 i: Idx, Attr: Attribute::getWithStackAlignment(Context&: GV->getContext(), Alignment: StackAlign));
6038 return true;
6039 }
6040 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
6041 const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
6042 cast<Function>(Val: GV)->addFnAttr(Kind: NVVMAttr::MaxClusterRank, Val: llvm::utostr(X: CV));
6043 return true;
6044 }
6045 if (K == "minctasm") {
6046 const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
6047 cast<Function>(Val: GV)->addFnAttr(Kind: NVVMAttr::MinCTASm, Val: llvm::utostr(X: CV));
6048 return true;
6049 }
6050 if (K == "maxnreg") {
6051 const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
6052 cast<Function>(Val: GV)->addFnAttr(Kind: NVVMAttr::MaxNReg, Val: llvm::utostr(X: CV));
6053 return true;
6054 }
6055 if (K.consume_front(Prefix: "maxntid") && isXYZ(S: K)) {
6056 upgradeNVVMFnVectorAttr(Attr: NVVMAttr::MaxNTID, DimC: K[0], GV, V);
6057 return true;
6058 }
6059 if (K.consume_front(Prefix: "reqntid") && isXYZ(S: K)) {
6060 upgradeNVVMFnVectorAttr(Attr: NVVMAttr::ReqNTID, DimC: K[0], GV, V);
6061 return true;
6062 }
6063 if (K.consume_front(Prefix: "cluster_dim_") && isXYZ(S: K)) {
6064 upgradeNVVMFnVectorAttr(Attr: NVVMAttr::ClusterDim, DimC: K[0], GV, V);
6065 return true;
6066 }
6067 if (K == "grid_constant") {
6068 const auto Attr = Attribute::get(Context&: GV->getContext(), Kind: NVVMAttr::GridConstant);
6069 for (const auto &Op : cast<MDNode>(Val: V)->operands()) {
6070 // For some reason, the index is 1-based in the metadata. Good thing we're
6071 // able to auto-upgrade it!
6072 const auto Index = mdconst::extract<ConstantInt>(MD: Op)->getZExtValue() - 1;
6073 cast<Function>(Val: GV)->addParamAttr(ArgNo: Index, Attr);
6074 }
6075 return true;
6076 }
6077
6078 return false;
6079}
6080
6081void llvm::UpgradeNVVMAnnotations(Module &M) {
6082 NamedMDNode *NamedMD = M.getNamedMetadata(Name: "nvvm.annotations");
6083 if (!NamedMD)
6084 return;
6085
6086 SmallVector<MDNode *, 8> NewNodes;
6087 SmallPtrSet<const MDNode *, 8> SeenNodes;
6088 for (MDNode *MD : NamedMD->operands()) {
6089 if (!SeenNodes.insert(Ptr: MD).second)
6090 continue;
6091
6092 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD: MD->getOperand(I: 0));
6093 if (!GV)
6094 continue;
6095
6096 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
6097
6098 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(I: 0)};
6099 // Each nvvm.annotations metadata entry will be of the following form:
6100 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
6101 // start index = 1, to skip the global variable key
6102 // increment = 2, to skip the value for each property-value pairs
6103 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6104 MDString *K = cast<MDString>(Val: MD->getOperand(I: j));
6105 const MDOperand &V = MD->getOperand(I: j + 1);
6106 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K: K->getString(), V);
6107 if (!Upgraded)
6108 NewOperands.append(IL: {K, V});
6109 }
6110
6111 if (NewOperands.size() > 1)
6112 NewNodes.push_back(Elt: MDNode::get(Context&: M.getContext(), MDs: NewOperands));
6113 }
6114
6115 NamedMD->clearOperands();
6116 for (MDNode *N : NewNodes)
6117 NamedMD->addOperand(M: N);
6118}
6119
6120/// This checks for objc retain release marker which should be upgraded. It
6121/// returns true if module is modified.
6122static bool upgradeRetainReleaseMarker(Module &M) {
6123 bool Changed = false;
6124 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6125 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(Name: MarkerKey);
6126 if (ModRetainReleaseMarker) {
6127 MDNode *Op = ModRetainReleaseMarker->getOperand(i: 0);
6128 if (Op) {
6129 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 0));
6130 if (ID) {
6131 SmallVector<StringRef, 4> ValueComp;
6132 ID->getString().split(A&: ValueComp, Separator: "#");
6133 if (ValueComp.size() == 2) {
6134 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6135 ID = MDString::get(Context&: M.getContext(), Str: NewValue);
6136 }
6137 M.addModuleFlag(Behavior: Module::Error, Key: MarkerKey, Val: ID);
6138 M.eraseNamedMetadata(NMD: ModRetainReleaseMarker);
6139 Changed = true;
6140 }
6141 }
6142 }
6143 return Changed;
6144}
6145
6146void llvm::UpgradeARCRuntime(Module &M) {
6147 // This lambda converts normal function calls to ARC runtime functions to
6148 // intrinsic calls.
6149 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6150 llvm::Intrinsic::ID IntrinsicFunc) {
6151 Function *Fn = M.getFunction(Name: OldFunc);
6152
6153 if (!Fn)
6154 return;
6155
6156 Function *NewFn =
6157 llvm::Intrinsic::getOrInsertDeclaration(M: &M, id: IntrinsicFunc);
6158
6159 for (User *U : make_early_inc_range(Range: Fn->users())) {
6160 CallInst *CI = dyn_cast<CallInst>(Val: U);
6161 if (!CI || CI->getCalledFunction() != Fn)
6162 continue;
6163
6164 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6165 FunctionType *NewFuncTy = NewFn->getFunctionType();
6166 SmallVector<Value *, 2> Args;
6167
6168 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6169 // value to the return type of the old function.
6170 if (NewFuncTy->getReturnType() != CI->getType() &&
6171 !CastInst::castIsValid(op: Instruction::BitCast, S: CI,
6172 DstTy: NewFuncTy->getReturnType()))
6173 continue;
6174
6175 bool InvalidCast = false;
6176
6177 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6178 Value *Arg = CI->getArgOperand(i: I);
6179
6180 // Bitcast argument to the parameter type of the new function if it's
6181 // not a variadic argument.
6182 if (I < NewFuncTy->getNumParams()) {
6183 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6184 // to the parameter type of the new function.
6185 if (!CastInst::castIsValid(op: Instruction::BitCast, S: Arg,
6186 DstTy: NewFuncTy->getParamType(i: I))) {
6187 InvalidCast = true;
6188 break;
6189 }
6190 Arg = Builder.CreateBitCast(V: Arg, DestTy: NewFuncTy->getParamType(i: I));
6191 }
6192 Args.push_back(Elt: Arg);
6193 }
6194
6195 if (InvalidCast)
6196 continue;
6197
6198 // Create a call instruction that calls the new function.
6199 CallInst *NewCall = Builder.CreateCall(FTy: NewFuncTy, Callee: NewFn, Args);
6200 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
6201 NewCall->takeName(V: CI);
6202
6203 // Bitcast the return value back to the type of the old call.
6204 Value *NewRetVal = Builder.CreateBitCast(V: NewCall, DestTy: CI->getType());
6205
6206 if (!CI->use_empty())
6207 CI->replaceAllUsesWith(V: NewRetVal);
6208 CI->eraseFromParent();
6209 }
6210
6211 if (Fn->use_empty())
6212 Fn->eraseFromParent();
6213 };
6214
6215 // Unconditionally convert a call to "clang.arc.use" to a call to
6216 // "llvm.objc.clang.arc.use".
6217 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6218
6219 // Upgrade the retain release marker. If there is no need to upgrade
6220 // the marker, that means either the module is already new enough to contain
6221 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
6222 if (!upgradeRetainReleaseMarker(M))
6223 return;
6224
6225 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6226 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6227 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6228 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6229 {"objc_autoreleaseReturnValue",
6230 llvm::Intrinsic::objc_autoreleaseReturnValue},
6231 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6232 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6233 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6234 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6235 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6236 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6237 {"objc_release", llvm::Intrinsic::objc_release},
6238 {"objc_retain", llvm::Intrinsic::objc_retain},
6239 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6240 {"objc_retainAutoreleaseReturnValue",
6241 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6242 {"objc_retainAutoreleasedReturnValue",
6243 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6244 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6245 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6246 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6247 {"objc_unsafeClaimAutoreleasedReturnValue",
6248 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6249 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6250 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6251 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6252 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6253 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6254 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6255 {"objc_arc_annotation_topdown_bbstart",
6256 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6257 {"objc_arc_annotation_topdown_bbend",
6258 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6259 {"objc_arc_annotation_bottomup_bbstart",
6260 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6261 {"objc_arc_annotation_bottomup_bbend",
6262 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6263
6264 for (auto &I : RuntimeFuncs)
6265 UpgradeToIntrinsic(I.first, I.second);
6266}
6267
6268bool llvm::UpgradeModuleFlags(Module &M) {
6269 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6270 if (!ModFlags)
6271 return false;
6272
6273 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6274 bool HasSwiftVersionFlag = false;
6275 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6276 uint32_t SwiftABIVersion;
6277 auto Int8Ty = Type::getInt8Ty(C&: M.getContext());
6278 auto Int32Ty = Type::getInt32Ty(C&: M.getContext());
6279
6280 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6281 MDNode *Op = ModFlags->getOperand(i: I);
6282 if (Op->getNumOperands() != 3)
6283 continue;
6284 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
6285 if (!ID)
6286 continue;
6287 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6288 Metadata *Ops[3] = {ConstantAsMetadata::get(C: ConstantInt::get(
6289 Ty: Type::getInt32Ty(C&: M.getContext()), V: B)),
6290 MDString::get(Context&: M.getContext(), Str: ID->getString()),
6291 Op->getOperand(I: 2)};
6292 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6293 Changed = true;
6294 };
6295
6296 if (ID->getString() == "Objective-C Image Info Version")
6297 HasObjCFlag = true;
6298 if (ID->getString() == "Objective-C Class Properties")
6299 HasClassProperties = true;
6300 // Upgrade PIC from Error/Max to Min.
6301 if (ID->getString() == "PIC Level") {
6302 if (auto *Behavior =
6303 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
6304 uint64_t V = Behavior->getLimitedValue();
6305 if (V == Module::Error || V == Module::Max)
6306 SetBehavior(Module::Min);
6307 }
6308 }
6309 // Upgrade "PIE Level" from Error to Max.
6310 if (ID->getString() == "PIE Level")
6311 if (auto *Behavior =
6312 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0)))
6313 if (Behavior->getLimitedValue() == Module::Error)
6314 SetBehavior(Module::Max);
6315
6316 // Upgrade branch protection and return address signing module flags. The
6317 // module flag behavior for these fields were Error and now they are Min.
6318 if (ID->getString() == "branch-target-enforcement" ||
6319 ID->getString().starts_with(Prefix: "sign-return-address")) {
6320 if (auto *Behavior =
6321 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
6322 if (Behavior->getLimitedValue() == Module::Error) {
6323 Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
6324 Metadata *Ops[3] = {
6325 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Min)),
6326 Op->getOperand(I: 1), Op->getOperand(I: 2)};
6327 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6328 Changed = true;
6329 }
6330 }
6331 }
6332
6333 // Upgrade Objective-C Image Info Section. Removed the whitespce in the
6334 // section name so that llvm-lto will not complain about mismatching
6335 // module flags that is functionally the same.
6336 if (ID->getString() == "Objective-C Image Info Section") {
6337 if (auto *Value = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 2))) {
6338 SmallVector<StringRef, 4> ValueComp;
6339 Value->getString().split(A&: ValueComp, Separator: " ");
6340 if (ValueComp.size() != 1) {
6341 std::string NewValue;
6342 for (auto &S : ValueComp)
6343 NewValue += S.str();
6344 Metadata *Ops[3] = {Op->getOperand(I: 0), Op->getOperand(I: 1),
6345 MDString::get(Context&: M.getContext(), Str: NewValue)};
6346 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6347 Changed = true;
6348 }
6349 }
6350 }
6351
6352 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6353 // If the higher bits are set, it adds new module flag for swift info.
6354 if (ID->getString() == "Objective-C Garbage Collection") {
6355 auto Md = dyn_cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2));
6356 if (Md) {
6357 assert(Md->getValue() && "Expected non-empty metadata");
6358 auto Type = Md->getValue()->getType();
6359 if (Type == Int8Ty)
6360 continue;
6361 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6362 if ((Val & 0xff) != Val) {
6363 HasSwiftVersionFlag = true;
6364 SwiftABIVersion = (Val & 0xff00) >> 8;
6365 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6366 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6367 }
6368 Metadata *Ops[3] = {
6369 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty,V: Module::Error)),
6370 Op->getOperand(I: 1),
6371 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int8Ty,V: Val & 0xff))};
6372 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6373 Changed = true;
6374 }
6375 }
6376
6377 if (ID->getString() == "amdgpu_code_object_version") {
6378 Metadata *Ops[3] = {
6379 Op->getOperand(I: 0),
6380 MDString::get(Context&: M.getContext(), Str: "amdhsa_code_object_version"),
6381 Op->getOperand(I: 2)};
6382 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6383 Changed = true;
6384 }
6385 }
6386
6387 // "Objective-C Class Properties" is recently added for Objective-C. We
6388 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6389 // flag of value 0, so we can correclty downgrade this flag when trying to
6390 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6391 // this module flag.
6392 if (HasObjCFlag && !HasClassProperties) {
6393 M.addModuleFlag(Behavior: llvm::Module::Override, Key: "Objective-C Class Properties",
6394 Val: (uint32_t)0);
6395 Changed = true;
6396 }
6397
6398 if (HasSwiftVersionFlag) {
6399 M.addModuleFlag(Behavior: Module::Error, Key: "Swift ABI Version",
6400 Val: SwiftABIVersion);
6401 M.addModuleFlag(Behavior: Module::Error, Key: "Swift Major Version",
6402 Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMajorVersion));
6403 M.addModuleFlag(Behavior: Module::Error, Key: "Swift Minor Version",
6404 Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMinorVersion));
6405 Changed = true;
6406 }
6407
6408 return Changed;
6409}
6410
6411bool llvm::UpgradeCFIFunctionsMetadata(Module &M) {
6412 NamedMDNode *CFIConsts = M.getNamedMetadata(Name: "cfi.functions");
6413 // If this metadata has operands, we expect all of them to be either from
6414 // before or from after the format change handled here, so we can bail out
6415 // fast if the first (if any) operands is of the new format.
6416 auto MatchesVersion = [](const MDNode *Op) {
6417 return Op->getNumOperands() >= 3 &&
6418 isa<ConstantAsMetadata>(Val: Op->getOperand(I: 2)) &&
6419 cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2))
6420 ->getType()
6421 ->isIntegerTy(BitWidth: 64);
6422 };
6423
6424 if (!CFIConsts || !CFIConsts->getNumOperands() ||
6425 MatchesVersion(CFIConsts->getOperand(i: 0)))
6426 return false;
6427
6428 bool Changed = false;
6429 for (unsigned I = 0, E = CFIConsts->getNumOperands(); I != E; ++I) {
6430 MDNode *Op = CFIConsts->getOperand(i: I);
6431 assert(!MatchesVersion(Op) && "Unexpected mix of CFIConstant formats");
6432 assert(Op->getNumOperands() >= 2 &&
6433 "Expected at least 2 operands - name and linkage type");
6434 MDString *NameMD = dyn_cast<MDString>(Val: Op->getOperand(I: 0));
6435 StringRef Name = NameMD->getString();
6436 GlobalValue::GUID GUID = GlobalValue::getGUIDAssumingExternalLinkage(
6437 GlobalName: GlobalValue::dropLLVMManglingEscape(Name));
6438
6439 SmallVector<Metadata *, 4> Elts;
6440 Elts.push_back(Elt: Op->getOperand(I: 0));
6441 Elts.push_back(Elt: Op->getOperand(I: 1));
6442 Elts.push_back(Elt: ConstantAsMetadata::get(
6443 C: ConstantInt::get(Ty: Type::getInt64Ty(C&: M.getContext()), V: GUID)));
6444
6445 for (unsigned J = 2, EJ = Op->getNumOperands(); J != EJ; ++J)
6446 Elts.push_back(Elt: Op->getOperand(I: J));
6447
6448 CFIConsts->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Elts));
6449 Changed = true;
6450 }
6451
6452 return Changed;
6453}
6454
6455void llvm::UpgradeSectionAttributes(Module &M) {
6456 auto TrimSpaces = [](StringRef Section) -> std::string {
6457 SmallVector<StringRef, 5> Components;
6458 Section.split(A&: Components, Separator: ',');
6459
6460 SmallString<32> Buffer;
6461 raw_svector_ostream OS(Buffer);
6462
6463 for (auto Component : Components)
6464 OS << ',' << Component.trim();
6465
6466 return std::string(OS.str().substr(Start: 1));
6467 };
6468
6469 for (auto &GV : M.globals()) {
6470 if (!GV.hasSection())
6471 continue;
6472
6473 StringRef Section = GV.getSection();
6474
6475 if (!Section.starts_with(Prefix: "__DATA, __objc_catlist"))
6476 continue;
6477
6478 // __DATA, __objc_catlist, regular, no_dead_strip
6479 // __DATA,__objc_catlist,regular,no_dead_strip
6480 GV.setSection(TrimSpaces(Section));
6481 }
6482}
6483
6484namespace {
6485// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6486// callsites within a function that did not also have the strictfp attribute.
6487// Since 10.0, if strict FP semantics are needed within a function, the
6488// function must have the strictfp attribute and all calls within the function
6489// must also have the strictfp attribute. This latter restriction is
6490// necessary to prevent unwanted libcall simplification when a function is
6491// being cloned (such as for inlining).
6492//
6493// The "dangling" strictfp attribute usage was only used to prevent constant
6494// folding and other libcall simplification. The nobuiltin attribute on the
6495// callsite has the same effect.
6496struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6497 StrictFPUpgradeVisitor() = default;
6498
6499 void visitCallBase(CallBase &Call) {
6500 if (!Call.isStrictFP())
6501 return;
6502 if (isa<ConstrainedFPIntrinsic>(Val: &Call))
6503 return;
6504 // If we get here, the caller doesn't have the strictfp attribute
6505 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6506 Call.removeFnAttr(Kind: Attribute::StrictFP);
6507 Call.addFnAttr(Kind: Attribute::NoBuiltin);
6508 }
6509};
6510
6511/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6512struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6513 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6514 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6515
6516 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6517 if (!RMW.isFloatingPointOperation())
6518 return;
6519
6520 MDNode *Empty = MDNode::get(Context&: RMW.getContext(), MDs: {});
6521 RMW.setMetadata(Kind: "amdgpu.no.fine.grained.host.memory", Node: Empty);
6522 RMW.setMetadata(Kind: "amdgpu.no.remote.memory.access", Node: Empty);
6523 RMW.setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: Empty);
6524 }
6525};
6526} // namespace
6527
6528void llvm::UpgradeFunctionAttributes(Function &F) {
6529 // If a function definition doesn't have the strictfp attribute,
6530 // convert any callsite strictfp attributes to nobuiltin.
6531 if (!F.isDeclaration() && !F.hasFnAttribute(Kind: Attribute::StrictFP)) {
6532 StrictFPUpgradeVisitor SFPV;
6533 SFPV.visit(F);
6534 }
6535
6536 // Remove all incompatibile attributes from function.
6537 F.removeRetAttrs(Attrs: AttributeFuncs::typeIncompatible(
6538 Ty: F.getReturnType(), AS: F.getAttributes().getRetAttrs()));
6539 for (auto &Arg : F.args())
6540 Arg.removeAttrs(
6541 AM: AttributeFuncs::typeIncompatible(Ty: Arg.getType(), AS: Arg.getAttributes()));
6542
6543 bool AddingAttrs = false, RemovingAttrs = false;
6544 AttrBuilder AttrsToAdd(F.getContext());
6545 AttributeMask AttrsToRemove;
6546
6547 // Older versions of LLVM treated an "implicit-section-name" attribute
6548 // similarly to directly setting the section on a Function.
6549 if (Attribute A = F.getFnAttribute(Kind: "implicit-section-name");
6550 A.isValid() && A.isStringAttribute()) {
6551 F.setSection(A.getValueAsString());
6552 AttrsToRemove.addAttribute(A: "implicit-section-name");
6553 RemovingAttrs = true;
6554 }
6555
6556 if (Attribute A = F.getFnAttribute(Kind: "nooutline");
6557 A.isValid() && A.isStringAttribute()) {
6558 AttrsToRemove.addAttribute(A: "nooutline");
6559 AttrsToAdd.addAttribute(Val: Attribute::NoOutline);
6560 AddingAttrs = RemovingAttrs = true;
6561 }
6562
6563 if (Attribute A = F.getFnAttribute(Kind: "uniform-work-group-size");
6564 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6565 AttrsToRemove.addAttribute(A: "uniform-work-group-size");
6566 RemovingAttrs = true;
6567 if (A.getValueAsString() == "true") {
6568 AttrsToAdd.addAttribute(A: "uniform-work-group-size");
6569 AddingAttrs = true;
6570 }
6571 }
6572
6573 if (!F.empty()) {
6574 // For some reason this is called twice, and the first time is before any
6575 // instructions are loaded into the body.
6576
6577 if (Attribute A = F.getFnAttribute(Kind: "amdgpu-unsafe-fp-atomics");
6578 A.isValid()) {
6579
6580 if (A.getValueAsBool()) {
6581 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6582 Visitor.visit(F);
6583 }
6584
6585 // We will leave behind dead attribute uses on external declarations, but
6586 // clang never added these to declarations anyway.
6587 AttrsToRemove.addAttribute(A: "amdgpu-unsafe-fp-atomics");
6588 RemovingAttrs = true;
6589 }
6590 }
6591
6592 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6593 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6594
6595 bool HandleDenormalMode = false;
6596
6597 if (Attribute Attr = F.getFnAttribute(Kind: "denormal-fp-math"); Attr.isValid()) {
6598 DenormalMode ParsedMode = parseDenormalFPAttribute(Str: Attr.getValueAsString());
6599 if (ParsedMode.isValid()) {
6600 DenormalFPMath = ParsedMode;
6601 AttrsToRemove.addAttribute(A: "denormal-fp-math");
6602 AddingAttrs = RemovingAttrs = true;
6603 HandleDenormalMode = true;
6604 }
6605 }
6606
6607 if (Attribute Attr = F.getFnAttribute(Kind: "denormal-fp-math-f32");
6608 Attr.isValid()) {
6609 DenormalMode ParsedMode = parseDenormalFPAttribute(Str: Attr.getValueAsString());
6610 if (ParsedMode.isValid()) {
6611 DenormalFPMathF32 = ParsedMode;
6612 AttrsToRemove.addAttribute(A: "denormal-fp-math-f32");
6613 AddingAttrs = RemovingAttrs = true;
6614 HandleDenormalMode = true;
6615 }
6616 }
6617
6618 if (HandleDenormalMode)
6619 AttrsToAdd.addDenormalFPEnvAttr(
6620 Mode: DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6621
6622 if (RemovingAttrs)
6623 F.removeFnAttrs(Attrs: AttrsToRemove);
6624
6625 if (AddingAttrs)
6626 F.addFnAttrs(Attrs: AttrsToAdd);
6627}
6628
6629// Check if the function attribute is not present and set it.
6630static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6631 StringRef Value) {
6632 if (!F.hasFnAttribute(Kind: FnAttrName))
6633 F.addFnAttr(Kind: FnAttrName, Val: Value);
6634}
6635
6636// Check if the function attribute is not present and set it if needed.
6637// If the attribute is "false" then removes it.
6638// If the attribute is "true" resets it to a valueless attribute.
6639static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6640 if (!F.hasFnAttribute(Kind: FnAttrName)) {
6641 if (Set)
6642 F.addFnAttr(Kind: FnAttrName);
6643 } else {
6644 auto A = F.getFnAttribute(Kind: FnAttrName);
6645 if ("false" == A.getValueAsString())
6646 F.removeFnAttr(Kind: FnAttrName);
6647 else if ("true" == A.getValueAsString()) {
6648 F.removeFnAttr(Kind: FnAttrName);
6649 F.addFnAttr(Kind: FnAttrName);
6650 }
6651 }
6652}
6653
6654void llvm::copyModuleAttrToFunctions(Module &M) {
6655 Triple T(M.getTargetTriple());
6656 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6657 return;
6658
6659 uint64_t BTEValue = 0;
6660 uint64_t BPPLRValue = 0;
6661 uint64_t GCSValue = 0;
6662 uint64_t SRAValue = 0;
6663 uint64_t SRAALLValue = 0;
6664 uint64_t SRABKeyValue = 0;
6665
6666 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6667 if (ModFlags) {
6668 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6669 MDNode *Op = ModFlags->getOperand(i: I);
6670 if (Op->getNumOperands() != 3)
6671 continue;
6672
6673 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
6674 auto *CI = mdconst::dyn_extract<ConstantInt>(MD: Op->getOperand(I: 2));
6675 if (!ID || !CI)
6676 continue;
6677
6678 StringRef IDStr = ID->getString();
6679 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6680 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6681 : IDStr == "guarded-control-stack" ? &GCSValue
6682 : IDStr == "sign-return-address" ? &SRAValue
6683 : IDStr == "sign-return-address-all" ? &SRAALLValue
6684 : IDStr == "sign-return-address-with-bkey"
6685 ? &SRABKeyValue
6686 : nullptr;
6687 if (!ValPtr)
6688 continue;
6689
6690 *ValPtr = CI->getZExtValue();
6691 if (*ValPtr == 2)
6692 return;
6693 }
6694 }
6695
6696 bool BTE = BTEValue == 1;
6697 bool BPPLR = BPPLRValue == 1;
6698 bool GCS = GCSValue == 1;
6699 bool SRA = SRAValue == 1;
6700
6701 StringRef SignTypeValue = "non-leaf";
6702 if (SRA && SRAALLValue == 1)
6703 SignTypeValue = "all";
6704
6705 StringRef SignKeyValue = "a_key";
6706 if (SRA && SRABKeyValue == 1)
6707 SignKeyValue = "b_key";
6708
6709 for (Function &F : M.getFunctionList()) {
6710 if (F.isDeclaration())
6711 continue;
6712
6713 if (SRA) {
6714 setFunctionAttrIfNotSet(F, FnAttrName: "sign-return-address", Value: SignTypeValue);
6715 setFunctionAttrIfNotSet(F, FnAttrName: "sign-return-address-key", Value: SignKeyValue);
6716 } else {
6717 if (auto A = F.getFnAttribute(Kind: "sign-return-address");
6718 A.isValid() && "none" == A.getValueAsString()) {
6719 F.removeFnAttr(Kind: "sign-return-address");
6720 F.removeFnAttr(Kind: "sign-return-address-key");
6721 }
6722 }
6723 ConvertFunctionAttr(F, Set: BTE, FnAttrName: "branch-target-enforcement");
6724 ConvertFunctionAttr(F, Set: BPPLR, FnAttrName: "branch-protection-pauth-lr");
6725 ConvertFunctionAttr(F, Set: GCS, FnAttrName: "guarded-control-stack");
6726 }
6727
6728 if (BTE)
6729 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "branch-target-enforcement", Val: 2);
6730 if (BPPLR)
6731 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "branch-protection-pauth-lr", Val: 2);
6732 if (GCS)
6733 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "guarded-control-stack", Val: 2);
6734 if (SRA) {
6735 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address", Val: 2);
6736 if (SRAALLValue == 1)
6737 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address-all", Val: 2);
6738 if (SRABKeyValue == 1)
6739 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address-with-bkey", Val: 2);
6740 }
6741}
6742
6743static bool isOldLoopArgument(Metadata *MD) {
6744 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
6745 if (!T)
6746 return false;
6747 if (T->getNumOperands() < 1)
6748 return false;
6749 auto *S = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
6750 if (!S)
6751 return false;
6752 return S->getString().starts_with(Prefix: "llvm.vectorizer.");
6753}
6754
6755static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6756 StringRef OldPrefix = "llvm.vectorizer.";
6757 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6758
6759 if (OldTag == "llvm.vectorizer.unroll")
6760 return MDString::get(Context&: C, Str: "llvm.loop.interleave.count");
6761
6762 return MDString::get(
6763 Context&: C, Str: (Twine("llvm.loop.vectorize.") + OldTag.drop_front(N: OldPrefix.size()))
6764 .str());
6765}
6766
6767static Metadata *upgradeLoopArgument(Metadata *MD) {
6768 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
6769 if (!T)
6770 return MD;
6771 if (T->getNumOperands() < 1)
6772 return MD;
6773 auto *OldTag = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
6774 if (!OldTag)
6775 return MD;
6776 if (!OldTag->getString().starts_with(Prefix: "llvm.vectorizer."))
6777 return MD;
6778
6779 // This has an old tag. Upgrade it.
6780 SmallVector<Metadata *, 8> Ops;
6781 Ops.reserve(N: T->getNumOperands());
6782 Ops.push_back(Elt: upgradeLoopTag(C&: T->getContext(), OldTag: OldTag->getString()));
6783 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6784 Ops.push_back(Elt: T->getOperand(I));
6785
6786 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
6787}
6788
6789MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6790 auto *T = dyn_cast<MDTuple>(Val: &N);
6791 if (!T)
6792 return &N;
6793
6794 if (none_of(Range: T->operands(), P: isOldLoopArgument))
6795 return &N;
6796
6797 SmallVector<Metadata *, 8> Ops;
6798 Ops.reserve(N: T->getNumOperands());
6799 for (Metadata *MD : T->operands())
6800 Ops.push_back(Elt: upgradeLoopArgument(MD));
6801
6802 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
6803}
6804
6805std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6806 Triple T(TT);
6807 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6808 // the address space of globals to 1. This does not apply to SPIRV Logical.
6809 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6810 !DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G")) {
6811 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6812 }
6813
6814 if (T.isLoongArch64() || T.isRISCV64()) {
6815 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6816 auto I = DL.find(Str: "-n64-");
6817 if (I != StringRef::npos)
6818 return (DL.take_front(N: I) + "-n32:64-" + DL.drop_front(N: I + 5)).str();
6819 return DL.str();
6820 }
6821
6822 // AMDGPU data layout upgrades.
6823 std::string Res = DL.str();
6824 if (T.isAMDGPU()) {
6825 // Define address spaces for constants.
6826 if (!DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G"))
6827 Res.append(s: Res.empty() ? "G1" : "-G1");
6828
6829 // AMDGCN data layout upgrades.
6830 if (T.isAMDGCN()) {
6831
6832 // Add missing non-integral declarations.
6833 // This goes before adding new address spaces to prevent incoherent string
6834 // values.
6835 if (!DL.contains(Other: "-ni") && !DL.starts_with(Prefix: "ni"))
6836 Res.append(s: "-ni:7:8:9");
6837 // Update ni:7 to ni:7:8:9.
6838 if (DL.ends_with(Suffix: "ni:7"))
6839 Res.append(s: ":8:9");
6840 if (DL.ends_with(Suffix: "ni:7:8"))
6841 Res.append(s: ":9");
6842
6843 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6844 // resources) An empty data layout has already been upgraded to G1 by now.
6845 if (!DL.contains(Other: "-p7") && !DL.starts_with(Prefix: "p7"))
6846 Res.append(s: "-p7:160:256:256:32");
6847 if (!DL.contains(Other: "-p8") && !DL.starts_with(Prefix: "p8"))
6848 Res.append(s: "-p8:128:128:128:48");
6849 constexpr StringRef OldP8("-p8:128:128-");
6850 if (DL.contains(Other: OldP8))
6851 Res.replace(pos: Res.find(svt: OldP8), n1: OldP8.size(), s: "-p8:128:128:128:48-");
6852 if (!DL.contains(Other: "-p9") && !DL.starts_with(Prefix: "p9"))
6853 Res.append(s: "-p9:192:256:256:32");
6854 }
6855
6856 // Upgrade the ELF mangling mode.
6857 if (!DL.contains(Other: "m:e"))
6858 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6859
6860 return Res;
6861 }
6862
6863 if (T.isSystemZ() && !DL.empty()) {
6864 // Make sure the stack alignment is present.
6865 if (!DL.contains(Other: "-S64"))
6866 return "E-S64" + DL.drop_front(N: 1).str();
6867 return DL.str();
6868 }
6869
6870 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6871 // If the datalayout matches the expected format, add pointer size address
6872 // spaces to the datalayout.
6873 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6874 if (!DL.contains(Other: AddrSpaces)) {
6875 SmallVector<StringRef, 4> Groups;
6876 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6877 if (R.match(String: Res, Matches: &Groups))
6878 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6879 }
6880 };
6881
6882 // AArch64 data layout upgrades.
6883 if (T.isAArch64()) {
6884 // Add "-Fn32"
6885 if (!DL.empty() && !DL.contains(Other: "-Fn32"))
6886 Res.append(s: "-Fn32");
6887 AddPtr32Ptr64AddrSpaces();
6888 return Res;
6889 }
6890
6891 if (T.isSPARC() || (T.isMIPS64() && !DL.contains(Other: "m:m")) || T.isPPC64() ||
6892 T.isWasm()) {
6893 // Mips64 with o32 ABI did not add "-i128:128".
6894 // Add "-i128:128"
6895 std::string I64 = "-i64:64";
6896 std::string I128 = "-i128:128";
6897 if (!StringRef(Res).contains(Other: I128)) {
6898 size_t Pos = Res.find(str: I64);
6899 if (Pos != size_t(-1))
6900 Res.insert(pos1: Pos + I64.size(), str: I128);
6901 }
6902 }
6903
6904 if (T.isPPC() && T.isOSAIX() && !DL.contains(Other: "f64:32:64") && !DL.empty()) {
6905 size_t Pos = Res.find(s: "-S128");
6906 if (Pos == StringRef::npos)
6907 Pos = Res.size();
6908 Res.insert(pos: Pos, s: "-f64:32:64");
6909 }
6910
6911 if (!T.isX86())
6912 return Res;
6913
6914 AddPtr32Ptr64AddrSpaces();
6915
6916 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6917 // for i128 operations prior to this being reflected in the data layout, and
6918 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6919 // boundaries, so although this is a breaking change, the upgrade is expected
6920 // to fix more IR than it breaks.
6921 // Intel MCU is an exception and uses 4-byte-alignment.
6922 if (!T.isOSIAMCU()) {
6923 std::string I128 = "-i128:128";
6924 if (StringRef Ref = Res; !Ref.contains(Other: I128)) {
6925 SmallVector<StringRef, 4> Groups;
6926 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6927 if (R.match(String: Res, Matches: &Groups))
6928 Res = (Groups[1] + I128 + Groups[3]).str();
6929 }
6930 }
6931
6932 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6933 // Raising the alignment is safe because Clang did not produce f80 values in
6934 // the MSVC environment before this upgrade was added.
6935 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6936 StringRef Ref = Res;
6937 auto I = Ref.find(Str: "-f80:32-");
6938 if (I != StringRef::npos)
6939 Res = (Ref.take_front(N: I) + "-f80:128-" + Ref.drop_front(N: I + 8)).str();
6940 }
6941
6942 return Res;
6943}
6944
6945void llvm::UpgradeAttributes(AttrBuilder &B) {
6946 StringRef FramePointer;
6947 Attribute A = B.getAttribute(Kind: "no-frame-pointer-elim");
6948 if (A.isValid()) {
6949 // The value can be "true" or "false".
6950 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6951 B.removeAttribute(A: "no-frame-pointer-elim");
6952 }
6953 if (B.contains(A: "no-frame-pointer-elim-non-leaf")) {
6954 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6955 if (FramePointer != "all")
6956 FramePointer = "non-leaf";
6957 B.removeAttribute(A: "no-frame-pointer-elim-non-leaf");
6958 }
6959 if (!FramePointer.empty())
6960 B.addAttribute(A: "frame-pointer", V: FramePointer);
6961
6962 A = B.getAttribute(Kind: "null-pointer-is-valid");
6963 if (A.isValid()) {
6964 // The value can be "true" or "false".
6965 bool NullPointerIsValid = A.getValueAsString() == "true";
6966 B.removeAttribute(A: "null-pointer-is-valid");
6967 if (NullPointerIsValid)
6968 B.addAttribute(Val: Attribute::NullPointerIsValid);
6969 }
6970
6971 A = B.getAttribute(Kind: "uniform-work-group-size");
6972 if (A.isValid()) {
6973 StringRef Val = A.getValueAsString();
6974 if (!Val.empty()) {
6975 bool IsTrue = Val == "true";
6976 B.removeAttribute(A: "uniform-work-group-size");
6977 if (IsTrue)
6978 B.addAttribute(A: "uniform-work-group-size");
6979 }
6980 }
6981}
6982
6983void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6984 // clang.arc.attachedcall bundles are now required to have an operand.
6985 // If they don't, it's okay to drop them entirely: when there is an operand,
6986 // the "attachedcall" is meaningful and required, but without an operand,
6987 // it's just a marker NOP. Dropping it merely prevents an optimization.
6988 erase_if(C&: Bundles, P: [&](OperandBundleDef &OBD) {
6989 return OBD.getTag() == "clang.arc.attachedcall" &&
6990 OBD.inputs().empty();
6991 });
6992}
6993