1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/BinaryFormat/Dwarf.h"
21#include "llvm/IR/AttributeMask.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DebugInfo.h"
26#include "llvm/IR/DebugInfoMetadata.h"
27#include "llvm/IR/DiagnosticInfo.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/InstVisitor.h"
31#include "llvm/IR/Instruction.h"
32#include "llvm/IR/IntrinsicInst.h"
33#include "llvm/IR/Intrinsics.h"
34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
41#include "llvm/IR/LLVMContext.h"
42#include "llvm/IR/MDBuilder.h"
43#include "llvm/IR/Metadata.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/Value.h"
46#include "llvm/IR/Verifier.h"
47#include "llvm/Support/AMDGPUAddrSpace.h"
48#include "llvm/Support/CommandLine.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/NVPTXAddrSpace.h"
51#include "llvm/Support/NVVMAttributes.h"
52#include "llvm/Support/Regex.h"
53#include "llvm/Support/TimeProfiler.h"
54#include "llvm/TargetParser/Triple.h"
55#include <cstdint>
56#include <cstring>
57#include <numeric>
58
59using namespace llvm;
60
// Command-line switch: when set, debug-info metadata is not auto-upgraded.
static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));
64
65static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
66
67// Report a fatal error along with the
68// Call Instruction which caused the error
69[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
70 CallBase *CI) {
71 CI->print(O&: llvm::errs());
72 llvm::errs() << "\n";
73 reportFatalUsageError(reason);
74}
75
76// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
77// changed their type from v4f32 to v2i64.
78static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
79 Function *&NewFn) {
80 // Check whether this is an old version of the function, which received
81 // v4f32 arguments.
82 Type *Arg0Type = F->getFunctionType()->getParamType(i: 0);
83 if (Arg0Type != FixedVectorType::get(ElementType: Type::getFloatTy(C&: F->getContext()), NumElts: 4))
84 return false;
85
86 // Yes, it's old, replace it with new version.
87 rename(GV: F);
88 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
89 return true;
90}
91
92// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
93// arguments have changed their type from i32 to i8.
94static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
95 Function *&NewFn) {
96 // Check that the last argument is an i32.
97 Type *LastArgType = F->getFunctionType()->getParamType(
98 i: F->getFunctionType()->getNumParams() - 1);
99 if (!LastArgType->isIntegerTy(Bitwidth: 32))
100 return false;
101
102 // Move this function aside and map down.
103 rename(GV: F);
104 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
105 return true;
106}
107
108// Upgrade the declaration of fp compare intrinsics that change return type
109// from scalar to vXi1 mask.
110static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
111 Function *&NewFn) {
112 // Check if the return type is a vector.
113 if (F->getReturnType()->isVectorTy())
114 return false;
115
116 rename(GV: F);
117 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
118 return true;
119}
120
121// Upgrade the declaration of multiply and add bytes intrinsics whose input
122// arguments' types have changed from vectors of i32 to vectors of i8
123static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
124 Function *&NewFn) {
125 // check if input argument type is a vector of i8
126 Type *Arg1Type = F->getFunctionType()->getParamType(i: 1);
127 Type *Arg2Type = F->getFunctionType()->getParamType(i: 2);
128 if (Arg1Type->isVectorTy() &&
129 cast<VectorType>(Val: Arg1Type)->getElementType()->isIntegerTy(Bitwidth: 8) &&
130 Arg2Type->isVectorTy() &&
131 cast<VectorType>(Val: Arg2Type)->getElementType()->isIntegerTy(Bitwidth: 8))
132 return false;
133
134 rename(GV: F);
135 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
136 return true;
137}
138
139// Upgrade the declaration of multipy and add words intrinsics whose input
140// arguments' types have changed to vectors of i32 to vectors of i16
141static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
142 Function *&NewFn) {
143 // check if input argument type is a vector of i16
144 Type *Arg1Type = F->getFunctionType()->getParamType(i: 1);
145 Type *Arg2Type = F->getFunctionType()->getParamType(i: 2);
146 if (Arg1Type->isVectorTy() &&
147 cast<VectorType>(Val: Arg1Type)->getElementType()->isIntegerTy(Bitwidth: 16) &&
148 Arg2Type->isVectorTy() &&
149 cast<VectorType>(Val: Arg2Type)->getElementType()->isIntegerTy(Bitwidth: 16))
150 return false;
151
152 rename(GV: F);
153 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
154 return true;
155}
156
157static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
158 Function *&NewFn) {
159 if (F->getReturnType()->getScalarType()->isBFloatTy())
160 return false;
161
162 rename(GV: F);
163 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
164 return true;
165}
166
167static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
168 Function *&NewFn) {
169 if (F->getFunctionType()->getParamType(i: 1)->getScalarType()->isBFloatTy())
170 return false;
171
172 rename(GV: F);
173 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
174 return true;
175}
176
// Returns true if the given legacy x86 intrinsic name (with the leading
// "x86." already stripped by the caller) must be upgraded by rewriting its
// call sites inline (NewFn == nullptr path) rather than by remapping to a
// new declaration. This is a pure name-matching table; F is currently
// unused here.
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||        // Added in 3.7
            Name == "cvt.ps2.pd.256" ||           // Added in 3.9
            Name == "cvtdq2.pd.256" ||            // Added in 3.9
            Name == "cvtdq2.ps.256" ||            // Added in 7.0
            Name.starts_with("movnt.") ||         // Added in 3.2
            Name.starts_with("sqrt.p") ||         // Added in 7.0
            Name.starts_with("storeu.") ||        // Added in 3.9
            Name.starts_with("vbroadcast.s") ||   // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") ||  // Added in 3.7
            Name.starts_with("vinsertf128.") ||   // Added in 3.7
            Name.starts_with("vperm2f128.") ||    // Added in 6.0
            Name.starts_with("vpermil."));        // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||             // Added in 5.0
            Name.starts_with("pabs.") ||      // Added in 6.0
            Name.starts_with("padds.") ||     // Added in 8.0
            Name.starts_with("paddus.") ||    // Added in 8.0
            Name.starts_with("pblendd.") ||   // Added in 3.7
            Name == "pblendw" ||              // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") ||    // Added in 3.1
            Name.starts_with("pcmpgt.") ||    // Added in 3.1
            Name.starts_with("pmax") ||       // Added in 3.9
            Name.starts_with("pmin") ||       // Added in 3.9
            Name.starts_with("pmovsx") ||     // Added in 3.9
            Name.starts_with("pmovzx") ||     // Added in 3.9
            Name == "pmul.dq" ||              // Added in 7.0
            Name == "pmulu.dq" ||             // Added in 7.0
            Name.starts_with("psll.dq") ||    // Added in 3.7
            Name.starts_with("psrl.dq") ||    // Added in 3.7
            Name.starts_with("psubs.") ||     // Added in 8.0
            Name.starts_with("psubus.") ||    // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" ||       // Added in 3.7
            Name == "vextracti128" ||         // Added in 3.7
            Name == "vinserti128" ||          // Added in 3.7
            Name == "vperm2i128");            // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") ||           // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") ||            // Added in 3.9
              Name.starts_with("andn.") ||           // Added in 3.9
              Name.starts_with("broadcast.s") ||     // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") ||           // Added in 5.0
              Name.starts_with("cmp.d") ||           // Added in 5.0
              Name.starts_with("cmp.q") ||           // Added in 5.0
              Name.starts_with("cmp.w") ||           // Added in 5.0
              Name.starts_with("compress.b") ||      // Added in 9.0
              Name.starts_with("compress.d") ||      // Added in 9.0
              Name.starts_with("compress.p") ||      // Added in 9.0
              Name.starts_with("compress.q") ||      // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") ||      // Added in 9.0
              Name.starts_with("conflict.") ||       // Added in 9.0
              Name.starts_with("cvtdq2pd.") ||       // Added in 4.0
              Name.starts_with("cvtdq2ps.") ||       // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" ||              // Added in 7.0
              Name == "cvtpd2ps.256" ||              // Added in 7.0
              Name == "cvtps2pd.128" ||              // Added in 7.0
              Name == "cvtps2pd.256" ||              // Added in 7.0
              Name.starts_with("cvtqq2pd.") ||       // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" ||              // Added in 9.0
              Name == "cvtqq2ps.512" ||              // Added in 9.0
              Name == "cvttpd2dq.256" ||             // Added in 7.0
              Name == "cvttps2dq.128" ||             // Added in 7.0
              Name == "cvttps2dq.256" ||             // Added in 7.0
              Name.starts_with("cvtudq2pd.") ||      // Added in 4.0
              Name.starts_with("cvtudq2ps.") ||      // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") ||      // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" ||             // Added in 9.0
              Name == "cvtuqq2ps.512" ||             // Added in 9.0
              Name.starts_with("dbpsadbw.") ||       // Added in 7.0
              Name.starts_with("div.p") ||           // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") ||        // Added in 9.0
              Name.starts_with("expand.d") ||        // Added in 9.0
              Name.starts_with("expand.load.") ||    // Added in 7.0
              Name.starts_with("expand.p") ||        // Added in 9.0
              Name.starts_with("expand.q") ||        // Added in 9.0
              Name.starts_with("expand.w") ||        // Added in 9.0
              Name.starts_with("fpclass.p") ||       // Added in 7.0
              Name.starts_with("insert") ||          // Added in 4.0
              Name.starts_with("load.") ||           // Added in 3.9
              Name.starts_with("loadu.") ||          // Added in 3.9
              Name.starts_with("lzcnt.") ||          // Added in 5.0
              Name.starts_with("max.p") ||           // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") ||           // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") ||         // Added in 3.9
              Name.starts_with("move.s") ||          // Added in 4.0
              Name.starts_with("movshdup") ||        // Added in 3.9
              Name.starts_with("movsldup") ||        // Added in 3.9
              Name.starts_with("mul.p") ||           // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") ||             // Added in 3.9
              Name.starts_with("pabs.") ||           // Added in 6.0
              Name.starts_with("packssdw.") ||       // Added in 5.0
              Name.starts_with("packsswb.") ||       // Added in 5.0
              Name.starts_with("packusdw.") ||       // Added in 5.0
              Name.starts_with("packuswb.") ||       // Added in 5.0
              Name.starts_with("padd.") ||           // Added in 4.0
              Name.starts_with("padds.") ||          // Added in 8.0
              Name.starts_with("paddus.") ||         // Added in 8.0
              Name.starts_with("palignr.") ||        // Added in 3.9
              Name.starts_with("pand.") ||           // Added in 3.9
              Name.starts_with("pandn.") ||          // Added in 3.9
              Name.starts_with("pavg") ||            // Added in 6.0
              Name.starts_with("pbroadcast") ||      // Added in 6.0
              Name.starts_with("pcmpeq.") ||         // Added in 3.9
              Name.starts_with("pcmpgt.") ||         // Added in 3.9
              Name.starts_with("perm.df.") ||        // Added in 3.9
              Name.starts_with("perm.di.") ||        // Added in 3.9
              Name.starts_with("permvar.") ||        // Added in 7.0
              Name.starts_with("pmaddubs.w.") ||     // Added in 7.0
              Name.starts_with("pmaddw.d.") ||       // Added in 7.0
              Name.starts_with("pmax") ||            // Added in 4.0
              Name.starts_with("pmin") ||            // Added in 4.0
              Name == "pmov.qd.256" ||               // Added in 9.0
              Name == "pmov.qd.512" ||               // Added in 9.0
              Name == "pmov.wb.256" ||               // Added in 9.0
              Name == "pmov.wb.512" ||               // Added in 9.0
              Name.starts_with("pmovsx") ||          // Added in 4.0
              Name.starts_with("pmovzx") ||          // Added in 4.0
              Name.starts_with("pmul.dq.") ||        // Added in 4.0
              Name.starts_with("pmul.hr.sw.") ||     // Added in 7.0
              Name.starts_with("pmulh.w.") ||        // Added in 7.0
              Name.starts_with("pmulhu.w.") ||       // Added in 7.0
              Name.starts_with("pmull.") ||          // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") ||       // Added in 4.0
              Name.starts_with("por.") ||            // Added in 3.9
              Name.starts_with("prol.") ||           // Added in 8.0
              Name.starts_with("prolv.") ||          // Added in 8.0
              Name.starts_with("pror.") ||           // Added in 8.0
              Name.starts_with("prorv.") ||          // Added in 8.0
              Name.starts_with("pshuf.b.") ||        // Added in 4.0
              Name.starts_with("pshuf.d.") ||        // Added in 3.9
              Name.starts_with("pshufh.w.") ||       // Added in 3.9
              Name.starts_with("pshufl.w.") ||       // Added in 3.9
              Name.starts_with("psll.d") ||          // Added in 4.0
              Name.starts_with("psll.q") ||          // Added in 4.0
              Name.starts_with("psll.w") ||          // Added in 4.0
              Name.starts_with("pslli") ||           // Added in 4.0
              Name.starts_with("psllv") ||           // Added in 4.0
              Name.starts_with("psra.d") ||          // Added in 4.0
              Name.starts_with("psra.q") ||          // Added in 4.0
              Name.starts_with("psra.w") ||          // Added in 4.0
              Name.starts_with("psrai") ||           // Added in 4.0
              Name.starts_with("psrav") ||           // Added in 4.0
              Name.starts_with("psrl.d") ||          // Added in 4.0
              Name.starts_with("psrl.q") ||          // Added in 4.0
              Name.starts_with("psrl.w") ||          // Added in 4.0
              Name.starts_with("psrli") ||           // Added in 4.0
              Name.starts_with("psrlv") ||           // Added in 4.0
              Name.starts_with("psub.") ||           // Added in 4.0
              Name.starts_with("psubs.") ||          // Added in 8.0
              Name.starts_with("psubus.") ||         // Added in 8.0
              Name.starts_with("pternlog.") ||       // Added in 7.0
              Name.starts_with("punpckh") ||         // Added in 3.9
              Name.starts_with("punpckl") ||         // Added in 3.9
              Name.starts_with("pxor.") ||           // Added in 3.9
              Name.starts_with("shuf.f") ||          // Added in 6.0
              Name.starts_with("shuf.i") ||          // Added in 6.0
              Name.starts_with("shuf.p") ||          // Added in 4.0
              Name.starts_with("sqrt.p") ||          // Added in 7.0
              Name.starts_with("store.b.") ||        // Added in 3.9
              Name.starts_with("store.d.") ||        // Added in 3.9
              Name.starts_with("store.p") ||         // Added in 3.9
              Name.starts_with("store.q.") ||        // Added in 3.9
              Name.starts_with("store.w.") ||        // Added in 3.9
              Name == "store.ss" ||                  // Added in 7.0
              Name.starts_with("storeu.") ||         // Added in 3.9
              Name.starts_with("sub.p") ||           // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") ||           // Added in 5.0
              Name.starts_with("unpckh.") ||         // Added in 3.9
              Name.starts_with("unpckl.") ||         // Added in 3.9
              Name.starts_with("valign.") ||         // Added in 4.0
              Name == "vcvtph2ps.128" ||             // Added in 11.0
              Name == "vcvtph2ps.256" ||             // Added in 11.0
              Name.starts_with("vextract") ||        // Added in 4.0
              Name.starts_with("vfmadd.") ||         // Added in 7.0
              Name.starts_with("vfmaddsub.") ||      // Added in 7.0
              Name.starts_with("vfnmadd.") ||        // Added in 7.0
              Name.starts_with("vfnmsub.") ||        // Added in 7.0
              Name.starts_with("vpdpbusd.") ||       // Added in 7.0
              Name.starts_with("vpdpbusds.") ||      // Added in 7.0
              Name.starts_with("vpdpwssd.") ||       // Added in 7.0
              Name.starts_with("vpdpwssds.") ||      // Added in 7.0
              Name.starts_with("vpermi2var.") ||     // Added in 7.0
              Name.starts_with("vpermil.p") ||       // Added in 3.9
              Name.starts_with("vpermilvar.") ||     // Added in 4.0
              Name.starts_with("vpermt2var.") ||     // Added in 7.0
              Name.starts_with("vpmadd52") ||        // Added in 7.0
              Name.starts_with("vpshld.") ||         // Added in 7.0
              Name.starts_with("vpshldv.") ||        // Added in 8.0
              Name.starts_with("vpshrd.") ||         // Added in 7.0
              Name.starts_with("vpshrdv.") ||        // Added in 8.0
              Name.starts_with("vpshufbitqmb.") ||   // Added in 8.0
              Name.starts_with("xor."));             // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") ||    // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") ||    // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub."));    // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") ||   // Added in 7.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrdv."));     // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" ||               // Added in 5.0
            Name == "pmul.dq.512" ||            // Added in 7.0
            Name == "pmulu.dq.512" ||           // Added in 7.0
            Name.starts_with("broadcastm") ||   // Added in 6.0
            Name.starts_with("cmp.p") ||        // Added in 12.0
            Name.starts_with("cvtb2mask.") ||   // Added in 7.0
            Name.starts_with("cvtd2mask.") ||   // Added in 7.0
            Name.starts_with("cvtmask2") ||     // Added in 5.0
            Name.starts_with("cvtq2mask.") ||   // Added in 7.0
            Name == "cvtusi2sd" ||              // Added in 7.0
            Name.starts_with("cvtw2mask.") ||   // Added in 7.0
            Name == "kand.w" ||                 // Added in 7.0
            Name == "kandn.w" ||                // Added in 7.0
            Name == "knot.w" ||                 // Added in 7.0
            Name == "kor.w" ||                  // Added in 7.0
            Name == "kortestc.w" ||             // Added in 7.0
            Name == "kortestz.w" ||             // Added in 7.0
            Name.starts_with("kunpck") ||       // added in 6.0
            Name == "kxnor.w" ||                // Added in 7.0
            Name == "kxor.w" ||                 // Added in 7.0
            Name.starts_with("padds.") ||       // Added in 8.0
            Name.starts_with("pbroadcast") ||   // Added in 3.9
            Name.starts_with("prol") ||         // Added in 8.0
            Name.starts_with("pror") ||         // Added in 8.0
            Name.starts_with("psll.dq") ||      // Added in 3.9
            Name.starts_with("psrl.dq") ||      // Added in 3.9
            Name.starts_with("psubs.") ||       // Added in 8.0
            Name.starts_with("ptestm") ||       // Added in 6.0
            Name.starts_with("ptestnm") ||      // Added in 6.0
            Name.starts_with("storent.") ||     // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") ||      // Added in 8.0
            Name.starts_with("vpshrd."));       // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||    // Added in 7.0
            Name.starts_with("vfmsub.") ||    // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") ||   // Added in 7.0
            Name.starts_with("vfnmsub."));    // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||            // Added in 4.0
            Name == "cvtsi2ss" ||          // Added in 7.0
            Name == "cvtsi642ss" ||        // Added in 7.0
            Name == "div.ss" ||            // Added in 4.0
            Name == "mul.ss" ||            // Added in 4.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.ss" ||           // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss");             // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||            // Added in 4.0
            Name == "cvtdq2pd" ||          // Added in 3.9
            Name == "cvtdq2ps" ||          // Added in 7.0
            Name == "cvtps2pd" ||          // Added in 3.9
            Name == "cvtsi2sd" ||          // Added in 7.0
            Name == "cvtsi642sd" ||        // Added in 7.0
            Name == "cvtss2sd" ||          // Added in 7.0
            Name == "div.sd" ||            // Added in 4.0
            Name == "mul.sd" ||            // Added in 4.0
            Name.starts_with("padds.") ||  // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" ||           // Added in 3.9
            Name == "pmaxu.b" ||           // Added in 3.9
            Name == "pmins.w" ||           // Added in 3.9
            Name == "pminu.b" ||           // Added in 3.9
            Name == "pmulu.dq" ||          // Added in 7.0
            Name.starts_with("pshuf") ||   // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") ||  // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.sd" ||           // Added in 7.0
            Name == "storel.dq" ||         // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd");             // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" ||         // Added in 5.0
            Name == "pblendw" ||          // Added in 3.7
            Name == "pmaxsb" ||           // Added in 3.9
            Name == "pmaxsd" ||           // Added in 3.9
            Name == "pmaxud" ||           // Added in 3.9
            Name == "pmaxuw" ||           // Added in 3.9
            Name == "pminsb" ||           // Added in 3.9
            Name == "pminsd" ||           // Added in 3.9
            Name == "pminud" ||           // Added in 3.9
            Name == "pminuw" ||           // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq");            // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128");  // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||         // Added in 3.8
            Name == "vpcmov.256" ||     // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot"));  // Added in 8.0

  return (Name == "addcarry.u32" ||         // Added in 8.0
          Name == "addcarry.u64" ||         // Added in 8.0
          Name == "addcarryx.u32" ||        // Added in 8.0
          Name == "addcarryx.u64" ||        // Added in 8.0
          Name == "subborrow.u32" ||        // Added in 8.0
          Name == "subborrow.u64" ||        // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
543
544static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
545 Function *&NewFn) {
546 // Only handle intrinsics that start with "x86.".
547 if (!Name.consume_front(Prefix: "x86."))
548 return false;
549
550 if (shouldUpgradeX86Intrinsic(F, Name)) {
551 NewFn = nullptr;
552 return true;
553 }
554
555 if (Name == "rdtscp") { // Added in 8.0
556 // If this intrinsic has 0 operands, it's the new version.
557 if (F->getFunctionType()->getNumParams() == 0)
558 return false;
559
560 rename(GV: F);
561 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
562 id: Intrinsic::x86_rdtscp);
563 return true;
564 }
565
566 Intrinsic::ID ID;
567
568 // SSE4.1 ptest functions may have an old signature.
569 if (Name.consume_front(Prefix: "sse41.ptest")) { // Added in 3.2
570 ID = StringSwitch<Intrinsic::ID>(Name)
571 .Case(S: "c", Value: Intrinsic::x86_sse41_ptestc)
572 .Case(S: "z", Value: Intrinsic::x86_sse41_ptestz)
573 .Case(S: "nzc", Value: Intrinsic::x86_sse41_ptestnzc)
574 .Default(Value: Intrinsic::not_intrinsic);
575 if (ID != Intrinsic::not_intrinsic)
576 return upgradePTESTIntrinsic(F, IID: ID, NewFn);
577
578 return false;
579 }
580
581 // Several blend and other instructions with masks used the wrong number of
582 // bits.
583
584 // Added in 3.6
585 ID = StringSwitch<Intrinsic::ID>(Name)
586 .Case(S: "sse41.insertps", Value: Intrinsic::x86_sse41_insertps)
587 .Case(S: "sse41.dppd", Value: Intrinsic::x86_sse41_dppd)
588 .Case(S: "sse41.dpps", Value: Intrinsic::x86_sse41_dpps)
589 .Case(S: "sse41.mpsadbw", Value: Intrinsic::x86_sse41_mpsadbw)
590 .Case(S: "avx.dp.ps.256", Value: Intrinsic::x86_avx_dp_ps_256)
591 .Case(S: "avx2.mpsadbw", Value: Intrinsic::x86_avx2_mpsadbw)
592 .Default(Value: Intrinsic::not_intrinsic);
593 if (ID != Intrinsic::not_intrinsic)
594 return upgradeX86IntrinsicsWith8BitMask(F, IID: ID, NewFn);
595
596 if (Name.consume_front(Prefix: "avx512.")) {
597 if (Name.consume_front(Prefix: "mask.cmp.")) {
598 // Added in 7.0
599 ID = StringSwitch<Intrinsic::ID>(Name)
600 .Case(S: "pd.128", Value: Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .Case(S: "pd.256", Value: Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .Case(S: "pd.512", Value: Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .Case(S: "ps.128", Value: Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .Case(S: "ps.256", Value: Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .Case(S: "ps.512", Value: Intrinsic::x86_avx512_mask_cmp_ps_512)
606 .Default(Value: Intrinsic::not_intrinsic);
607 if (ID != Intrinsic::not_intrinsic)
608 return upgradeX86MaskedFPCompare(F, IID: ID, NewFn);
609 } else if (Name.starts_with(Prefix: "vpdpbusd.") ||
610 Name.starts_with(Prefix: "vpdpbusds.")) {
611 // Added in 21.1
612 ID = StringSwitch<Intrinsic::ID>(Name)
613 .Case(S: "vpdpbusd.128", Value: Intrinsic::x86_avx512_vpdpbusd_128)
614 .Case(S: "vpdpbusd.256", Value: Intrinsic::x86_avx512_vpdpbusd_256)
615 .Case(S: "vpdpbusd.512", Value: Intrinsic::x86_avx512_vpdpbusd_512)
616 .Case(S: "vpdpbusds.128", Value: Intrinsic::x86_avx512_vpdpbusds_128)
617 .Case(S: "vpdpbusds.256", Value: Intrinsic::x86_avx512_vpdpbusds_256)
618 .Case(S: "vpdpbusds.512", Value: Intrinsic::x86_avx512_vpdpbusds_512)
619 .Default(Value: Intrinsic::not_intrinsic);
620 if (ID != Intrinsic::not_intrinsic)
621 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
622 } else if (Name.starts_with(Prefix: "vpdpwssd.") ||
623 Name.starts_with(Prefix: "vpdpwssds.")) {
624 // Added in 21.1
625 ID = StringSwitch<Intrinsic::ID>(Name)
626 .Case(S: "vpdpwssd.128", Value: Intrinsic::x86_avx512_vpdpwssd_128)
627 .Case(S: "vpdpwssd.256", Value: Intrinsic::x86_avx512_vpdpwssd_256)
628 .Case(S: "vpdpwssd.512", Value: Intrinsic::x86_avx512_vpdpwssd_512)
629 .Case(S: "vpdpwssds.128", Value: Intrinsic::x86_avx512_vpdpwssds_128)
630 .Case(S: "vpdpwssds.256", Value: Intrinsic::x86_avx512_vpdpwssds_256)
631 .Case(S: "vpdpwssds.512", Value: Intrinsic::x86_avx512_vpdpwssds_512)
632 .Default(Value: Intrinsic::not_intrinsic);
633 if (ID != Intrinsic::not_intrinsic)
634 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
635 }
636 return false; // No other 'x86.avx512.*'.
637 }
638
639 if (Name.consume_front(Prefix: "avx2.")) {
640 if (Name.consume_front(Prefix: "vpdpb")) {
641 // Added in 21.1
642 ID = StringSwitch<Intrinsic::ID>(Name)
643 .Case(S: "ssd.128", Value: Intrinsic::x86_avx2_vpdpbssd_128)
644 .Case(S: "ssd.256", Value: Intrinsic::x86_avx2_vpdpbssd_256)
645 .Case(S: "ssds.128", Value: Intrinsic::x86_avx2_vpdpbssds_128)
646 .Case(S: "ssds.256", Value: Intrinsic::x86_avx2_vpdpbssds_256)
647 .Case(S: "sud.128", Value: Intrinsic::x86_avx2_vpdpbsud_128)
648 .Case(S: "sud.256", Value: Intrinsic::x86_avx2_vpdpbsud_256)
649 .Case(S: "suds.128", Value: Intrinsic::x86_avx2_vpdpbsuds_128)
650 .Case(S: "suds.256", Value: Intrinsic::x86_avx2_vpdpbsuds_256)
651 .Case(S: "uud.128", Value: Intrinsic::x86_avx2_vpdpbuud_128)
652 .Case(S: "uud.256", Value: Intrinsic::x86_avx2_vpdpbuud_256)
653 .Case(S: "uuds.128", Value: Intrinsic::x86_avx2_vpdpbuuds_128)
654 .Case(S: "uuds.256", Value: Intrinsic::x86_avx2_vpdpbuuds_256)
655 .Default(Value: Intrinsic::not_intrinsic);
656 if (ID != Intrinsic::not_intrinsic)
657 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
658 } else if (Name.consume_front(Prefix: "vpdpw")) {
659 // Added in 21.1
660 ID = StringSwitch<Intrinsic::ID>(Name)
661 .Case(S: "sud.128", Value: Intrinsic::x86_avx2_vpdpwsud_128)
662 .Case(S: "sud.256", Value: Intrinsic::x86_avx2_vpdpwsud_256)
663 .Case(S: "suds.128", Value: Intrinsic::x86_avx2_vpdpwsuds_128)
664 .Case(S: "suds.256", Value: Intrinsic::x86_avx2_vpdpwsuds_256)
665 .Case(S: "usd.128", Value: Intrinsic::x86_avx2_vpdpwusd_128)
666 .Case(S: "usd.256", Value: Intrinsic::x86_avx2_vpdpwusd_256)
667 .Case(S: "usds.128", Value: Intrinsic::x86_avx2_vpdpwusds_128)
668 .Case(S: "usds.256", Value: Intrinsic::x86_avx2_vpdpwusds_256)
669 .Case(S: "uud.128", Value: Intrinsic::x86_avx2_vpdpwuud_128)
670 .Case(S: "uud.256", Value: Intrinsic::x86_avx2_vpdpwuud_256)
671 .Case(S: "uuds.128", Value: Intrinsic::x86_avx2_vpdpwuuds_128)
672 .Case(S: "uuds.256", Value: Intrinsic::x86_avx2_vpdpwuuds_256)
673 .Default(Value: Intrinsic::not_intrinsic);
674 if (ID != Intrinsic::not_intrinsic)
675 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
676 }
677 return false; // No other 'x86.avx2.*'
678 }
679
680 if (Name.consume_front(Prefix: "avx10.")) {
681 if (Name.consume_front(Prefix: "vpdpb")) {
682 // Added in 21.1
683 ID = StringSwitch<Intrinsic::ID>(Name)
684 .Case(S: "ssd.512", Value: Intrinsic::x86_avx10_vpdpbssd_512)
685 .Case(S: "ssds.512", Value: Intrinsic::x86_avx10_vpdpbssds_512)
686 .Case(S: "sud.512", Value: Intrinsic::x86_avx10_vpdpbsud_512)
687 .Case(S: "suds.512", Value: Intrinsic::x86_avx10_vpdpbsuds_512)
688 .Case(S: "uud.512", Value: Intrinsic::x86_avx10_vpdpbuud_512)
689 .Case(S: "uuds.512", Value: Intrinsic::x86_avx10_vpdpbuuds_512)
690 .Default(Value: Intrinsic::not_intrinsic);
691 if (ID != Intrinsic::not_intrinsic)
692 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
693 } else if (Name.consume_front(Prefix: "vpdpw")) {
694 ID = StringSwitch<Intrinsic::ID>(Name)
695 .Case(S: "sud.512", Value: Intrinsic::x86_avx10_vpdpwsud_512)
696 .Case(S: "suds.512", Value: Intrinsic::x86_avx10_vpdpwsuds_512)
697 .Case(S: "usd.512", Value: Intrinsic::x86_avx10_vpdpwusd_512)
698 .Case(S: "usds.512", Value: Intrinsic::x86_avx10_vpdpwusds_512)
699 .Case(S: "uud.512", Value: Intrinsic::x86_avx10_vpdpwuud_512)
700 .Case(S: "uuds.512", Value: Intrinsic::x86_avx10_vpdpwuuds_512)
701 .Default(Value: Intrinsic::not_intrinsic);
702 if (ID != Intrinsic::not_intrinsic)
703 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
704 }
705 return false; // No other 'x86.avx10.*'
706 }
707
708 if (Name.consume_front(Prefix: "avx512bf16.")) {
709 // Added in 9.0
710 ID = StringSwitch<Intrinsic::ID>(Name)
711 .Case(S: "cvtne2ps2bf16.128",
712 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .Case(S: "cvtne2ps2bf16.256",
714 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .Case(S: "cvtne2ps2bf16.512",
716 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .Case(S: "mask.cvtneps2bf16.128",
718 Value: Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .Case(S: "cvtneps2bf16.256",
720 Value: Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .Case(S: "cvtneps2bf16.512",
722 Value: Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
723 .Default(Value: Intrinsic::not_intrinsic);
724 if (ID != Intrinsic::not_intrinsic)
725 return upgradeX86BF16Intrinsic(F, IID: ID, NewFn);
726
727 // Added in 9.0
728 ID = StringSwitch<Intrinsic::ID>(Name)
729 .Case(S: "dpbf16ps.128", Value: Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .Case(S: "dpbf16ps.256", Value: Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .Case(S: "dpbf16ps.512", Value: Intrinsic::x86_avx512bf16_dpbf16ps_512)
732 .Default(Value: Intrinsic::not_intrinsic);
733 if (ID != Intrinsic::not_intrinsic)
734 return upgradeX86BF16DPIntrinsic(F, IID: ID, NewFn);
735 return false; // No other 'x86.avx512bf16.*'.
736 }
737
738 if (Name.consume_front(Prefix: "xop.")) {
739 Intrinsic::ID ID = Intrinsic::not_intrinsic;
740 if (Name.starts_with(Prefix: "vpermil2")) { // Added in 3.9
741 // Upgrade any XOP PERMIL2 index operand still using a float/double
742 // vector.
743 auto Idx = F->getFunctionType()->getParamType(i: 2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
753 else
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
755 }
756 } else if (F->arg_size() == 2)
757 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
758 ID = StringSwitch<Intrinsic::ID>(Name)
759 .Case(S: "vfrcz.ss", Value: Intrinsic::x86_xop_vfrcz_ss)
760 .Case(S: "vfrcz.sd", Value: Intrinsic::x86_xop_vfrcz_sd)
761 .Default(Value: Intrinsic::not_intrinsic);
762
763 if (ID != Intrinsic::not_intrinsic) {
764 rename(GV: F);
765 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
766 return true;
767 }
768 return false; // No other 'x86.xop.*'
769 }
770
771 if (Name == "seh.recoverfp") {
772 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
773 id: Intrinsic::eh_recoverfp);
774 return true;
775 }
776
777 return false;
778}
779
780// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
781// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  // 'Name' is the suffix after the 'arm.' / 'aarch64.' prefix, which the
  // caller has already consumed; all matching below is on that suffix.
  if (Name.starts_with(Prefix: "rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        M: F->getParent(), id: Intrinsic::bitreverse, OverloadTys: F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        M: F->getParent(), id: Intrinsic::thread_pointer, OverloadTys: F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front(Prefix: "neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front(Prefix: "bfdot.")) {
      // (arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases(CaseStrings: {"v2f32.v8i8", "v4f32.v16i8"},
                     Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Value: Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        // Overload on the return type plus a bf16 vector of the same total
        // width (OperandWidth / 16 lanes of 16-bit bfloat).
        std::array<Type *, 2> Tys{
            ._M_elems: {F->getReturnType(),
             FixedVectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), NumElts: OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, OverloadTys: Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front(Prefix: "bfm")) {
      // (arm|aarch64).neon.bfm*'.
      if (Name.consume_back(Suffix: ".v4f32.v16i8")) {
        // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case(S: "mla",
                      Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case(S: "lalb",
                      Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case(S: "lalt",
                      Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Value: Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      // These NEON operations now map directly onto target-independent
      // intrinsics, overloaded on the first argument's type.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith(S: "vclz.", Value: Intrinsic::ctlz)
                             .StartsWith(S: "vcnt.", Value: Intrinsic::ctpop)
                             .StartsWith(S: "vqadds.", Value: Intrinsic::sadd_sat)
                             .StartsWith(S: "vqaddu.", Value: Intrinsic::uadd_sat)
                             .StartsWith(S: "vqsubs.", Value: Intrinsic::ssub_sat)
                             .StartsWith(S: "vqsubu.", Value: Intrinsic::usub_sat)
                             .StartsWith(S: "vrinta.", Value: Intrinsic::round)
                             .StartsWith(S: "vrintn.", Value: Intrinsic::roundeven)
                             .StartsWith(S: "vrintm.", Value: Intrinsic::floor)
                             .StartsWith(S: "vrintp.", Value: Intrinsic::ceil)
                             .StartsWith(S: "vrintx.", Value: Intrinsic::rint)
                             .StartsWith(S: "vrintz.", Value: Intrinsic::trunc)
                             .Default(Value: Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
                                                  OverloadTys: F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front(Prefix: "vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(String: Name, Matches: &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          // vstN takes N+2 parameters (ptr, N data vectors, alignment), so
          // fArgs.size() - 3 indexes StoreInts; the lane variants carry one
          // extra (lane) operand, hence fArgs.size() - 5 for StoreLaneInts.
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                M: F->getParent(), id: StoreInts[fArgs.size() - 3], OverloadTys: Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                M: F->getParent(), id: StoreLaneInts[fArgs.size() - 5], OverloadTys: Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front(Prefix: "mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(Val: F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(GV: F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with(Prefix: "vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            M: F->getParent(), id: Intrinsic::roundeven, OverloadTys: F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      // For all of these, returning true with NewFn left unset means the
      // call itself is rewritten later in UpgradeIntrinsicCall.
      if (Name.consume_back(Suffix: ".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(Suffix: ".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(Suffix: ".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front(Prefix: "vldr.gather.");
          if (IsGather || Name.consume_front(Prefix: "vstr.scatter.")) {
            if (Name.consume_front(Prefix: "base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front(Prefix: "wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front(Prefix: "offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front(Prefix: "cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(Suffix: ".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith(S: "frintn", Value: Intrinsic::roundeven)
                             .StartsWith(S: "rbit", Value: Intrinsic::bitreverse)
                             .Default(Value: Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
                                                  OverloadTys: F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with(Prefix: "addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(Val: F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          // Floating-point addp was upgraded to a dedicated faddp intrinsic.
          NewFn = Intrinsic::getOrInsertDeclaration(
              M: F->getParent(), id: Intrinsic::aarch64_neon_faddp, OverloadTys: Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
      if (Name.starts_with(Prefix: "bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front(Prefix: "sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front(Prefix: "bf")) {
        if (Name.consume_back(Suffix: ".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case(S: "dot", Value: Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case(S: "mlalb", Value: Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case(S: "mlalt", Value: Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Value: Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front(Prefix: "addqv")) {
        // 'aarch64.sve.addqv'.
        // Only the floating-point form needs upgrading (to faddqv).
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            M: F->getParent(), id: Intrinsic::aarch64_sve_faddqv, OverloadTys: Tys);
        return true;
      }

      if (Name.consume_front(Prefix: "ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(String: Name)) {
          Type *ScalarTy =
              cast<VectorType>(Val: F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(Val: F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ElementType: ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          // After the 'ld' prefix was consumed, Name[0] is '2', '3' or '4'
          // (guaranteed by LdRegex), which indexes LoadIDs.
          NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
                                                    id: LoadIDs[Name[0] - '2'], OverloadTys: Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front(Prefix: "tuple.")) {
        // 'aarch64.sve.tuple.*'.
        // The SVE tuple intrinsics were replaced by the generic
        // vector_extract / vector_insert intrinsics.
        if (Name.starts_with(Prefix: "get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              M: F->getParent(), id: Intrinsic::vector_extract, OverloadTys: Tys);
          return true;
        }

        if (Name.starts_with(Prefix: "set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              M: F->getParent(), id: Intrinsic::vector_insert, OverloadTys: Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(String: Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              M: F->getParent(), id: Intrinsic::vector_insert, OverloadTys: Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }

      if (Name.starts_with(Prefix: "rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>'
        NewFn = Intrinsic::getOrInsertDeclaration(
            M: F->getParent(), id: Intrinsic::vector_reverse, OverloadTys: F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
1106
// Decide whether a 'cp.async.bulk.tensor.g2s.*' NVVM declaration needs
// upgrading. \p Name is the intrinsic-name suffix (the leading target prefix
// is presumably stripped by the caller — it must start at
// 'cp.async.bulk.tensor.g2s.' for a match). Returns the matched intrinsic ID
// when an upgrade is required, otherwise Intrinsic::not_intrinsic.
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front(Prefix: "cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case(S: "im2col.3d",
                  Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case(S: "im2col.4d",
                  Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case(S: "im2col.5d",
                  Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case(S: "tile.1d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case(S: "tile.2d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case(S: "tile.3d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case(S: "tile.4d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case(S: "tile.5d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Value: Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    // (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(i: 0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", then
    // it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(i: FlagStartIndex);
    if (!ArgType->isIntegerTy(Bitwidth: 1))
      return ID;
  }

  // Already in the new form (or not a g2s tensor-copy name at all).
  return Intrinsic::not_intrinsic;
}
1153
1154static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
1155 StringRef Name) {
1156 if (Name.consume_front(Prefix: "mapa.shared.cluster"))
1157 if (F->getReturnType()->getPointerAddressSpace() ==
1158 NVPTXAS::ADDRESS_SPACE_SHARED)
1159 return Intrinsic::nvvm_mapa_shared_cluster;
1160
1161 if (Name.consume_front(Prefix: "cp.async.bulk.")) {
1162 Intrinsic::ID ID =
1163 StringSwitch<Intrinsic::ID>(Name)
1164 .Case(S: "global.to.shared.cluster",
1165 Value: Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1166 .Case(S: "shared.cta.to.cluster",
1167 Value: Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1168 .Default(Value: Intrinsic::not_intrinsic);
1169
1170 if (ID != Intrinsic::not_intrinsic)
1171 if (F->getArg(i: 0)->getType()->getPointerAddressSpace() ==
1172 NVPTXAS::ADDRESS_SPACE_SHARED)
1173 return ID;
1174 }
1175
1176 return Intrinsic::not_intrinsic;
1177}
1178
// Map a legacy NVVM bf16 math intrinsic name (the suffix after the target
// prefix, e.g. 'fma.rn.bf16', 'fmax.ftz.nan.bf16x2') to its intrinsic ID, or
// Intrinsic::not_intrinsic when the name is not one of the bf16/bf16x2
// 'fma.rn.*', 'fmax.*', 'fmin.*' or 'neg.*' variants handled here.
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front(Prefix: "fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case(S: "bf16", Value: Intrinsic::nvvm_fma_rn_bf16)
        .Case(S: "bf16x2", Value: Intrinsic::nvvm_fma_rn_bf16x2)
        .Case(S: "relu.bf16", Value: Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case(S: "relu.bf16x2", Value: Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Default(Value: Intrinsic::not_intrinsic);

  if (Name.consume_front(Prefix: "fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case(S: "bf16", Value: Intrinsic::nvvm_fmax_bf16)
        .Case(S: "bf16x2", Value: Intrinsic::nvvm_fmax_bf16x2)
        .Case(S: "ftz.bf16", Value: Intrinsic::nvvm_fmax_ftz_bf16)
        .Case(S: "ftz.bf16x2", Value: Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case(S: "ftz.nan.bf16", Value: Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case(S: "ftz.nan.bf16x2", Value: Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case(S: "ftz.nan.xorsign.abs.bf16",
              Value: Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case(S: "ftz.nan.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case(S: "ftz.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case(S: "ftz.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case(S: "nan.bf16", Value: Intrinsic::nvvm_fmax_nan_bf16)
        .Case(S: "nan.bf16x2", Value: Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case(S: "nan.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case(S: "nan.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case(S: "xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case(S: "xorsign.abs.bf16x2", Value: Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Value: Intrinsic::not_intrinsic);

  if (Name.consume_front(Prefix: "fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case(S: "bf16", Value: Intrinsic::nvvm_fmin_bf16)
        .Case(S: "bf16x2", Value: Intrinsic::nvvm_fmin_bf16x2)
        .Case(S: "ftz.bf16", Value: Intrinsic::nvvm_fmin_ftz_bf16)
        .Case(S: "ftz.bf16x2", Value: Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case(S: "ftz.nan.bf16", Value: Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case(S: "ftz.nan.bf16x2", Value: Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case(S: "ftz.nan.xorsign.abs.bf16",
              Value: Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case(S: "ftz.nan.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case(S: "ftz.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case(S: "ftz.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case(S: "nan.bf16", Value: Intrinsic::nvvm_fmin_nan_bf16)
        .Case(S: "nan.bf16x2", Value: Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case(S: "nan.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case(S: "nan.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case(S: "xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case(S: "xorsign.abs.bf16x2", Value: Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Value: Intrinsic::not_intrinsic);

  if (Name.consume_front(Prefix: "neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case(S: "bf16", Value: Intrinsic::nvvm_neg_bf16)
        .Case(S: "bf16x2", Value: Intrinsic::nvvm_neg_bf16x2)
        .Default(Value: Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}
1244
1245static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
1246 return Name.consume_front(Prefix: "local") || Name.consume_front(Prefix: "shared") ||
1247 Name.consume_front(Prefix: "global") || Name.consume_front(Prefix: "constant") ||
1248 Name.consume_front(Prefix: "param");
1249}
1250
1251static bool convertIntrinsicValidType(StringRef Name,
1252 const FunctionType *FuncTy) {
1253 Type *HalfTy = Type::getHalfTy(C&: FuncTy->getContext());
1254 if (Name.starts_with(Prefix: "to.fp16")) {
1255 return CastInst::castIsValid(op: Instruction::FPTrunc, SrcTy: FuncTy->getParamType(i: 0),
1256 DstTy: HalfTy) &&
1257 CastInst::castIsValid(op: Instruction::BitCast, SrcTy: HalfTy,
1258 DstTy: FuncTy->getReturnType());
1259 }
1260
1261 if (Name.starts_with(Prefix: "from.fp16")) {
1262 return CastInst::castIsValid(op: Instruction::BitCast, SrcTy: FuncTy->getParamType(i: 0),
1263 DstTy: HalfTy) &&
1264 CastInst::castIsValid(op: Instruction::FPExt, SrcTy: HalfTy,
1265 DstTy: FuncTy->getReturnType());
1266 }
1267
1268 return false;
1269}
1270
1271static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
1272 bool CanUpgradeDebugIntrinsicsToRecords) {
1273 assert(F && "Illegal to upgrade a non-existent Function.");
1274
1275 StringRef Name = F->getName();
1276
1277 // Quickly eliminate it, if it's not a candidate.
1278 if (!Name.consume_front(Prefix: "llvm.") || Name.empty())
1279 return false;
1280
1281 switch (Name[0]) {
1282 default: break;
1283 case 'a': {
1284 bool IsArm = Name.consume_front(Prefix: "arm.");
1285 if (IsArm || Name.consume_front(Prefix: "aarch64.")) {
1286 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1287 return true;
1288 break;
1289 }
1290
1291 if (Name.consume_front(Prefix: "amdgcn.")) {
1292 if (Name == "alignbit") {
1293 // Target specific intrinsic became redundant
1294 NewFn = Intrinsic::getOrInsertDeclaration(
1295 M: F->getParent(), id: Intrinsic::fshr, OverloadTys: {F->getReturnType()});
1296 return true;
1297 }
1298
1299 if (Name.consume_front(Prefix: "atomic.")) {
1300 if (Name.starts_with(Prefix: "inc") || Name.starts_with(Prefix: "dec") ||
1301 Name.starts_with(Prefix: "cond.sub") || Name.starts_with(Prefix: "csub")) {
1302 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1303 // and usub_sat so there's no new declaration.
1304 NewFn = nullptr;
1305 return true;
1306 }
1307 break; // No other 'amdgcn.atomic.*'
1308 }
1309
1310 switch (F->getIntrinsicID()) {
1311 default:
1312 break;
1313 // Legacy wmma iu intrinsics without the optional clamp operand.
1314 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1315 if (F->arg_size() == 7) {
1316 NewFn = nullptr;
1317 return true;
1318 }
1319 break;
1320 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1321 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1322 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1323 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1324 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1325 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1326 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1327 if (F->arg_size() == 8) {
1328 NewFn = nullptr;
1329 return true;
1330 }
1331 break;
1332 }
1333
1334 if (Name.consume_front(Prefix: "ds.") || Name.consume_front(Prefix: "global.atomic.") ||
1335 Name.consume_front(Prefix: "flat.atomic.")) {
1336 if (Name.starts_with(Prefix: "fadd") ||
1337 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1338 (Name.starts_with(Prefix: "fmin") && !Name.starts_with(Prefix: "fmin.num")) ||
1339 (Name.starts_with(Prefix: "fmax") && !Name.starts_with(Prefix: "fmax.num"))) {
1340 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1341 // declaration.
1342 NewFn = nullptr;
1343 return true;
1344 }
1345 }
1346
1347 if (Name.starts_with(Prefix: "ldexp.")) {
1348 // Target specific intrinsic became redundant
1349 NewFn = Intrinsic::getOrInsertDeclaration(
1350 M: F->getParent(), id: Intrinsic::ldexp,
1351 OverloadTys: {F->getReturnType(), F->getArg(i: 1)->getType()});
1352 return true;
1353 }
1354 break; // No other 'amdgcn.*'
1355 }
1356
1357 break;
1358 }
1359 case 'c': {
1360 if (F->arg_size() == 1) {
1361 if (Name.consume_front(Prefix: "convert.")) {
1362 if (convertIntrinsicValidType(Name, FuncTy: F->getFunctionType())) {
1363 NewFn = nullptr;
1364 return true;
1365 }
1366 }
1367
1368 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1369 .StartsWith(S: "ctlz.", Value: Intrinsic::ctlz)
1370 .StartsWith(S: "cttz.", Value: Intrinsic::cttz)
1371 .Default(Value: Intrinsic::not_intrinsic);
1372 if (ID != Intrinsic::not_intrinsic) {
1373 rename(GV: F);
1374 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1375 OverloadTys: F->arg_begin()->getType());
1376 return true;
1377 }
1378 }
1379
1380 if (F->arg_size() == 2 && Name == "coro.end") {
1381 rename(GV: F);
1382 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1383 id: Intrinsic::coro_end);
1384 return true;
1385 }
1386
1387 break;
1388 }
1389 case 'd':
1390 if (Name.consume_front(Prefix: "dbg.")) {
1391 // Mark debug intrinsics for upgrade to new debug format.
1392 if (CanUpgradeDebugIntrinsicsToRecords) {
1393 if (Name == "addr" || Name == "value" || Name == "assign" ||
1394 Name == "declare" || Name == "label") {
1395 // There's no function to replace these with.
1396 NewFn = nullptr;
1397 // But we do want these to get upgraded.
1398 return true;
1399 }
1400 }
1401 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1402 // converted to DbgVariableRecords later.
1403 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1404 rename(GV: F);
1405 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1406 id: Intrinsic::dbg_value);
1407 return true;
1408 }
1409 break; // No other 'dbg.*'.
1410 }
1411 break;
1412 case 'e':
1413 if (Name.consume_front(Prefix: "experimental.vector.")) {
1414 Intrinsic::ID ID =
1415 StringSwitch<Intrinsic::ID>(Name)
1416 // Skip over extract.last.active, otherwise it will be 'upgraded'
1417 // to a regular vector extract which is a different operation.
1418 .StartsWith(S: "extract.last.active.", Value: Intrinsic::not_intrinsic)
1419 .StartsWith(S: "extract.", Value: Intrinsic::vector_extract)
1420 .StartsWith(S: "insert.", Value: Intrinsic::vector_insert)
1421 .StartsWith(S: "reverse.", Value: Intrinsic::vector_reverse)
1422 .StartsWith(S: "interleave2.", Value: Intrinsic::vector_interleave2)
1423 .StartsWith(S: "deinterleave2.", Value: Intrinsic::vector_deinterleave2)
1424 .StartsWith(S: "partial.reduce.add",
1425 Value: Intrinsic::vector_partial_reduce_add)
1426 .Default(Value: Intrinsic::not_intrinsic);
1427 if (ID != Intrinsic::not_intrinsic) {
1428 const auto *FT = F->getFunctionType();
1429 SmallVector<Type *, 2> Tys;
1430 if (ID == Intrinsic::vector_extract ||
1431 ID == Intrinsic::vector_interleave2)
1432 // Extracting overloads the return type.
1433 Tys.push_back(Elt: FT->getReturnType());
1434 if (ID != Intrinsic::vector_interleave2)
1435 Tys.push_back(Elt: FT->getParamType(i: 0));
1436 if (ID == Intrinsic::vector_insert ||
1437 ID == Intrinsic::vector_partial_reduce_add)
1438 // Inserting overloads the inserted type.
1439 Tys.push_back(Elt: FT->getParamType(i: 1));
1440 rename(GV: F);
1441 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, OverloadTys: Tys);
1442 return true;
1443 }
1444
1445 if (Name.consume_front(Prefix: "reduce.")) {
1446 SmallVector<StringRef, 2> Groups;
1447 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1448 if (R.match(String: Name, Matches: &Groups))
1449 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1450 .Case(S: "add", Value: Intrinsic::vector_reduce_add)
1451 .Case(S: "mul", Value: Intrinsic::vector_reduce_mul)
1452 .Case(S: "and", Value: Intrinsic::vector_reduce_and)
1453 .Case(S: "or", Value: Intrinsic::vector_reduce_or)
1454 .Case(S: "xor", Value: Intrinsic::vector_reduce_xor)
1455 .Case(S: "smax", Value: Intrinsic::vector_reduce_smax)
1456 .Case(S: "smin", Value: Intrinsic::vector_reduce_smin)
1457 .Case(S: "umax", Value: Intrinsic::vector_reduce_umax)
1458 .Case(S: "umin", Value: Intrinsic::vector_reduce_umin)
1459 .Case(S: "fmax", Value: Intrinsic::vector_reduce_fmax)
1460 .Case(S: "fmin", Value: Intrinsic::vector_reduce_fmin)
1461 .Default(Value: Intrinsic::not_intrinsic);
1462
1463 bool V2 = false;
1464 if (ID == Intrinsic::not_intrinsic) {
1465 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1466 Groups.clear();
1467 V2 = true;
1468 if (R2.match(String: Name, Matches: &Groups))
1469 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1470 .Case(S: "fadd", Value: Intrinsic::vector_reduce_fadd)
1471 .Case(S: "fmul", Value: Intrinsic::vector_reduce_fmul)
1472 .Default(Value: Intrinsic::not_intrinsic);
1473 }
1474 if (ID != Intrinsic::not_intrinsic) {
1475 rename(GV: F);
1476 auto Args = F->getFunctionType()->params();
1477 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1478 OverloadTys: {Args[V2 ? 1 : 0]});
1479 return true;
1480 }
1481 break; // No other 'expermental.vector.reduce.*'.
1482 }
1483
1484 if (Name.consume_front(Prefix: "splice"))
1485 return true;
1486 break; // No other 'experimental.vector.*'.
1487 }
1488 if (Name.consume_front(Prefix: "experimental.stepvector.")) {
1489 Intrinsic::ID ID = Intrinsic::stepvector;
1490 rename(GV: F);
1491 NewFn = Intrinsic::getOrInsertDeclaration(
1492 M: F->getParent(), id: ID, OverloadTys: F->getFunctionType()->getReturnType());
1493 return true;
1494 }
1495 break; // No other 'e*'.
1496 case 'f':
1497 if (Name.starts_with(Prefix: "flt.rounds")) {
1498 rename(GV: F);
1499 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1500 id: Intrinsic::get_rounding);
1501 return true;
1502 }
1503 break;
1504 case 'i':
1505 if (Name.starts_with(Prefix: "invariant.group.barrier")) {
1506 // Rename invariant.group.barrier to launder.invariant.group
1507 auto Args = F->getFunctionType()->params();
1508 Type* ObjectPtr[1] = {Args[0]};
1509 rename(GV: F);
1510 NewFn = Intrinsic::getOrInsertDeclaration(
1511 M: F->getParent(), id: Intrinsic::launder_invariant_group, OverloadTys: ObjectPtr);
1512 return true;
1513 }
1514 break;
1515 case 'l':
1516 if ((Name.starts_with(Prefix: "lifetime.start") ||
1517 Name.starts_with(Prefix: "lifetime.end")) &&
1518 F->arg_size() == 2) {
1519 Intrinsic::ID IID = Name.starts_with(Prefix: "lifetime.start")
1520 ? Intrinsic::lifetime_start
1521 : Intrinsic::lifetime_end;
1522 rename(GV: F);
1523 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1524 OverloadTys: F->getArg(i: 0)->getType());
1525 return true;
1526 }
1527 break;
1528 case 'm': {
1529 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1530 // alignment parameter to embedding the alignment as an attribute of
1531 // the pointer args.
1532 if (unsigned ID = StringSwitch<unsigned>(Name)
1533 .StartsWith(S: "memcpy.", Value: Intrinsic::memcpy)
1534 .StartsWith(S: "memmove.", Value: Intrinsic::memmove)
1535 .Default(Value: 0)) {
1536 if (F->arg_size() == 5) {
1537 rename(GV: F);
1538 // Get the types of dest, src, and len
1539 ArrayRef<Type *> ParamTypes =
1540 F->getFunctionType()->params().slice(N: 0, M: 3);
1541 NewFn =
1542 Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, OverloadTys: ParamTypes);
1543 return true;
1544 }
1545 }
1546 if (Name.starts_with(Prefix: "memset.") && F->arg_size() == 5) {
1547 rename(GV: F);
1548 // Get the types of dest, and len
1549 const auto *FT = F->getFunctionType();
1550 Type *ParamTypes[2] = {
1551 FT->getParamType(i: 0), // Dest
1552 FT->getParamType(i: 2) // len
1553 };
1554 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1555 id: Intrinsic::memset, OverloadTys: ParamTypes);
1556 return true;
1557 }
1558
1559 unsigned MaskedID =
1560 StringSwitch<unsigned>(Name)
1561 .StartsWith(S: "masked.load", Value: Intrinsic::masked_load)
1562 .StartsWith(S: "masked.gather", Value: Intrinsic::masked_gather)
1563 .StartsWith(S: "masked.store", Value: Intrinsic::masked_store)
1564 .StartsWith(S: "masked.scatter", Value: Intrinsic::masked_scatter)
1565 .Default(Value: 0);
1566 if (MaskedID && F->arg_size() == 4) {
1567 rename(GV: F);
1568 if (MaskedID == Intrinsic::masked_load ||
1569 MaskedID == Intrinsic::masked_gather) {
1570 NewFn = Intrinsic::getOrInsertDeclaration(
1571 M: F->getParent(), id: MaskedID,
1572 OverloadTys: {F->getReturnType(), F->getArg(i: 0)->getType()});
1573 return true;
1574 }
1575 NewFn = Intrinsic::getOrInsertDeclaration(
1576 M: F->getParent(), id: MaskedID,
1577 OverloadTys: {F->getArg(i: 0)->getType(), F->getArg(i: 1)->getType()});
1578 return true;
1579 }
1580 break;
1581 }
1582 case 'n': {
1583 if (Name.consume_front(Prefix: "nvvm.")) {
1584 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1585 if (F->arg_size() == 1) {
1586 Intrinsic::ID IID =
1587 StringSwitch<Intrinsic::ID>(Name)
1588 .Cases(CaseStrings: {"brev32", "brev64"}, Value: Intrinsic::bitreverse)
1589 .Case(S: "clz.i", Value: Intrinsic::ctlz)
1590 .Case(S: "popc.i", Value: Intrinsic::ctpop)
1591 .Default(Value: Intrinsic::not_intrinsic);
1592 if (IID != Intrinsic::not_intrinsic) {
1593 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1594 OverloadTys: {F->getReturnType()});
1595 return true;
1596 }
1597 } else if (F->arg_size() == 2) {
1598 Intrinsic::ID IID =
1599 StringSwitch<Intrinsic::ID>(Name)
1600 .Cases(CaseStrings: {"max.s", "max.i", "max.ll"}, Value: Intrinsic::smax)
1601 .Cases(CaseStrings: {"min.s", "min.i", "min.ll"}, Value: Intrinsic::smin)
1602 .Cases(CaseStrings: {"max.us", "max.ui", "max.ull"}, Value: Intrinsic::umax)
1603 .Cases(CaseStrings: {"min.us", "min.ui", "min.ull"}, Value: Intrinsic::umin)
1604 .Default(Value: Intrinsic::not_intrinsic);
1605 if (IID != Intrinsic::not_intrinsic) {
1606 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1607 OverloadTys: {F->getReturnType()});
1608 return true;
1609 }
1610 }
1611
1612 // Check for nvvm intrinsics that need a return type adjustment.
1613 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1614 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1615 if (IID != Intrinsic::not_intrinsic) {
1616 NewFn = nullptr;
1617 return true;
1618 }
1619 }
1620
1621 // Upgrade Distributed Shared Memory Intrinsics
1622 Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
1623 if (IID != Intrinsic::not_intrinsic) {
1624 rename(GV: F);
1625 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1626 return true;
1627 }
1628
1629 // Upgrade TMA copy G2S Intrinsics
1630 IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
1631 if (IID != Intrinsic::not_intrinsic) {
1632 rename(GV: F);
1633 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1634 return true;
1635 }
1636
1637 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1638 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1639 //
1640 // TODO: We could add lohi.i2d.
1641 bool Expand = false;
1642 if (Name.consume_front(Prefix: "abs."))
1643 // nvvm.abs.{i,ii}
1644 Expand =
1645 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1646 else if (Name.consume_front(Prefix: "fabs."))
1647 // nvvm.fabs.{f,ftz.f,d}
1648 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1649 else if (Name.consume_front(Prefix: "ex2.approx."))
1650 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1651 Expand =
1652 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1653 else if (Name.consume_front(Prefix: "atomic.load."))
1654 // nvvm.atomic.load.add.{f32,f64}.p
1655 // nvvm.atomic.load.{inc,dec}.32.p
1656 Expand = StringSwitch<bool>(Name)
1657 .StartsWith(S: "add.f32.p", Value: true)
1658 .StartsWith(S: "add.f64.p", Value: true)
1659 .StartsWith(S: "inc.32.p", Value: true)
1660 .StartsWith(S: "dec.32.p", Value: true)
1661 .Default(Value: false);
1662 else if (Name.consume_front(Prefix: "bitcast."))
1663 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1664 Expand =
1665 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1666 else if (Name.consume_front(Prefix: "rotate."))
1667 // nvvm.rotate.{b32,b64,right.b64}
1668 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1669 else if (Name.consume_front(Prefix: "ptr.gen.to."))
1670 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1671 Expand = consumeNVVMPtrAddrSpace(Name);
1672 else if (Name.consume_front(Prefix: "ptr."))
1673 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1674 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(Prefix: ".to.gen");
1675 else if (Name.consume_front(Prefix: "ldg.global."))
1676 // nvvm.ldg.global.{i,p,f}
1677 Expand = (Name.starts_with(Prefix: "i.") || Name.starts_with(Prefix: "f.") ||
1678 Name.starts_with(Prefix: "p."));
1679 else
1680 Expand = StringSwitch<bool>(Name)
1681 .Case(S: "barrier0", Value: true)
1682 .Case(S: "barrier.n", Value: true)
1683 .Case(S: "barrier.sync.cnt", Value: true)
1684 .Case(S: "barrier.sync", Value: true)
1685 .Case(S: "barrier", Value: true)
1686 .Case(S: "bar.sync", Value: true)
1687 .Case(S: "barrier0.popc", Value: true)
1688 .Case(S: "barrier0.and", Value: true)
1689 .Case(S: "barrier0.or", Value: true)
1690 .Case(S: "clz.ll", Value: true)
1691 .Case(S: "popc.ll", Value: true)
1692 .Case(S: "h2f", Value: true)
1693 .Case(S: "swap.lo.hi.b64", Value: true)
1694 .Case(S: "tanh.approx.f32", Value: true)
1695 .Default(Value: false);
1696
1697 if (Expand) {
1698 NewFn = nullptr;
1699 return true;
1700 }
1701 break; // No other 'nvvm.*'.
1702 }
1703 break;
1704 }
1705 case 'o':
1706 if (Name.starts_with(Prefix: "objectsize.")) {
1707 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1708 if (F->arg_size() == 2 || F->arg_size() == 3) {
1709 rename(GV: F);
1710 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1711 id: Intrinsic::objectsize, OverloadTys: Tys);
1712 return true;
1713 }
1714 }
1715 break;
1716
1717 case 'p':
1718 if (Name.starts_with(Prefix: "ptr.annotation.") && F->arg_size() == 4) {
1719 rename(GV: F);
1720 NewFn = Intrinsic::getOrInsertDeclaration(
1721 M: F->getParent(), id: Intrinsic::ptr_annotation,
1722 OverloadTys: {F->arg_begin()->getType(), F->getArg(i: 1)->getType()});
1723 return true;
1724 }
1725 break;
1726
1727 case 'r': {
1728 if (Name.consume_front(Prefix: "riscv.")) {
1729 Intrinsic::ID ID;
1730 ID = StringSwitch<Intrinsic::ID>(Name)
1731 .Case(S: "aes32dsi", Value: Intrinsic::riscv_aes32dsi)
1732 .Case(S: "aes32dsmi", Value: Intrinsic::riscv_aes32dsmi)
1733 .Case(S: "aes32esi", Value: Intrinsic::riscv_aes32esi)
1734 .Case(S: "aes32esmi", Value: Intrinsic::riscv_aes32esmi)
1735 .Default(Value: Intrinsic::not_intrinsic);
1736 if (ID != Intrinsic::not_intrinsic) {
1737 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32)) {
1738 rename(GV: F);
1739 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1740 return true;
1741 }
1742 break; // No other applicable upgrades.
1743 }
1744
1745 ID = StringSwitch<Intrinsic::ID>(Name)
1746 .StartsWith(S: "sm4ks", Value: Intrinsic::riscv_sm4ks)
1747 .StartsWith(S: "sm4ed", Value: Intrinsic::riscv_sm4ed)
1748 .Default(Value: Intrinsic::not_intrinsic);
1749 if (ID != Intrinsic::not_intrinsic) {
1750 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32) ||
1751 F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) {
1752 rename(GV: F);
1753 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1754 return true;
1755 }
1756 break; // No other applicable upgrades.
1757 }
1758
1759 ID = StringSwitch<Intrinsic::ID>(Name)
1760 .StartsWith(S: "sha256sig0", Value: Intrinsic::riscv_sha256sig0)
1761 .StartsWith(S: "sha256sig1", Value: Intrinsic::riscv_sha256sig1)
1762 .StartsWith(S: "sha256sum0", Value: Intrinsic::riscv_sha256sum0)
1763 .StartsWith(S: "sha256sum1", Value: Intrinsic::riscv_sha256sum1)
1764 .StartsWith(S: "sm3p0", Value: Intrinsic::riscv_sm3p0)
1765 .StartsWith(S: "sm3p1", Value: Intrinsic::riscv_sm3p1)
1766 .Default(Value: Intrinsic::not_intrinsic);
1767 if (ID != Intrinsic::not_intrinsic) {
1768 if (F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) {
1769 rename(GV: F);
1770 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1771 return true;
1772 }
1773 break; // No other applicable upgrades.
1774 }
1775
1776 // Replace llvm.riscv.clmul with llvm.clmul.
1777 if (Name == "clmul.i32" || Name == "clmul.i64") {
1778 NewFn = Intrinsic::getOrInsertDeclaration(
1779 M: F->getParent(), id: Intrinsic::clmul, OverloadTys: {F->getReturnType()});
1780 return true;
1781 }
1782
1783 break; // No other 'riscv.*' intrinsics
1784 }
1785 } break;
1786
1787 case 's':
1788 if (Name == "stackprotectorcheck") {
1789 NewFn = nullptr;
1790 return true;
1791 }
1792 break;
1793
1794 case 't':
1795 if (Name == "thread.pointer") {
1796 NewFn = Intrinsic::getOrInsertDeclaration(
1797 M: F->getParent(), id: Intrinsic::thread_pointer, OverloadTys: F->getReturnType());
1798 return true;
1799 }
1800 break;
1801
1802 case 'v': {
1803 if (Name == "var.annotation" && F->arg_size() == 4) {
1804 rename(GV: F);
1805 NewFn = Intrinsic::getOrInsertDeclaration(
1806 M: F->getParent(), id: Intrinsic::var_annotation,
1807 OverloadTys: {{F->arg_begin()->getType(), F->getArg(i: 1)->getType()}});
1808 return true;
1809 }
1810 if (Name.consume_front(Prefix: "vector.splice")) {
1811 if (Name.starts_with(Prefix: ".left") || Name.starts_with(Prefix: ".right"))
1812 break;
1813 return true;
1814 }
1815 break;
1816 }
1817
1818 case 'w':
1819 if (Name.consume_front(Prefix: "wasm.")) {
1820 Intrinsic::ID ID =
1821 StringSwitch<Intrinsic::ID>(Name)
1822 .StartsWith(S: "fma.", Value: Intrinsic::wasm_relaxed_madd)
1823 .StartsWith(S: "fms.", Value: Intrinsic::wasm_relaxed_nmadd)
1824 .StartsWith(S: "laneselect.", Value: Intrinsic::wasm_relaxed_laneselect)
1825 .Default(Value: Intrinsic::not_intrinsic);
1826 if (ID != Intrinsic::not_intrinsic) {
1827 rename(GV: F);
1828 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1829 OverloadTys: F->getReturnType());
1830 return true;
1831 }
1832
1833 if (Name.consume_front(Prefix: "dot.i8x16.i7x16.")) {
1834 ID = StringSwitch<Intrinsic::ID>(Name)
1835 .Case(S: "signed", Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1836 .Case(S: "add.signed",
1837 Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1838 .Default(Value: Intrinsic::not_intrinsic);
1839 if (ID != Intrinsic::not_intrinsic) {
1840 rename(GV: F);
1841 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1842 return true;
1843 }
1844 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1845 }
1846 break; // No other 'wasm.*'.
1847 }
1848 break;
1849
1850 case 'x':
1851 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1852 return true;
1853 }
1854
1855 auto *ST = dyn_cast<StructType>(Val: F->getReturnType());
1856 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1857 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1858 // Replace return type with literal non-packed struct. Only do this for
1859 // intrinsics declared to return a struct, not for intrinsics with
1860 // overloaded return type, in which case the exact struct type will be
1861 // mangled into the name.
1862 SmallVector<Intrinsic::IITDescriptor> Desc;
1863 Intrinsic::getIntrinsicInfoTableEntries(id: F->getIntrinsicID(), T&: Desc);
1864 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1865 auto *FT = F->getFunctionType();
1866 auto *NewST = StructType::get(Context&: ST->getContext(), Elements: ST->elements());
1867 auto *NewFT = FunctionType::get(Result: NewST, Params: FT->params(), isVarArg: FT->isVarArg());
1868 std::string Name = F->getName().str();
1869 rename(GV: F);
1870 NewFn = Function::Create(Ty: NewFT, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(),
1871 N: Name, M: F->getParent());
1872
1873 // The new function may also need remangling.
1874 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F: NewFn))
1875 NewFn = *Result;
1876 return true;
1877 }
1878 }
1879
1880 // Remangle our intrinsic since we upgrade the mangling
1881 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1882 if (Result != std::nullopt) {
1883 NewFn = *Result;
1884 return true;
1885 }
1886
1887 // This may not belong here. This function is effectively being overloaded
1888 // to both detect an intrinsic which needs upgrading, and to provide the
1889 // upgraded form of the intrinsic. We should perhaps have two separate
1890 // functions for this.
1891 return false;
1892}
1893
1894bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1895 bool CanUpgradeDebugIntrinsicsToRecords) {
1896 NewFn = nullptr;
1897 bool Upgraded =
1898 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1899
1900 // Upgrade intrinsic attributes. This does not change the function.
1901 if (NewFn)
1902 F = NewFn;
1903 if (Intrinsic::ID id = F->getIntrinsicID()) {
1904 // Only do this if the intrinsic signature is valid.
1905 SmallVector<Type *> OverloadTys;
1906 if (Intrinsic::getIntrinsicSignature(id, FT: F->getFunctionType(), OverloadTys))
1907 F->setAttributes(
1908 Intrinsic::getAttributes(C&: F->getContext(), id, FT: F->getFunctionType()));
1909 }
1910 return Upgraded;
1911}
1912
1913GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1914 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1915 GV->getName() == "llvm.global_dtors")) ||
1916 !GV->hasInitializer())
1917 return nullptr;
1918 ArrayType *ATy = dyn_cast<ArrayType>(Val: GV->getValueType());
1919 if (!ATy)
1920 return nullptr;
1921 StructType *STy = dyn_cast<StructType>(Val: ATy->getElementType());
1922 if (!STy || STy->getNumElements() != 2)
1923 return nullptr;
1924
1925 LLVMContext &C = GV->getContext();
1926 IRBuilder<> IRB(C);
1927 auto EltTy = StructType::get(elt1: STy->getElementType(N: 0), elts: STy->getElementType(N: 1),
1928 elts: IRB.getPtrTy());
1929 Constant *Init = GV->getInitializer();
1930 unsigned N = Init->getNumOperands();
1931 std::vector<Constant *> NewCtors(N);
1932 for (unsigned i = 0; i != N; ++i) {
1933 auto Ctor = cast<Constant>(Val: Init->getOperand(i));
1934 NewCtors[i] = ConstantStruct::get(T: EltTy, Vs: Ctor->getAggregateElement(Elt: 0u),
1935 Vs: Ctor->getAggregateElement(Elt: 1),
1936 Vs: ConstantPointerNull::get(T: IRB.getPtrTy()));
1937 }
1938 Constant *NewInit = ConstantArray::get(T: ArrayType::get(ElementType: EltTy, NumElements: N), V: NewCtors);
1939
1940 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1941 NewInit, GV->getName());
1942}
1943
1944// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1945// to byte shuffles.
1946static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1947 unsigned Shift) {
1948 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1949 unsigned NumElts = ResultTy->getNumElements() * 8;
1950
1951 // Bitcast from a 64-bit element type to a byte element type.
1952 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1953 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1954
1955 // We'll be shuffling in zeroes.
1956 Value *Res = Constant::getNullValue(Ty: VecTy);
1957
1958 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1959 // we'll just return the zero vector.
1960 if (Shift < 16) {
1961 int Idxs[64];
1962 // 256/512-bit version is split into 2/4 16-byte lanes.
1963 for (unsigned l = 0; l != NumElts; l += 16)
1964 for (unsigned i = 0; i != 16; ++i) {
1965 unsigned Idx = NumElts + i - Shift;
1966 if (Idx < NumElts)
1967 Idx -= NumElts - 16; // end of lane, switch operand.
1968 Idxs[l + i] = Idx + l;
1969 }
1970
1971 Res = Builder.CreateShuffleVector(V1: Res, V2: Op, Mask: ArrayRef(Idxs, NumElts));
1972 }
1973
1974 // Bitcast back to a 64-bit element type.
1975 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
1976}
1977
1978// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1979// to byte shuffles.
1980static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1981 unsigned Shift) {
1982 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1983 unsigned NumElts = ResultTy->getNumElements() * 8;
1984
1985 // Bitcast from a 64-bit element type to a byte element type.
1986 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1987 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1988
1989 // We'll be shuffling in zeroes.
1990 Value *Res = Constant::getNullValue(Ty: VecTy);
1991
1992 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1993 // we'll just return the zero vector.
1994 if (Shift < 16) {
1995 int Idxs[64];
1996 // 256/512-bit version is split into 2/4 16-byte lanes.
1997 for (unsigned l = 0; l != NumElts; l += 16)
1998 for (unsigned i = 0; i != 16; ++i) {
1999 unsigned Idx = i + Shift;
2000 if (Idx >= 16)
2001 Idx += NumElts - 16; // end of lane, switch operand.
2002 Idxs[l + i] = Idx + l;
2003 }
2004
2005 Res = Builder.CreateShuffleVector(V1: Op, V2: Res, Mask: ArrayRef(Idxs, NumElts));
2006 }
2007
2008 // Bitcast back to a 64-bit element type.
2009 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
2010}
2011
2012static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2013 unsigned NumElts) {
2014 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2015 llvm::VectorType *MaskTy = FixedVectorType::get(
2016 ElementType: Builder.getInt1Ty(), NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth());
2017 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
2018
2019 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2020 // i8 and we need to extract down to the right number of elements.
2021 if (NumElts <= 4) {
2022 int Indices[4];
2023 for (unsigned i = 0; i != NumElts; ++i)
2024 Indices[i] = i;
2025 Mask = Builder.CreateShuffleVector(V1: Mask, V2: Mask, Mask: ArrayRef(Indices, NumElts),
2026 Name: "extract");
2027 }
2028
2029 return Mask;
2030}
2031
2032static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2033 Value *Op1) {
2034 // If the mask is all ones just emit the first operation.
2035 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2036 if (C->isAllOnesValue())
2037 return Op0;
2038
2039 Mask = getX86MaskVec(Builder, Mask,
2040 NumElts: cast<FixedVectorType>(Val: Op0->getType())->getNumElements());
2041 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
2042}
2043
2044static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2045 Value *Op1) {
2046 // If the mask is all ones just emit the first operation.
2047 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2048 if (C->isAllOnesValue())
2049 return Op0;
2050
2051 auto *MaskTy = FixedVectorType::get(ElementType: Builder.getInt1Ty(),
2052 NumElts: Mask->getType()->getIntegerBitWidth());
2053 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
2054 Mask = Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0);
2055 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
2056}
2057
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// The concatenation (Op0:Op1) is shifted right by ShiftVal elements/bytes and
// the low part kept; the result is then blended with Passthru under Mask.
static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  // The shift amount is an immediate operand, so it is always a ConstantInt.
  unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN. Hardware VALIGN ignores the high bits of
  // the immediate, so after this ShiftVal < NumElts <= 16 and neither of the
  // large-shift cases below can trigger for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Ty: Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Ty: Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  // (for VALIGN, NumElts <= 16 so this outer loop runs exactly once with
  // l == 0 and the shuffle spans the whole vector).
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  // Shuffle indices address the concatenation (Op1, Op0): indices < NumElts
  // read Op1 (the low source), indices >= NumElts read Op0.
  Value *Align = Builder.CreateShuffleVector(
      V1: Op1, V2: Op0, Mask: ArrayRef(Indices, NumElts), Name: "palignr");

  // Apply the write-mask: lanes with a 0 mask bit keep Passthru's value.
  return emitX86Select(Builder, Mask, Op0: Align, Op1: Passthru);
}
2105
2106static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2107 bool ZeroMask, bool IndexForm) {
2108 Type *Ty = CI.getType();
2109 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2110 unsigned EltWidth = Ty->getScalarSizeInBits();
2111 bool IsFloat = Ty->isFPOrFPVectorTy();
2112 Intrinsic::ID IID;
2113 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2114 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2115 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2116 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2117 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2118 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2119 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2120 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2121 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2123 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2125 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2127 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2129 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2131 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2133 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2135 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2137 else if (VecWidth == 128 && EltWidth == 16)
2138 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2139 else if (VecWidth == 256 && EltWidth == 16)
2140 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2141 else if (VecWidth == 512 && EltWidth == 16)
2142 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2143 else if (VecWidth == 128 && EltWidth == 8)
2144 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2145 else if (VecWidth == 256 && EltWidth == 8)
2146 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2147 else if (VecWidth == 512 && EltWidth == 8)
2148 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2149 else
2150 llvm_unreachable("Unexpected intrinsic");
2151
2152 Value *Args[] = { CI.getArgOperand(i: 0) , CI.getArgOperand(i: 1),
2153 CI.getArgOperand(i: 2) };
2154
2155 // If this isn't index form we need to swap operand 0 and 1.
2156 if (!IndexForm)
2157 std::swap(a&: Args[0], b&: Args[1]);
2158
2159 Value *V = Builder.CreateIntrinsic(ID: IID, Args);
2160 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2161 : Builder.CreateBitCast(V: CI.getArgOperand(i: 1),
2162 DestTy: Ty);
2163 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: V, Op1: PassThru);
2164}
2165
2166static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2167 Intrinsic::ID IID) {
2168 Type *Ty = CI.getType();
2169 Value *Op0 = CI.getOperand(i_nocapture: 0);
2170 Value *Op1 = CI.getOperand(i_nocapture: 1);
2171 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1});
2172
2173 if (CI.arg_size() == 4) { // For masked intrinsics.
2174 Value *VecSrc = CI.getOperand(i_nocapture: 2);
2175 Value *Mask = CI.getOperand(i_nocapture: 3);
2176 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2177 }
2178 return Res;
2179}
2180
2181static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2182 bool IsRotateRight) {
2183 Type *Ty = CI.getType();
2184 Value *Src = CI.getArgOperand(i: 0);
2185 Value *Amt = CI.getArgOperand(i: 1);
2186
2187 // Amount may be scalar immediate, in which case create a splat vector.
2188 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2189 // we only care about the lowest log2 bits anyway.
2190 if (Amt->getType() != Ty) {
2191 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
2192 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
2193 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
2194 }
2195
2196 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2197 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Src, Src, Amt});
2198
2199 if (CI.arg_size() == 4) { // For masked intrinsics.
2200 Value *VecSrc = CI.getOperand(i_nocapture: 2);
2201 Value *Mask = CI.getOperand(i_nocapture: 3);
2202 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2203 }
2204 return Res;
2205}
2206
2207static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2208 bool IsSigned) {
2209 Type *Ty = CI.getType();
2210 Value *LHS = CI.getArgOperand(i: 0);
2211 Value *RHS = CI.getArgOperand(i: 1);
2212
2213 CmpInst::Predicate Pred;
2214 switch (Imm) {
2215 case 0x0:
2216 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2217 break;
2218 case 0x1:
2219 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2220 break;
2221 case 0x2:
2222 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2223 break;
2224 case 0x3:
2225 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2226 break;
2227 case 0x4:
2228 Pred = ICmpInst::ICMP_EQ;
2229 break;
2230 case 0x5:
2231 Pred = ICmpInst::ICMP_NE;
2232 break;
2233 case 0x6:
2234 return Constant::getNullValue(Ty); // FALSE
2235 case 0x7:
2236 return Constant::getAllOnesValue(Ty); // TRUE
2237 default:
2238 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2239 }
2240
2241 Value *Cmp = Builder.CreateICmp(P: Pred, LHS, RHS);
2242 Value *Ext = Builder.CreateSExt(V: Cmp, DestTy: Ty);
2243 return Ext;
2244}
2245
2246static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2247 bool IsShiftRight, bool ZeroMask) {
2248 Type *Ty = CI.getType();
2249 Value *Op0 = CI.getArgOperand(i: 0);
2250 Value *Op1 = CI.getArgOperand(i: 1);
2251 Value *Amt = CI.getArgOperand(i: 2);
2252
2253 if (IsShiftRight)
2254 std::swap(a&: Op0, b&: Op1);
2255
2256 // Amount may be scalar immediate, in which case create a splat vector.
2257 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2258 // we only care about the lowest log2 bits anyway.
2259 if (Amt->getType() != Ty) {
2260 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
2261 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
2262 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
2263 }
2264
2265 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2266 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1, Amt});
2267
2268 unsigned NumArgs = CI.arg_size();
2269 if (NumArgs >= 4) { // For masked intrinsics.
2270 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(i: 3) :
2271 ZeroMask ? ConstantAggregateZero::get(Ty: CI.getType()) :
2272 CI.getArgOperand(i: 0);
2273 Value *Mask = CI.getOperand(i_nocapture: NumArgs - 1);
2274 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2275 }
2276 return Res;
2277}
2278
// Upgrade a legacy X86 masked-store intrinsic to either a plain aligned
// store (when the mask is provably all-ones) or a generic llvm.masked.store.
// \p Aligned selects the vector's natural alignment vs. unaligned (align 1).
static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
                                 Value *Mask, bool Aligned) {
  // Aligned forms use the full vector store size (in bytes) as alignment.
  const Align Alignment =
      Aligned
          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
          : Align(1);

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Val: Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Val: Data, Ptr, Align: Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(Val: Data->getType())->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Val: Data, Ptr, Alignment, Mask);
}
2296
// Upgrade a legacy X86 masked-load intrinsic to either a plain aligned load
// (when the mask is provably all-ones) or a generic llvm.masked.load, with
// \p Passthru supplying the value of disabled lanes.
// \p Aligned selects the vector's natural alignment vs. unaligned (align 1).
static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
                                Value *Passthru, Value *Mask, bool Aligned) {
  Type *ValTy = Passthru->getType();
  // Aligned forms use the full vector load size (in bytes) as alignment.
  const Align Alignment =
      Aligned
          ? Align(
                Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
                8)
          : Align(1);

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Val: Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ty: ValTy, Ptr, Align: Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(Val: ValTy)->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ty: ValTy, Ptr, Alignment, Mask, PassThru: Passthru);
}
2317
2318static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2319 Type *Ty = CI.getType();
2320 Value *Op0 = CI.getArgOperand(i: 0);
2321 Value *Res = Builder.CreateIntrinsic(ID: Intrinsic::abs, Types: Ty,
2322 Args: {Op0, Builder.getInt1(V: false)});
2323 if (CI.arg_size() == 3)
2324 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 2), Op0: Res, Op1: CI.getArgOperand(i: 1));
2325 return Res;
2326}
2327
2328static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2329 Type *Ty = CI.getType();
2330
2331 // Arguments have a vXi32 type so cast to vXi64.
2332 Value *LHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 0), DestTy: Ty);
2333 Value *RHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 1), DestTy: Ty);
2334
2335 if (IsSigned) {
2336 // Shift left then arithmetic shift right.
2337 Constant *ShiftAmt = ConstantInt::get(Ty, V: 32);
2338 LHS = Builder.CreateShl(LHS, RHS: ShiftAmt);
2339 LHS = Builder.CreateAShr(LHS, RHS: ShiftAmt);
2340 RHS = Builder.CreateShl(LHS: RHS, RHS: ShiftAmt);
2341 RHS = Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt);
2342 } else {
2343 // Clear the upper bits.
2344 Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff);
2345 LHS = Builder.CreateAnd(LHS, RHS: Mask);
2346 RHS = Builder.CreateAnd(LHS: RHS, RHS: Mask);
2347 }
2348
2349 Value *Res = Builder.CreateMul(LHS, RHS);
2350
2351 if (CI.arg_size() == 4)
2352 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Res, Op1: CI.getArgOperand(i: 2));
2353
2354 return Res;
2355}
2356
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// \p Vec is a vXi1 compare result; \p Mask (may be null) is the integer
// write-mask to AND in. Returns the result bitcast to an iN integer with
// N = max(NumElts, 8).
static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
  if (Mask) {
    // Skip the AND entirely when the mask is a constant all-ones.
    const auto *C = dyn_cast<Constant>(Val: Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(LHS: Vec, RHS: getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Widen to 8 lanes, padding with zeros: indices >= NumElts select from
    // the all-zero second shuffle operand. Note the expression parses as
    // NumElts + (i % NumElts), which always lands in [NumElts, 2*NumElts).
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(V1: Vec,
                                      V2: Constant::getNullValue(Ty: Vec->getType()),
                                      Mask: Indices);
  }
  return Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: std::max(a: NumElts, b: 8U)));
}
2379
// Upgrade legacy AVX512 masked integer-compare intrinsics. \p CC is the
// immediate condition code: 3 folds to all-false, 7 to all-true, and the
// remaining codes map to an icmp predicate (signed vs. unsigned chosen by
// \p Signed). The i1 vector result is masked and widened to >= 8 bits.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(i: 0);
  unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();

  Value *Cmp;
  if (CC == 3) {
    // Always-false compare.
    Cmp = Constant::getNullValue(
        Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    // Always-true compare.
    Cmp = Constant::getAllOnesValue(
        Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(P: Pred, LHS: Op0, RHS: CI.getArgOperand(i: 1));
  }

  // The write-mask is always the trailing operand.
  Value *Mask = CI.getArgOperand(i: CI.arg_size() - 1);

  return applyX86MaskOn1BitsVec(Builder, Vec: Cmp, Mask);
}
2410
2411// Replace a masked intrinsic with an older unmasked intrinsic.
2412static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2413 Intrinsic::ID IID) {
2414 Value *Rep =
2415 Builder.CreateIntrinsic(ID: IID, Args: {CI.getArgOperand(i: 0), CI.getArgOperand(i: 1)});
2416 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Rep, Op1: CI.getArgOperand(i: 2));
2417}
2418
2419static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2420 Value* A = CI.getArgOperand(i: 0);
2421 Value* B = CI.getArgOperand(i: 1);
2422 Value* Src = CI.getArgOperand(i: 2);
2423 Value* Mask = CI.getArgOperand(i: 3);
2424
2425 Value* AndNode = Builder.CreateAnd(LHS: Mask, RHS: APInt(8, 1));
2426 Value* Cmp = Builder.CreateIsNotNull(Arg: AndNode);
2427 Value* Extract1 = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
2428 Value* Extract2 = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0);
2429 Value* Select = Builder.CreateSelect(C: Cmp, True: Extract1, False: Extract2);
2430 return Builder.CreateInsertElement(Vec: A, NewElt: Select, Idx: (uint64_t)0);
2431}
2432
2433static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2434 Value* Op = CI.getArgOperand(i: 0);
2435 Type* ReturnOp = CI.getType();
2436 unsigned NumElts = cast<FixedVectorType>(Val: CI.getType())->getNumElements();
2437 Value *Mask = getX86MaskVec(Builder, Mask: Op, NumElts);
2438 return Builder.CreateSExt(V: Mask, DestTy: ReturnOp, Name: "vpmovm2");
2439}
2440
// Replace intrinsic with unmasked version and a select.
// Handles the family of "llvm.x86.avx512.mask.*" intrinsics whose unmasked
// form still exists: the mask name is mapped to the unmasked intrinsic ID by
// vector width (and element width where needed), the trailing (passthru,
// mask) operands are dropped from the call, and the mask is re-applied with
// a select. Returns false (leaving \p Rep untouched) if \p Name is not one
// of the recognized intrinsics.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallBase &CI, Value *&Rep) {
  Name = Name.substr(Start: 12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.starts_with(Prefix: "max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with(Prefix: "permvar.")) {
    // permvar covers both FP and integer element types.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "conflict.")) {
    // Name[9] distinguishes the dword ('d') vs. qword ('q') element forms.
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pavg.")) {
    // Name[5] distinguishes the byte ('b') vs. word ('w') element forms.
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Drop the trailing (passthru, mask) operands, call the unmasked
  // intrinsic, then re-apply the mask with a select against the passthru.
  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateIntrinsic(ID: IID, Args);
  unsigned NumArgs = CI.arg_size();
  Rep = emitX86Select(Builder, Mask: CI.getArgOperand(i: NumArgs - 1), Op0: Rep,
                      Op1: CI.getArgOperand(i: NumArgs - 2));
  return true;
}
2678
2679/// Upgrade comment in call to inline asm that represents an objc retain release
2680/// marker.
2681void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2682 size_t Pos;
2683 if (AsmStr->find(s: "mov\tfp") == 0 &&
2684 AsmStr->find(s: "objc_retainAutoreleaseReturnValue") != std::string::npos &&
2685 (Pos = AsmStr->find(s: "# marker")) != std::string::npos) {
2686 AsmStr->replace(pos: Pos, n1: 1, s: ";");
2687 }
2688}
2689
// Upgrade a deprecated "llvm.nvvm.*" intrinsic call (\p Name has the
// "nvvm." prefix already stripped) to its modern equivalent: generic LLVM
// intrinsics, plain IR (atomicrmw, addrspacecast, loads), or renamed NVVM
// intrinsics. Returns the replacement value, or null if \p Name needs no
// upgrade here.
static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
                                       Function *F, IRBuilder<> &Builder) {
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    // Integer abs expands to select(x >= 0, x, -x).
    Value *Arg = CI->getArgOperand(i: 0);
    Value *Neg = Builder.CreateNeg(V: Arg, Name: "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        LHS: Arg, RHS: llvm::Constant::getNullValue(Ty: Arg->getType()), Name: "abs.cond");
    Rep = Builder.CreateSelect(C: Cmp, True: Arg, False: Neg, Name: "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    // bf16 abs: bitcast the integer payload to bfloat, take nvvm.fabs,
    // and cast back to the original return type.
    Type *Ty = (Name == "abs.bf16")
                   ? Builder.getBFloatTy()
                   : FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 2);
    Value *Arg = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Ty);
    Value *Abs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::nvvm_fabs, V: Arg);
    Rep = Builder.CreateBitCast(V: Abs, DestTy: CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(ID: IID, V: CI->getArgOperand(i: 0));
  } else if (Name.consume_front(Prefix: "ex2.approx.")) {
    // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
    Intrinsic::ID IID = Name.starts_with(Prefix: "ftz") ? Intrinsic::nvvm_ex2_approx_ftz
                                                : Intrinsic::nvvm_ex2_approx;
    Rep = Builder.CreateUnaryIntrinsic(ID: IID, V: CI->getArgOperand(i: 0));
  } else if (Name.starts_with(Prefix: "atomic.load.add.f32.p") ||
             Name.starts_with(Prefix: "atomic.load.add.f64.p")) {
    // Atomic float add maps directly onto atomicrmw fadd monotonic.
    Value *Ptr = CI->getArgOperand(i: 0);
    Value *Val = CI->getArgOperand(i: 1);
    Rep = Builder.CreateAtomicRMW(Op: AtomicRMWInst::FAdd, Ptr, Val, Align: MaybeAlign(),
                                  Ordering: AtomicOrdering::Monotonic);
  } else if (Name.starts_with(Prefix: "atomic.load.inc.32.p") ||
             Name.starts_with(Prefix: "atomic.load.dec.32.p")) {
    // Wrapping inc/dec map onto atomicrmw uinc_wrap/udec_wrap monotonic.
    Value *Ptr = CI->getArgOperand(i: 0);
    Value *Val = CI->getArgOperand(i: 1);
    auto Op = Name.starts_with(Prefix: "atomic.load.inc") ? AtomicRMWInst::UIncWrap
                                                  : AtomicRMWInst::UDecWrap;
    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, Align: MaybeAlign(),
                                  Ordering: AtomicOrdering::Monotonic);
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(i: 0);
    Value *Ctlz = Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: {Arg->getType()},
                                          Args: {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, Name: "ctlz");
    Rep = Builder.CreateTrunc(V: Ctlz, DestTy: Builder.getInt32Ty(), Name: "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
    // i64.
    Value *Arg = CI->getArgOperand(i: 0);
    Value *Popc = Builder.CreateIntrinsic(ID: Intrinsic::ctpop, Types: {Arg->getType()},
                                          Args: Arg, /*FMFSource=*/nullptr, Name: "ctpop");
    Rep = Builder.CreateTrunc(V: Popc, DestTy: Builder.getInt32Ty(), Name: "ctpop.trunc");
  } else if (Name == "h2f") {
    // Half-to-float: reinterpret the i16 payload as half, then fpext.
    Value *Cast =
        Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
    Rep = Builder.CreateFPExt(V: Cast, DestTy: Builder.getFloatTy());
  } else if (Name.consume_front(Prefix: "bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    Rep = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
  } else if (Name == "rotate.b32") {
    // Rotates become funnel shifts with both inputs equal.
    Value *Arg = CI->getOperand(i_nocapture: 0);
    Value *ShiftAmt = CI->getOperand(i_nocapture: 1);
    Rep = Builder.CreateIntrinsic(RetTy: Builder.getInt32Ty(), ID: Intrinsic::fshl,
                                  Args: {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    // 64-bit rotate took an i32 amount; widen it for fshl.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(i_nocapture: 0);
    Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
    Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
                                  Args: {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(i_nocapture: 0);
    Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
    Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshr,
                                  Args: {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {
    // Swapping halves of an i64 is a rotate by 32.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(i_nocapture: 0);
    Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
                                  Args: {Arg, Arg, Builder.getInt64(C: 32)});
  } else if ((Name.consume_front(Prefix: "ptr.gen.to.") &&
              consumeNVVMPtrAddrSpace(Name)) ||
             (Name.consume_front(Prefix: "ptr.") && consumeNVVMPtrAddrSpace(Name) &&
              Name.starts_with(Prefix: ".to.gen"))) {
    // Pointer space conversions become plain addrspacecasts.
    Rep = Builder.CreateAddrSpaceCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
  } else if (Name.consume_front(Prefix: "ldg.global")) {
    // ldg becomes an !invariant.load from the global address space.
    Value *Ptr = CI->getArgOperand(i: 0);
    Align PtrAlign = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getAlignValue();
    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
    Value *ASC = Builder.CreateAddrSpaceCast(V: Ptr, DestTy: Builder.getPtrTy(AddrSpace: 1));
    Instruction *LD = Builder.CreateAlignedLoad(Ty: CI->getType(), Ptr: ASC, Align: PtrAlign);
    MDNode *MD = MDNode::get(Context&: Builder.getContext(), MDs: {});
    LD->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD);
    return LD;
  } else if (Name == "tanh.approx.f32") {
    // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
    FastMathFlags FMF;
    FMF.setApproxFunc();
    Rep = Builder.CreateUnaryIntrinsic(ID: Intrinsic::tanh, V: CI->getArgOperand(i: 0),
                                       FMFSource: FMF);
  } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
    // barrier0 implies barrier id 0; the others pass it explicitly.
    Value *Arg =
        Name.ends_with(Suffix: '0') ? Builder.getInt32(C: 0) : CI->getArgOperand(i: 0);
    Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_aligned_all,
                                  Types: {}, Args: {Arg});
  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        ID: Intrinsic::nvvm_barrier_cta_sync_aligned_count, Types: {},
        Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_all, Types: {},
                                  Args: {CI->getArgOperand(i: 0)});
  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_count, Types: {},
                                  Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
  } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
             Name == "barrier0.or") {
    // Reduction barriers take an i1 predicate; the old form took i32.
    Value *C = CI->getArgOperand(i: 0);
    C = Builder.CreateICmpNE(LHS: C, RHS: Builder.getInt32(C: 0));

    Intrinsic::ID IID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case(S: "barrier0.popc",
                  Value: Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
            .Case(S: "barrier0.and",
                  Value: Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
            .Case(S: "barrier0.or",
                  Value: Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
    Value *Bar = Builder.CreateIntrinsic(ID: IID, Types: {}, Args: {Builder.getInt32(C: 0), C});
    Rep = Builder.CreateZExt(V: Bar, DestTy: CI->getType());
  } else {
    // Fall back to the bf16 intrinsic rename path: same semantics, but the
    // new declaration takes/returns bfloat instead of i16, so bitcast the
    // integer arguments and result as needed.
    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
    if (IID != Intrinsic::not_intrinsic &&
        !F->getReturnType()->getScalarType()->isBFloatTy()) {
      rename(GV: F);
      Function *NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
      SmallVector<Value *, 2> Args;
      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
        Value *Arg = CI->getArgOperand(i: I);
        Type *OldType = Arg->getType();
        Type *NewType = NewFn->getArg(i: I)->getType();
        Args.push_back(
            Elt: (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
                ? Builder.CreateBitCast(V: Arg, DestTy: NewType)
                : Arg);
      }
      Rep = Builder.CreateCall(Callee: NewFn, Args);
      if (F->getReturnType()->isIntegerTy())
        Rep = Builder.CreateBitCast(V: Rep, DestTy: F->getReturnType());
    }
  }

  return Rep;
}
2848
2849static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2850 IRBuilder<> &Builder) {
2851 LLVMContext &C = F->getContext();
2852 Value *Rep = nullptr;
2853
2854 if (Name.starts_with(Prefix: "sse4a.movnt.")) {
2855 SmallVector<Metadata *, 1> Elts;
2856 Elts.push_back(
2857 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2858 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2859
2860 Value *Arg0 = CI->getArgOperand(i: 0);
2861 Value *Arg1 = CI->getArgOperand(i: 1);
2862
2863 // Nontemporal (unaligned) store of the 0'th element of the float/double
2864 // vector.
2865 Value *Extract =
2866 Builder.CreateExtractElement(Vec: Arg1, Idx: (uint64_t)0, Name: "extractelement");
2867
2868 StoreInst *SI = Builder.CreateAlignedStore(Val: Extract, Ptr: Arg0, Align: Align(1));
2869 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2870 } else if (Name.starts_with(Prefix: "avx.movnt.") ||
2871 Name.starts_with(Prefix: "avx512.storent.")) {
2872 SmallVector<Metadata *, 1> Elts;
2873 Elts.push_back(
2874 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2875 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2876
2877 Value *Arg0 = CI->getArgOperand(i: 0);
2878 Value *Arg1 = CI->getArgOperand(i: 1);
2879
2880 StoreInst *SI = Builder.CreateAlignedStore(
2881 Val: Arg1, Ptr: Arg0,
2882 Align: Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2883 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2884 } else if (Name == "sse2.storel.dq") {
2885 Value *Arg0 = CI->getArgOperand(i: 0);
2886 Value *Arg1 = CI->getArgOperand(i: 1);
2887
2888 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
2889 Value *BC0 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
2890 Value *Elt = Builder.CreateExtractElement(Vec: BC0, Idx: (uint64_t)0);
2891 Builder.CreateAlignedStore(Val: Elt, Ptr: Arg0, Align: Align(1));
2892 } else if (Name.starts_with(Prefix: "sse.storeu.") ||
2893 Name.starts_with(Prefix: "sse2.storeu.") ||
2894 Name.starts_with(Prefix: "avx.storeu.")) {
2895 Value *Arg0 = CI->getArgOperand(i: 0);
2896 Value *Arg1 = CI->getArgOperand(i: 1);
2897 Builder.CreateAlignedStore(Val: Arg1, Ptr: Arg0, Align: Align(1));
2898 } else if (Name == "avx512.mask.store.ss") {
2899 Value *Mask = Builder.CreateAnd(LHS: CI->getArgOperand(i: 2), RHS: Builder.getInt8(C: 1));
2900 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2901 Mask, Aligned: false);
2902 } else if (Name.starts_with(Prefix: "avx512.mask.store")) {
2903 // "avx512.mask.storeu." or "avx512.mask.store."
2904 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2905 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2906 Mask: CI->getArgOperand(i: 2), Aligned);
2907 } else if (Name.starts_with(Prefix: "sse2.pcmp") || Name.starts_with(Prefix: "avx2.pcmp")) {
2908 // Upgrade packed integer vector compare intrinsics to compare instructions.
2909 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2910 bool CmpEq = Name[9] == 'e';
2911 Rep = Builder.CreateICmp(P: CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2912 LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
2913 Rep = Builder.CreateSExt(V: Rep, DestTy: CI->getType(), Name: "");
2914 } else if (Name.starts_with(Prefix: "avx512.broadcastm")) {
2915 Type *ExtTy = Type::getInt32Ty(C);
2916 if (CI->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 8))
2917 ExtTy = Type::getInt64Ty(C);
2918 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2919 ExtTy->getPrimitiveSizeInBits();
2920 Rep = Builder.CreateZExt(V: CI->getArgOperand(i: 0), DestTy: ExtTy);
2921 Rep = Builder.CreateVectorSplat(NumElts, V: Rep);
2922 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2923 Value *Vec = CI->getArgOperand(i: 0);
2924 Value *Elt0 = Builder.CreateExtractElement(Vec, Idx: (uint64_t)0);
2925 Elt0 = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: Elt0->getType(), Args: Elt0);
2926 Rep = Builder.CreateInsertElement(Vec, NewElt: Elt0, Idx: (uint64_t)0);
2927 } else if (Name.starts_with(Prefix: "avx.sqrt.p") ||
2928 Name.starts_with(Prefix: "sse2.sqrt.p") ||
2929 Name.starts_with(Prefix: "sse.sqrt.p")) {
2930 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(),
2931 Args: {CI->getArgOperand(i: 0)});
2932 } else if (Name.starts_with(Prefix: "avx512.mask.sqrt.p")) {
2933 if (CI->arg_size() == 4 &&
2934 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
2935 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
2936 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2937 : Intrinsic::x86_avx512_sqrt_pd_512;
2938
2939 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 3)};
2940 Rep = Builder.CreateIntrinsic(ID: IID, Args);
2941 } else {
2942 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(),
2943 Args: {CI->getArgOperand(i: 0)});
2944 }
2945 Rep =
2946 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
2947 } else if (Name.starts_with(Prefix: "avx512.ptestm") ||
2948 Name.starts_with(Prefix: "avx512.ptestnm")) {
2949 Value *Op0 = CI->getArgOperand(i: 0);
2950 Value *Op1 = CI->getArgOperand(i: 1);
2951 Value *Mask = CI->getArgOperand(i: 2);
2952 Rep = Builder.CreateAnd(LHS: Op0, RHS: Op1);
2953 llvm::Type *Ty = Op0->getType();
2954 Value *Zero = llvm::Constant::getNullValue(Ty);
2955 ICmpInst::Predicate Pred = Name.starts_with(Prefix: "avx512.ptestm")
2956 ? ICmpInst::ICMP_NE
2957 : ICmpInst::ICMP_EQ;
2958 Rep = Builder.CreateICmp(P: Pred, LHS: Rep, RHS: Zero);
2959 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask);
2960 } else if (Name.starts_with(Prefix: "avx512.mask.pbroadcast")) {
2961 unsigned NumElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 1)->getType())
2962 ->getNumElements();
2963 Rep = Builder.CreateVectorSplat(NumElts, V: CI->getArgOperand(i: 0));
2964 Rep =
2965 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
2966 } else if (Name.starts_with(Prefix: "avx512.kunpck")) {
2967 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2968 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts);
2969 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts);
2970 int Indices[64];
2971 for (unsigned i = 0; i != NumElts; ++i)
2972 Indices[i] = i;
2973
2974 // First extract half of each vector. This gives better codegen than
2975 // doing it in a single shuffle.
2976 LHS = Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2));
2977 RHS = Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2));
2978 // Concat the vectors.
2979 // NOTE: Operands have to be swapped to match intrinsic definition.
2980 Rep = Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts));
2981 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2982 } else if (Name == "avx512.kand.w") {
2983 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2984 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2985 Rep = Builder.CreateAnd(LHS, RHS);
2986 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2987 } else if (Name == "avx512.kandn.w") {
2988 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2989 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2990 LHS = Builder.CreateNot(V: LHS);
2991 Rep = Builder.CreateAnd(LHS, RHS);
2992 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2993 } else if (Name == "avx512.kor.w") {
2994 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2995 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2996 Rep = Builder.CreateOr(LHS, RHS);
2997 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2998 } else if (Name == "avx512.kxor.w") {
2999 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3000 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3001 Rep = Builder.CreateXor(LHS, RHS);
3002 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3003 } else if (Name == "avx512.kxnor.w") {
3004 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3005 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3006 LHS = Builder.CreateNot(V: LHS);
3007 Rep = Builder.CreateXor(LHS, RHS);
3008 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3009 } else if (Name == "avx512.knot.w") {
3010 Rep = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3011 Rep = Builder.CreateNot(V: Rep);
3012 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3013 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3014 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3015 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3016 Rep = Builder.CreateOr(LHS, RHS);
3017 Rep = Builder.CreateBitCast(V: Rep, DestTy: Builder.getInt16Ty());
3018 Value *C;
3019 if (Name[14] == 'c')
3020 C = ConstantInt::getAllOnesValue(Ty: Builder.getInt16Ty());
3021 else
3022 C = ConstantInt::getNullValue(Ty: Builder.getInt16Ty());
3023 Rep = Builder.CreateICmpEQ(LHS: Rep, RHS: C);
3024 Rep = Builder.CreateZExt(V: Rep, DestTy: Builder.getInt32Ty());
3025 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3026 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3027 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3028 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3029 Type *I32Ty = Type::getInt32Ty(C);
3030 Value *Elt0 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 0),
3031 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3032 Value *Elt1 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1),
3033 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3034 Value *EltOp;
3035 if (Name.contains(Other: ".add."))
3036 EltOp = Builder.CreateFAdd(L: Elt0, R: Elt1);
3037 else if (Name.contains(Other: ".sub."))
3038 EltOp = Builder.CreateFSub(L: Elt0, R: Elt1);
3039 else if (Name.contains(Other: ".mul."))
3040 EltOp = Builder.CreateFMul(L: Elt0, R: Elt1);
3041 else
3042 EltOp = Builder.CreateFDiv(L: Elt0, R: Elt1);
3043 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: EltOp,
3044 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3045 } else if (Name.starts_with(Prefix: "avx512.mask.pcmp")) {
3046 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3047 bool CmpEq = Name[16] == 'e';
3048 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: CmpEq ? 0 : 6, Signed: true);
3049 } else if (Name.starts_with(Prefix: "avx512.mask.vpshufbitqmb.")) {
3050 Type *OpTy = CI->getArgOperand(i: 0)->getType();
3051 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3052 Intrinsic::ID IID;
3053 switch (VecWidth) {
3054 default:
3055 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
3056 break;
3057 case 128:
3058 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3059 break;
3060 case 256:
3061 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3062 break;
3063 case 512:
3064 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3065 break;
3066 }
3067
3068 Rep =
3069 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
3070 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
3071 } else if (Name.starts_with(Prefix: "avx512.mask.fpclass.p")) {
3072 Type *OpTy = CI->getArgOperand(i: 0)->getType();
3073 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3074 unsigned EltWidth = OpTy->getScalarSizeInBits();
3075 Intrinsic::ID IID;
3076 if (VecWidth == 128 && EltWidth == 32)
3077 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3078 else if (VecWidth == 256 && EltWidth == 32)
3079 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3080 else if (VecWidth == 512 && EltWidth == 32)
3081 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3082 else if (VecWidth == 128 && EltWidth == 64)
3083 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3084 else if (VecWidth == 256 && EltWidth == 64)
3085 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3086 else if (VecWidth == 512 && EltWidth == 64)
3087 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3088 else
3089 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
3090
3091 Rep =
3092 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
3093 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
3094 } else if (Name.starts_with(Prefix: "avx512.cmp.p")) {
3095 SmallVector<Value *, 4> Args(CI->args());
3096 Type *OpTy = Args[0]->getType();
3097 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3098 unsigned EltWidth = OpTy->getScalarSizeInBits();
3099 Intrinsic::ID IID;
3100 if (VecWidth == 128 && EltWidth == 32)
3101 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3102 else if (VecWidth == 256 && EltWidth == 32)
3103 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3104 else if (VecWidth == 512 && EltWidth == 32)
3105 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3106 else if (VecWidth == 128 && EltWidth == 64)
3107 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3108 else if (VecWidth == 256 && EltWidth == 64)
3109 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3110 else if (VecWidth == 512 && EltWidth == 64)
3111 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3112 else
3113 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
3114
3115 Value *Mask = Constant::getAllOnesValue(Ty: CI->getType());
3116 if (VecWidth == 512)
3117 std::swap(a&: Mask, b&: Args.back());
3118 Args.push_back(Elt: Mask);
3119
3120 Rep = Builder.CreateIntrinsic(ID: IID, Args);
3121 } else if (Name.starts_with(Prefix: "avx512.mask.cmp.")) {
3122 // Integer compare intrinsics.
3123 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3124 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: true);
3125 } else if (Name.starts_with(Prefix: "avx512.mask.ucmp.")) {
3126 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3127 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: false);
3128 } else if (Name.starts_with(Prefix: "avx512.cvtb2mask.") ||
3129 Name.starts_with(Prefix: "avx512.cvtw2mask.") ||
3130 Name.starts_with(Prefix: "avx512.cvtd2mask.") ||
3131 Name.starts_with(Prefix: "avx512.cvtq2mask.")) {
3132 Value *Op = CI->getArgOperand(i: 0);
3133 Value *Zero = llvm::Constant::getNullValue(Ty: Op->getType());
3134 Rep = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: Op, RHS: Zero);
3135 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: nullptr);
3136 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3137 Name == "ssse3.pabs.d.128" || Name.starts_with(Prefix: "avx2.pabs") ||
3138 Name.starts_with(Prefix: "avx512.mask.pabs")) {
3139 Rep = upgradeAbs(Builder, CI&: *CI);
3140 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3141 Name == "sse41.pmaxsd" || Name.starts_with(Prefix: "avx2.pmaxs") ||
3142 Name.starts_with(Prefix: "avx512.mask.pmaxs")) {
3143 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smax);
3144 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3145 Name == "sse41.pmaxud" || Name.starts_with(Prefix: "avx2.pmaxu") ||
3146 Name.starts_with(Prefix: "avx512.mask.pmaxu")) {
3147 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umax);
3148 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3149 Name == "sse41.pminsd" || Name.starts_with(Prefix: "avx2.pmins") ||
3150 Name.starts_with(Prefix: "avx512.mask.pmins")) {
3151 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smin);
3152 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3153 Name == "sse41.pminud" || Name.starts_with(Prefix: "avx2.pminu") ||
3154 Name.starts_with(Prefix: "avx512.mask.pminu")) {
3155 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umin);
3156 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3157 Name == "avx512.pmulu.dq.512" ||
3158 Name.starts_with(Prefix: "avx512.mask.pmulu.dq.")) {
3159 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: false);
3160 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3161 Name == "avx512.pmul.dq.512" ||
3162 Name.starts_with(Prefix: "avx512.mask.pmul.dq.")) {
3163 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: true);
3164 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3165 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3166 Rep =
3167 Builder.CreateSIToFP(V: CI->getArgOperand(i: 1),
3168 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3169 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3170 } else if (Name == "avx512.cvtusi2sd") {
3171 Rep =
3172 Builder.CreateUIToFP(V: CI->getArgOperand(i: 1),
3173 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3174 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3175 } else if (Name == "sse2.cvtss2sd") {
3176 Rep = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), Idx: (uint64_t)0);
3177 Rep = Builder.CreateFPExt(
3178 V: Rep, DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3179 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3180 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3181 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3182 Name.starts_with(Prefix: "avx512.mask.cvtdq2pd.") ||
3183 Name.starts_with(Prefix: "avx512.mask.cvtudq2pd.") ||
3184 Name.starts_with(Prefix: "avx512.mask.cvtdq2ps.") ||
3185 Name.starts_with(Prefix: "avx512.mask.cvtudq2ps.") ||
3186 Name.starts_with(Prefix: "avx512.mask.cvtqq2pd.") ||
3187 Name.starts_with(Prefix: "avx512.mask.cvtuqq2pd.") ||
3188 Name == "avx512.mask.cvtqq2ps.256" ||
3189 Name == "avx512.mask.cvtqq2ps.512" ||
3190 Name == "avx512.mask.cvtuqq2ps.256" ||
3191 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3192 Name == "avx.cvt.ps2.pd.256" ||
3193 Name == "avx512.mask.cvtps2pd.128" ||
3194 Name == "avx512.mask.cvtps2pd.256") {
3195 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3196 Rep = CI->getArgOperand(i: 0);
3197 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
3198
3199 unsigned NumDstElts = DstTy->getNumElements();
3200 if (NumDstElts < SrcTy->getNumElements()) {
3201 assert(NumDstElts == 2 && "Unexpected vector size");
3202 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1});
3203 }
3204
3205 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3206 bool IsUnsigned = Name.contains(Other: "cvtu");
3207 if (IsPS2PD)
3208 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtps2pd");
3209 else if (CI->arg_size() == 4 &&
3210 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
3211 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
3212 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3213 : Intrinsic::x86_avx512_sitofp_round;
3214 Rep = Builder.CreateIntrinsic(ID: IID, Types: {DstTy, SrcTy},
3215 Args: {Rep, CI->getArgOperand(i: 3)});
3216 } else {
3217 Rep = IsUnsigned ? Builder.CreateUIToFP(V: Rep, DestTy: DstTy, Name: "cvt")
3218 : Builder.CreateSIToFP(V: Rep, DestTy: DstTy, Name: "cvt");
3219 }
3220
3221 if (CI->arg_size() >= 3)
3222 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3223 Op1: CI->getArgOperand(i: 1));
3224 } else if (Name.starts_with(Prefix: "avx512.mask.vcvtph2ps.") ||
3225 Name.starts_with(Prefix: "vcvtph2ps.")) {
3226 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3227 Rep = CI->getArgOperand(i: 0);
3228 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
3229 unsigned NumDstElts = DstTy->getNumElements();
3230 if (NumDstElts != SrcTy->getNumElements()) {
3231 assert(NumDstElts == 4 && "Unexpected vector size");
3232 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1, 2, 3});
3233 }
3234 Rep = Builder.CreateBitCast(
3235 V: Rep, DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C), NumElts: NumDstElts));
3236 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtph2ps");
3237 if (CI->arg_size() >= 3)
3238 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3239 Op1: CI->getArgOperand(i: 1));
3240 } else if (Name.starts_with(Prefix: "avx512.mask.load")) {
3241 // "avx512.mask.loadu." or "avx512.mask.load."
3242 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3243 Rep = upgradeMaskedLoad(Builder, Ptr: CI->getArgOperand(i: 0), Passthru: CI->getArgOperand(i: 1),
3244 Mask: CI->getArgOperand(i: 2), Aligned);
3245 } else if (Name.starts_with(Prefix: "avx512.mask.expand.load.")) {
3246 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
3247 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3248 NumElts: ResultTy->getNumElements());
3249
3250 Rep = Builder.CreateIntrinsic(
3251 ID: Intrinsic::masked_expandload, Types: ResultTy,
3252 Args: {CI->getOperand(i_nocapture: 0), MaskVec, CI->getOperand(i_nocapture: 1)});
3253 } else if (Name.starts_with(Prefix: "avx512.mask.compress.store.")) {
3254 auto *ResultTy = cast<VectorType>(Val: CI->getArgOperand(i: 1)->getType());
3255 Value *MaskVec =
3256 getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3257 NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements());
3258
3259 Rep = Builder.CreateIntrinsic(
3260 ID: Intrinsic::masked_compressstore, Types: ResultTy,
3261 Args: {CI->getArgOperand(i: 1), CI->getArgOperand(i: 0), MaskVec});
3262 } else if (Name.starts_with(Prefix: "avx512.mask.compress.") ||
3263 Name.starts_with(Prefix: "avx512.mask.expand.")) {
3264 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
3265
3266 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3267 NumElts: ResultTy->getNumElements());
3268
3269 bool IsCompress = Name[12] == 'c';
3270 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3271 : Intrinsic::x86_avx512_mask_expand;
3272 Rep = Builder.CreateIntrinsic(
3273 ID: IID, Types: ResultTy, Args: {CI->getOperand(i_nocapture: 0), CI->getOperand(i_nocapture: 1), MaskVec});
3274 } else if (Name.starts_with(Prefix: "xop.vpcom")) {
3275 bool IsSigned;
3276 if (Name.ends_with(Suffix: "ub") || Name.ends_with(Suffix: "uw") || Name.ends_with(Suffix: "ud") ||
3277 Name.ends_with(Suffix: "uq"))
3278 IsSigned = false;
3279 else if (Name.ends_with(Suffix: "b") || Name.ends_with(Suffix: "w") ||
3280 Name.ends_with(Suffix: "d") || Name.ends_with(Suffix: "q"))
3281 IsSigned = true;
3282 else
3283 reportFatalUsageErrorWithCI(reason: "Intrinsic has unknown suffix", CI);
3284
3285 unsigned Imm;
3286 if (CI->arg_size() == 3) {
3287 Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3288 } else {
3289 Name = Name.substr(Start: 9); // strip off "xop.vpcom"
3290 if (Name.starts_with(Prefix: "lt"))
3291 Imm = 0;
3292 else if (Name.starts_with(Prefix: "le"))
3293 Imm = 1;
3294 else if (Name.starts_with(Prefix: "gt"))
3295 Imm = 2;
3296 else if (Name.starts_with(Prefix: "ge"))
3297 Imm = 3;
3298 else if (Name.starts_with(Prefix: "eq"))
3299 Imm = 4;
3300 else if (Name.starts_with(Prefix: "ne"))
3301 Imm = 5;
3302 else if (Name.starts_with(Prefix: "false"))
3303 Imm = 6;
3304 else if (Name.starts_with(Prefix: "true"))
3305 Imm = 7;
3306 else
3307 llvm_unreachable("Unknown condition");
3308 }
3309
3310 Rep = upgradeX86vpcom(Builder, CI&: *CI, Imm, IsSigned);
3311 } else if (Name.starts_with(Prefix: "xop.vpcmov")) {
3312 Value *Sel = CI->getArgOperand(i: 2);
3313 Value *NotSel = Builder.CreateNot(V: Sel);
3314 Value *Sel0 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 0), RHS: Sel);
3315 Value *Sel1 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 1), RHS: NotSel);
3316 Rep = Builder.CreateOr(LHS: Sel0, RHS: Sel1);
3317 } else if (Name.starts_with(Prefix: "xop.vprot") || Name.starts_with(Prefix: "avx512.prol") ||
3318 Name.starts_with(Prefix: "avx512.mask.prol")) {
3319 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: false);
3320 } else if (Name.starts_with(Prefix: "avx512.pror") ||
3321 Name.starts_with(Prefix: "avx512.mask.pror")) {
3322 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: true);
3323 } else if (Name.starts_with(Prefix: "avx512.vpshld.") ||
3324 Name.starts_with(Prefix: "avx512.mask.vpshld") ||
3325 Name.starts_with(Prefix: "avx512.maskz.vpshld")) {
3326 bool ZeroMask = Name[11] == 'z';
3327 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: false, ZeroMask);
3328 } else if (Name.starts_with(Prefix: "avx512.vpshrd.") ||
3329 Name.starts_with(Prefix: "avx512.mask.vpshrd") ||
3330 Name.starts_with(Prefix: "avx512.maskz.vpshrd")) {
3331 bool ZeroMask = Name[11] == 'z';
3332 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: true, ZeroMask);
3333 } else if (Name == "sse42.crc32.64.8") {
3334 Value *Trunc0 =
3335 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Type::getInt32Ty(C));
3336 Rep = Builder.CreateIntrinsic(ID: Intrinsic::x86_sse42_crc32_32_8,
3337 Args: {Trunc0, CI->getArgOperand(i: 1)});
3338 Rep = Builder.CreateZExt(V: Rep, DestTy: CI->getType(), Name: "");
3339 } else if (Name.starts_with(Prefix: "avx.vbroadcast.s") ||
3340 Name.starts_with(Prefix: "avx512.vbroadcast.s")) {
3341 // Replace broadcasts with a series of insertelements.
3342 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3343 Type *EltTy = VecTy->getElementType();
3344 unsigned EltNum = VecTy->getNumElements();
3345 Value *Load = Builder.CreateLoad(Ty: EltTy, Ptr: CI->getArgOperand(i: 0));
3346 Type *I32Ty = Type::getInt32Ty(C);
3347 Rep = PoisonValue::get(T: VecTy);
3348 for (unsigned I = 0; I < EltNum; ++I)
3349 Rep = Builder.CreateInsertElement(Vec: Rep, NewElt: Load, Idx: ConstantInt::get(Ty: I32Ty, V: I));
3350 } else if (Name.starts_with(Prefix: "sse41.pmovsx") ||
3351 Name.starts_with(Prefix: "sse41.pmovzx") ||
3352 Name.starts_with(Prefix: "avx2.pmovsx") ||
3353 Name.starts_with(Prefix: "avx2.pmovzx") ||
3354 Name.starts_with(Prefix: "avx512.mask.pmovsx") ||
3355 Name.starts_with(Prefix: "avx512.mask.pmovzx")) {
3356 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3357 unsigned NumDstElts = DstTy->getNumElements();
3358
3359 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3360 SmallVector<int, 8> ShuffleMask(NumDstElts);
3361 for (unsigned i = 0; i != NumDstElts; ++i)
3362 ShuffleMask[i] = i;
3363
3364 Value *SV = Builder.CreateShuffleVector(V: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3365
3366 bool DoSext = Name.contains(Other: "pmovsx");
3367 Rep =
3368 DoSext ? Builder.CreateSExt(V: SV, DestTy: DstTy) : Builder.CreateZExt(V: SV, DestTy: DstTy);
3369 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3370 if (CI->arg_size() == 3)
3371 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3372 Op1: CI->getArgOperand(i: 1));
3373 } else if (Name == "avx512.mask.pmov.qd.256" ||
3374 Name == "avx512.mask.pmov.qd.512" ||
3375 Name == "avx512.mask.pmov.wb.256" ||
3376 Name == "avx512.mask.pmov.wb.512") {
3377 Type *Ty = CI->getArgOperand(i: 1)->getType();
3378 Rep = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Ty);
3379 Rep =
3380 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3381 } else if (Name.starts_with(Prefix: "avx.vbroadcastf128") ||
3382 Name == "avx2.vbroadcasti128") {
3383 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3384 Type *EltTy = cast<VectorType>(Val: CI->getType())->getElementType();
3385 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3386 auto *VT = FixedVectorType::get(ElementType: EltTy, NumElts: NumSrcElts);
3387 Value *Load = Builder.CreateAlignedLoad(Ty: VT, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
3388 if (NumSrcElts == 2)
3389 Rep = Builder.CreateShuffleVector(V: Load, Mask: ArrayRef<int>{0, 1, 0, 1});
3390 else
3391 Rep = Builder.CreateShuffleVector(V: Load,
3392 Mask: ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3393 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.i") ||
3394 Name.starts_with(Prefix: "avx512.mask.shuf.f")) {
3395 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3396 Type *VT = CI->getType();
3397 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3398 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3399 unsigned ControlBitsMask = NumLanes - 1;
3400 unsigned NumControlBits = NumLanes / 2;
3401 SmallVector<int, 8> ShuffleMask(0);
3402
3403 for (unsigned l = 0; l != NumLanes; ++l) {
3404 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3405 // We actually need the other source.
3406 if (l >= NumLanes / 2)
3407 LaneMask += NumLanes;
3408 for (unsigned i = 0; i != NumElementsInLane; ++i)
3409 ShuffleMask.push_back(Elt: LaneMask * NumElementsInLane + i);
3410 }
3411 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3412 V2: CI->getArgOperand(i: 1), Mask: ShuffleMask);
3413 Rep =
3414 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3415 } else if (Name.starts_with(Prefix: "avx512.mask.broadcastf") ||
3416 Name.starts_with(Prefix: "avx512.mask.broadcasti")) {
3417 unsigned NumSrcElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 0)->getType())
3418 ->getNumElements();
3419 unsigned NumDstElts =
3420 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3421
3422 SmallVector<int, 8> ShuffleMask(NumDstElts);
3423 for (unsigned i = 0; i != NumDstElts; ++i)
3424 ShuffleMask[i] = i % NumSrcElts;
3425
3426 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3427 V2: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3428 Rep =
3429 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3430 } else if (Name.starts_with(Prefix: "avx2.pbroadcast") ||
3431 Name.starts_with(Prefix: "avx2.vbroadcast") ||
3432 Name.starts_with(Prefix: "avx512.pbroadcast") ||
3433 Name.starts_with(Prefix: "avx512.mask.broadcast.s")) {
3434 // Replace vp?broadcasts with a vector shuffle.
3435 Value *Op = CI->getArgOperand(i: 0);
3436 ElementCount EC = cast<VectorType>(Val: CI->getType())->getElementCount();
3437 Type *MaskTy = VectorType::get(ElementType: Type::getInt32Ty(C), EC);
3438 SmallVector<int, 8> M;
3439 ShuffleVectorInst::getShuffleMask(Mask: Constant::getNullValue(Ty: MaskTy), Result&: M);
3440 Rep = Builder.CreateShuffleVector(V: Op, Mask: M);
3441
3442 if (CI->arg_size() == 3)
3443 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3444 Op1: CI->getArgOperand(i: 1));
3445 } else if (Name.starts_with(Prefix: "sse2.padds.") ||
3446 Name.starts_with(Prefix: "avx2.padds.") ||
3447 Name.starts_with(Prefix: "avx512.padds.") ||
3448 Name.starts_with(Prefix: "avx512.mask.padds.")) {
3449 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::sadd_sat);
3450 } else if (Name.starts_with(Prefix: "sse2.psubs.") ||
3451 Name.starts_with(Prefix: "avx2.psubs.") ||
3452 Name.starts_with(Prefix: "avx512.psubs.") ||
3453 Name.starts_with(Prefix: "avx512.mask.psubs.")) {
3454 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::ssub_sat);
3455 } else if (Name.starts_with(Prefix: "sse2.paddus.") ||
3456 Name.starts_with(Prefix: "avx2.paddus.") ||
3457 Name.starts_with(Prefix: "avx512.mask.paddus.")) {
3458 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::uadd_sat);
3459 } else if (Name.starts_with(Prefix: "sse2.psubus.") ||
3460 Name.starts_with(Prefix: "avx2.psubus.") ||
3461 Name.starts_with(Prefix: "avx512.mask.psubus.")) {
3462 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::usub_sat);
3463 } else if (Name.starts_with(Prefix: "avx512.mask.palignr.")) {
3464 Rep = upgradeX86ALIGNIntrinsics(Builder, Op0: CI->getArgOperand(i: 0),
3465 Op1: CI->getArgOperand(i: 1), Shift: CI->getArgOperand(i: 2),
3466 Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4),
3467 IsVALIGN: false);
3468 } else if (Name.starts_with(Prefix: "avx512.mask.valign.")) {
3469 Rep = upgradeX86ALIGNIntrinsics(
3470 Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1),
3471 Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), IsVALIGN: true);
3472 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3473 // 128/256-bit shift left specified in bits.
3474 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3475 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3476 Shift: Shift / 8); // Shift is in bits.
3477 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3478 // 128/256-bit shift right specified in bits.
3479 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3480 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3481 Shift: Shift / 8); // Shift is in bits.
3482 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3483 Name == "avx512.psll.dq.512") {
3484 // 128/256/512-bit shift left specified in bytes.
3485 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3486 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3487 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3488 Name == "avx512.psrl.dq.512") {
3489 // 128/256/512-bit shift right specified in bytes.
3490 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3491 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3492 } else if (Name == "sse41.pblendw" || Name.starts_with(Prefix: "sse41.blendp") ||
3493 Name.starts_with(Prefix: "avx.blend.p") || Name == "avx2.pblendw" ||
3494 Name.starts_with(Prefix: "avx2.pblendd.")) {
3495 Value *Op0 = CI->getArgOperand(i: 0);
3496 Value *Op1 = CI->getArgOperand(i: 1);
3497 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3498 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3499 unsigned NumElts = VecTy->getNumElements();
3500
3501 SmallVector<int, 16> Idxs(NumElts);
3502 for (unsigned i = 0; i != NumElts; ++i)
3503 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3504
3505 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3506 } else if (Name.starts_with(Prefix: "avx.vinsertf128.") ||
3507 Name == "avx2.vinserti128" ||
3508 Name.starts_with(Prefix: "avx512.mask.insert")) {
3509 Value *Op0 = CI->getArgOperand(i: 0);
3510 Value *Op1 = CI->getArgOperand(i: 1);
3511 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3512 unsigned DstNumElts =
3513 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3514 unsigned SrcNumElts =
3515 cast<FixedVectorType>(Val: Op1->getType())->getNumElements();
3516 unsigned Scale = DstNumElts / SrcNumElts;
3517
3518 // Mask off the high bits of the immediate value; hardware ignores those.
3519 Imm = Imm % Scale;
3520
3521 // Extend the second operand into a vector the size of the destination.
3522 SmallVector<int, 8> Idxs(DstNumElts);
3523 for (unsigned i = 0; i != SrcNumElts; ++i)
3524 Idxs[i] = i;
3525 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3526 Idxs[i] = SrcNumElts;
3527 Rep = Builder.CreateShuffleVector(V: Op1, Mask: Idxs);
3528
3529 // Insert the second operand into the first operand.
3530
3531 // Note that there is no guarantee that instruction lowering will actually
3532 // produce a vinsertf128 instruction for the created shuffles. In
3533 // particular, the 0 immediate case involves no lane changes, so it can
3534 // be handled as a blend.
3535
3536 // Example of shuffle mask for 32-bit elements:
3537 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3538 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3539
3540 // First fill with identify mask.
3541 for (unsigned i = 0; i != DstNumElts; ++i)
3542 Idxs[i] = i;
3543 // Then replace the elements where we need to insert.
3544 for (unsigned i = 0; i != SrcNumElts; ++i)
3545 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3546 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Rep, Mask: Idxs);
3547
3548 // If the intrinsic has a mask operand, handle that.
3549 if (CI->arg_size() == 5)
3550 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep,
3551 Op1: CI->getArgOperand(i: 3));
3552 } else if (Name.starts_with(Prefix: "avx.vextractf128.") ||
3553 Name == "avx2.vextracti128" ||
3554 Name.starts_with(Prefix: "avx512.mask.vextract")) {
3555 Value *Op0 = CI->getArgOperand(i: 0);
3556 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3557 unsigned DstNumElts =
3558 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3559 unsigned SrcNumElts =
3560 cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
3561 unsigned Scale = SrcNumElts / DstNumElts;
3562
3563 // Mask off the high bits of the immediate value; hardware ignores those.
3564 Imm = Imm % Scale;
3565
3566 // Get indexes for the subvector of the input vector.
3567 SmallVector<int, 8> Idxs(DstNumElts);
3568 for (unsigned i = 0; i != DstNumElts; ++i) {
3569 Idxs[i] = i + (Imm * DstNumElts);
3570 }
3571 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3572
3573 // If the intrinsic has a mask operand, handle that.
3574 if (CI->arg_size() == 4)
3575 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3576 Op1: CI->getArgOperand(i: 2));
3577 } else if (Name.starts_with(Prefix: "avx512.mask.perm.df.") ||
3578 Name.starts_with(Prefix: "avx512.mask.perm.di.")) {
3579 Value *Op0 = CI->getArgOperand(i: 0);
3580 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3581 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3582 unsigned NumElts = VecTy->getNumElements();
3583
3584 SmallVector<int, 8> Idxs(NumElts);
3585 for (unsigned i = 0; i != NumElts; ++i)
3586 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3587
3588 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3589
3590 if (CI->arg_size() == 4)
3591 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3592 Op1: CI->getArgOperand(i: 2));
  } else if (Name.starts_with(Prefix: "avx.vperm2f128.") || Name == "avx2.vperm2i128") {
    // Lower the two-source 128-bit lane permute to a single shufflevector,
    // folding the zeroing bits by substituting a zero vector as a source.
    // The immediate permute control byte looks like this:
    // [1:0] - select 128 bits from sources for low half of destination
    // [2] - ignore
    // [3] - zero low half of destination
    // [5:4] - select 128 bits from sources for high half of destination
    // [6] - ignore
    // [7] - zero high half of destination

    uint8_t Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();

    unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
    unsigned HalfSize = NumElts / 2;
    SmallVector<int, 8> ShuffleMask(NumElts);

    // Determine which operand(s) are actually in use for this instruction.
    // Bit 1 picks the source for the low half, bit 5 for the high half.
    Value *V0 = (Imm & 0x02) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
    Value *V1 = (Imm & 0x20) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);

    // If needed, replace operands based on zero mask.
    V0 = (Imm & 0x08) ? ConstantAggregateZero::get(Ty: CI->getType()) : V0;
    V1 = (Imm & 0x80) ? ConstantAggregateZero::get(Ty: CI->getType()) : V1;

    // Permute low half of result.
    unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;

    // Permute high half of result.
    // Mask indices >= NumElts select elements of the second operand (V1).
    StartIndex = (Imm & 0x10) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

    Rep = Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: ShuffleMask);

3628 } else if (Name.starts_with(Prefix: "avx.vpermil.") || Name == "sse2.pshuf.d" ||
3629 Name.starts_with(Prefix: "avx512.mask.vpermil.p") ||
3630 Name.starts_with(Prefix: "avx512.mask.pshuf.d.")) {
3631 Value *Op0 = CI->getArgOperand(i: 0);
3632 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3633 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3634 unsigned NumElts = VecTy->getNumElements();
3635 // Calculate the size of each index in the immediate.
3636 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3637 unsigned IdxMask = ((1 << IdxSize) - 1);
3638
3639 SmallVector<int, 8> Idxs(NumElts);
3640 // Lookup the bits for this element, wrapping around the immediate every
3641 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3642 // to offset by the first index of each group.
3643 for (unsigned i = 0; i != NumElts; ++i)
3644 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3645
3646 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3647
3648 if (CI->arg_size() == 4)
3649 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3650 Op1: CI->getArgOperand(i: 2));
3651 } else if (Name == "sse2.pshufl.w" ||
3652 Name.starts_with(Prefix: "avx512.mask.pshufl.w.")) {
3653 Value *Op0 = CI->getArgOperand(i: 0);
3654 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3655 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3656
3657 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3658 reportFatalUsageErrorWithCI(reason: "Intrinsic has invalid signature", CI);
3659
3660 SmallVector<int, 16> Idxs(NumElts);
3661 for (unsigned l = 0; l != NumElts; l += 8) {
3662 for (unsigned i = 0; i != 4; ++i)
3663 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3664 for (unsigned i = 4; i != 8; ++i)
3665 Idxs[i + l] = i + l;
3666 }
3667
3668 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3669
3670 if (CI->arg_size() == 4)
3671 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3672 Op1: CI->getArgOperand(i: 2));
3673 } else if (Name == "sse2.pshufh.w" ||
3674 Name.starts_with(Prefix: "avx512.mask.pshufh.w.")) {
3675 Value *Op0 = CI->getArgOperand(i: 0);
3676 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3677 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3678
3679 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3680 reportFatalUsageErrorWithCI(reason: "Intrinsic has invalid signature", CI);
3681
3682 SmallVector<int, 16> Idxs(NumElts);
3683 for (unsigned l = 0; l != NumElts; l += 8) {
3684 for (unsigned i = 0; i != 4; ++i)
3685 Idxs[i + l] = i + l;
3686 for (unsigned i = 0; i != 4; ++i)
3687 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3688 }
3689
3690 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3691
3692 if (CI->arg_size() == 4)
3693 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3694 Op1: CI->getArgOperand(i: 2));
3695 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.p")) {
3696 Value *Op0 = CI->getArgOperand(i: 0);
3697 Value *Op1 = CI->getArgOperand(i: 1);
3698 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3699 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3700
3701 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3702 unsigned HalfLaneElts = NumLaneElts / 2;
3703
3704 SmallVector<int, 16> Idxs(NumElts);
3705 for (unsigned i = 0; i != NumElts; ++i) {
3706 // Base index is the starting element of the lane.
3707 Idxs[i] = i - (i % NumLaneElts);
3708 // If we are half way through the lane switch to the other source.
3709 if ((i % NumLaneElts) >= HalfLaneElts)
3710 Idxs[i] += NumElts;
3711 // Now select the specific element. By adding HalfLaneElts bits from
3712 // the immediate. Wrapping around the immediate every 8-bits.
3713 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3714 }
3715
3716 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3717
3718 Rep =
3719 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3720 } else if (Name.starts_with(Prefix: "avx512.mask.movddup") ||
3721 Name.starts_with(Prefix: "avx512.mask.movshdup") ||
3722 Name.starts_with(Prefix: "avx512.mask.movsldup")) {
3723 Value *Op0 = CI->getArgOperand(i: 0);
3724 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3725 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3726
3727 unsigned Offset = 0;
3728 if (Name.starts_with(Prefix: "avx512.mask.movshdup."))
3729 Offset = 1;
3730
3731 SmallVector<int, 16> Idxs(NumElts);
3732 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3733 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3734 Idxs[i + l + 0] = i + l + Offset;
3735 Idxs[i + l + 1] = i + l + Offset;
3736 }
3737
3738 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3739
3740 Rep =
3741 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3742 } else if (Name.starts_with(Prefix: "avx512.mask.punpckl") ||
3743 Name.starts_with(Prefix: "avx512.mask.unpckl.")) {
3744 Value *Op0 = CI->getArgOperand(i: 0);
3745 Value *Op1 = CI->getArgOperand(i: 1);
3746 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3747 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3748
3749 SmallVector<int, 64> Idxs(NumElts);
3750 for (int l = 0; l != NumElts; l += NumLaneElts)
3751 for (int i = 0; i != NumLaneElts; ++i)
3752 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3753
3754 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3755
3756 Rep =
3757 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3758 } else if (Name.starts_with(Prefix: "avx512.mask.punpckh") ||
3759 Name.starts_with(Prefix: "avx512.mask.unpckh.")) {
3760 Value *Op0 = CI->getArgOperand(i: 0);
3761 Value *Op1 = CI->getArgOperand(i: 1);
3762 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3763 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3764
3765 SmallVector<int, 64> Idxs(NumElts);
3766 for (int l = 0; l != NumElts; l += NumLaneElts)
3767 for (int i = 0; i != NumLaneElts; ++i)
3768 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3769
3770 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3771
3772 Rep =
3773 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
  } else if (Name.starts_with(Prefix: "avx512.mask.and.") ||
             Name.starts_with(Prefix: "avx512.mask.pand.")) {
    // IR has no FP bitwise operators, so bitcast the (possibly FP) vectors
    // to same-width integer vectors, do the bitwise op, and bitcast back.
    VectorType *FTy = cast<VectorType>(Val: CI->getType());
    VectorType *ITy = VectorType::getInteger(VTy: FTy);
    Rep = Builder.CreateAnd(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
                            RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
    Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
    // Arg 3 is the write-mask, arg 2 the passthru value.
    Rep =
        emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
  } else if (Name.starts_with(Prefix: "avx512.mask.andn.") ||
             Name.starts_with(Prefix: "avx512.mask.pandn.")) {
    // andn computes ~Op0 & Op1 — note only the first operand is inverted.
    VectorType *FTy = cast<VectorType>(Val: CI->getType());
    VectorType *ITy = VectorType::getInteger(VTy: FTy);
    Rep = Builder.CreateNot(V: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy));
    Rep = Builder.CreateAnd(LHS: Rep,
                            RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
    Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
    Rep =
        emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
  } else if (Name.starts_with(Prefix: "avx512.mask.or.") ||
             Name.starts_with(Prefix: "avx512.mask.por.")) {
    // Same bitcast-to-integer pattern as the masked 'and' case above.
    VectorType *FTy = cast<VectorType>(Val: CI->getType());
    VectorType *ITy = VectorType::getInteger(VTy: FTy);
    Rep = Builder.CreateOr(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
                           RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
    Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
    Rep =
        emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
  } else if (Name.starts_with(Prefix: "avx512.mask.xor.") ||
             Name.starts_with(Prefix: "avx512.mask.pxor.")) {
    // Same bitcast-to-integer pattern as the masked 'and' case above.
    VectorType *FTy = cast<VectorType>(Val: CI->getType());
    VectorType *ITy = VectorType::getInteger(VTy: FTy);
    Rep = Builder.CreateXor(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
                            RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
    Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
    Rep =
        emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
  } else if (Name.starts_with(Prefix: "avx512.mask.padd.")) {
    // Masked integer arithmetic upgrades directly to the IR binary operator
    // followed by a mask-select against the passthru operand.
    Rep = Builder.CreateAdd(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
    Rep =
        emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
  } else if (Name.starts_with(Prefix: "avx512.mask.psub.")) {
    Rep = Builder.CreateSub(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
    Rep =
        emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
  } else if (Name.starts_with(Prefix: "avx512.mask.pmull.")) {
    Rep = Builder.CreateMul(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
    Rep =
        emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3823 } else if (Name.starts_with(Prefix: "avx512.mask.add.p")) {
3824 if (Name.ends_with(Suffix: ".512")) {
3825 Intrinsic::ID IID;
3826 if (Name[17] == 's')
3827 IID = Intrinsic::x86_avx512_add_ps_512;
3828 else
3829 IID = Intrinsic::x86_avx512_add_pd_512;
3830
3831 Rep = Builder.CreateIntrinsic(
3832 ID: IID,
3833 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3834 } else {
3835 Rep = Builder.CreateFAdd(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3836 }
3837 Rep =
3838 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3839 } else if (Name.starts_with(Prefix: "avx512.mask.div.p")) {
3840 if (Name.ends_with(Suffix: ".512")) {
3841 Intrinsic::ID IID;
3842 if (Name[17] == 's')
3843 IID = Intrinsic::x86_avx512_div_ps_512;
3844 else
3845 IID = Intrinsic::x86_avx512_div_pd_512;
3846
3847 Rep = Builder.CreateIntrinsic(
3848 ID: IID,
3849 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3850 } else {
3851 Rep = Builder.CreateFDiv(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3852 }
3853 Rep =
3854 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3855 } else if (Name.starts_with(Prefix: "avx512.mask.mul.p")) {
3856 if (Name.ends_with(Suffix: ".512")) {
3857 Intrinsic::ID IID;
3858 if (Name[17] == 's')
3859 IID = Intrinsic::x86_avx512_mul_ps_512;
3860 else
3861 IID = Intrinsic::x86_avx512_mul_pd_512;
3862
3863 Rep = Builder.CreateIntrinsic(
3864 ID: IID,
3865 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3866 } else {
3867 Rep = Builder.CreateFMul(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3868 }
3869 Rep =
3870 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3871 } else if (Name.starts_with(Prefix: "avx512.mask.sub.p")) {
3872 if (Name.ends_with(Suffix: ".512")) {
3873 Intrinsic::ID IID;
3874 if (Name[17] == 's')
3875 IID = Intrinsic::x86_avx512_sub_ps_512;
3876 else
3877 IID = Intrinsic::x86_avx512_sub_pd_512;
3878
3879 Rep = Builder.CreateIntrinsic(
3880 ID: IID,
3881 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3882 } else {
3883 Rep = Builder.CreateFSub(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3884 }
3885 Rep =
3886 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3887 } else if ((Name.starts_with(Prefix: "avx512.mask.max.p") ||
3888 Name.starts_with(Prefix: "avx512.mask.min.p")) &&
3889 Name.drop_front(N: 18) == ".512") {
3890 bool IsDouble = Name[17] == 'd';
3891 bool IsMin = Name[13] == 'i';
3892 static const Intrinsic::ID MinMaxTbl[2][2] = {
3893 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3894 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3895 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3896
3897 Rep = Builder.CreateIntrinsic(
3898 ID: IID,
3899 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3900 Rep =
3901 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3902 } else if (Name.starts_with(Prefix: "avx512.mask.lzcnt.")) {
3903 Rep =
3904 Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: CI->getType(),
3905 Args: {CI->getArgOperand(i: 0), Builder.getInt1(V: false)});
3906 Rep =
3907 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3908 } else if (Name.starts_with(Prefix: "avx512.mask.psll")) {
3909 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3910 bool IsVariable = Name[16] == 'v';
3911 char Size = Name[16] == '.' ? Name[17]
3912 : Name[17] == '.' ? Name[18]
3913 : Name[18] == '.' ? Name[19]
3914 : Name[20];
3915
3916 Intrinsic::ID IID;
3917 if (IsVariable && Name[17] != '.') {
3918 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3919 IID = Intrinsic::x86_avx2_psllv_q;
3920 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3921 IID = Intrinsic::x86_avx2_psllv_q_256;
3922 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3923 IID = Intrinsic::x86_avx2_psllv_d;
3924 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3925 IID = Intrinsic::x86_avx2_psllv_d_256;
3926 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3927 IID = Intrinsic::x86_avx512_psllv_w_128;
3928 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3929 IID = Intrinsic::x86_avx512_psllv_w_256;
3930 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3931 IID = Intrinsic::x86_avx512_psllv_w_512;
3932 else
3933 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
3934 } else if (Name.ends_with(Suffix: ".128")) {
3935 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3936 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3937 : Intrinsic::x86_sse2_psll_d;
3938 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3939 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3940 : Intrinsic::x86_sse2_psll_q;
3941 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3942 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3943 : Intrinsic::x86_sse2_psll_w;
3944 else
3945 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
3946 } else if (Name.ends_with(Suffix: ".256")) {
3947 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3948 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3949 : Intrinsic::x86_avx2_psll_d;
3950 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3951 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3952 : Intrinsic::x86_avx2_psll_q;
3953 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3954 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3955 : Intrinsic::x86_avx2_psll_w;
3956 else
3957 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
3958 } else {
3959 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3960 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3961 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3962 : Intrinsic::x86_avx512_psll_d_512;
3963 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3964 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3965 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3966 : Intrinsic::x86_avx512_psll_q_512;
3967 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3968 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3969 : Intrinsic::x86_avx512_psll_w_512;
3970 else
3971 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
3972 }
3973
3974 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
3975 } else if (Name.starts_with(Prefix: "avx512.mask.psrl")) {
3976 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3977 bool IsVariable = Name[16] == 'v';
3978 char Size = Name[16] == '.' ? Name[17]
3979 : Name[17] == '.' ? Name[18]
3980 : Name[18] == '.' ? Name[19]
3981 : Name[20];
3982
3983 Intrinsic::ID IID;
3984 if (IsVariable && Name[17] != '.') {
3985 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3986 IID = Intrinsic::x86_avx2_psrlv_q;
3987 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3988 IID = Intrinsic::x86_avx2_psrlv_q_256;
3989 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3990 IID = Intrinsic::x86_avx2_psrlv_d;
3991 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3992 IID = Intrinsic::x86_avx2_psrlv_d_256;
3993 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3994 IID = Intrinsic::x86_avx512_psrlv_w_128;
3995 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3996 IID = Intrinsic::x86_avx512_psrlv_w_256;
3997 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3998 IID = Intrinsic::x86_avx512_psrlv_w_512;
3999 else
4000 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4001 } else if (Name.ends_with(Suffix: ".128")) {
4002 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4003 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4004 : Intrinsic::x86_sse2_psrl_d;
4005 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4006 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4007 : Intrinsic::x86_sse2_psrl_q;
4008 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4009 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4010 : Intrinsic::x86_sse2_psrl_w;
4011 else
4012 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4013 } else if (Name.ends_with(Suffix: ".256")) {
4014 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4015 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4016 : Intrinsic::x86_avx2_psrl_d;
4017 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4018 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4019 : Intrinsic::x86_avx2_psrl_q;
4020 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4021 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4022 : Intrinsic::x86_avx2_psrl_w;
4023 else
4024 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4025 } else {
4026 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4027 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4028 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4029 : Intrinsic::x86_avx512_psrl_d_512;
4030 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4031 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4032 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4033 : Intrinsic::x86_avx512_psrl_q_512;
4034 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4035 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4036 : Intrinsic::x86_avx512_psrl_w_512;
4037 else
4038 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4039 }
4040
4041 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
4042 } else if (Name.starts_with(Prefix: "avx512.mask.psra")) {
4043 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4044 bool IsVariable = Name[16] == 'v';
4045 char Size = Name[16] == '.' ? Name[17]
4046 : Name[17] == '.' ? Name[18]
4047 : Name[18] == '.' ? Name[19]
4048 : Name[20];
4049
4050 Intrinsic::ID IID;
4051 if (IsVariable && Name[17] != '.') {
4052 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4053 IID = Intrinsic::x86_avx2_psrav_d;
4054 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4055 IID = Intrinsic::x86_avx2_psrav_d_256;
4056 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4057 IID = Intrinsic::x86_avx512_psrav_w_128;
4058 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4059 IID = Intrinsic::x86_avx512_psrav_w_256;
4060 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4061 IID = Intrinsic::x86_avx512_psrav_w_512;
4062 else
4063 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4064 } else if (Name.ends_with(Suffix: ".128")) {
4065 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4066 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4067 : Intrinsic::x86_sse2_psra_d;
4068 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4069 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4070 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4071 : Intrinsic::x86_avx512_psra_q_128;
4072 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4073 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4074 : Intrinsic::x86_sse2_psra_w;
4075 else
4076 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4077 } else if (Name.ends_with(Suffix: ".256")) {
4078 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4079 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4080 : Intrinsic::x86_avx2_psra_d;
4081 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4082 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4083 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4084 : Intrinsic::x86_avx512_psra_q_256;
4085 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4086 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4087 : Intrinsic::x86_avx2_psra_w;
4088 else
4089 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4090 } else {
4091 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4092 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4093 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4094 : Intrinsic::x86_avx512_psra_d_512;
4095 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4096 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4097 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4098 : Intrinsic::x86_avx512_psra_q_512;
4099 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4100 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4101 : Intrinsic::x86_avx512_psra_w_512;
4102 else
4103 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4104 }
4105
4106 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
  } else if (Name.starts_with(Prefix: "avx512.mask.move.s")) {
    Rep = upgradeMaskedMove(Builder, CI&: *CI);
  } else if (Name.starts_with(Prefix: "avx512.cvtmask2")) {
    Rep = upgradeMaskToInt(Builder, CI&: *CI);
  } else if (Name.ends_with(Suffix: ".movntdqa")) {
    // Non-temporal load: emit an ordinary aligned load tagged with
    // !nontemporal metadata (constant value 1).
    MDNode *Node = MDNode::get(
        Context&: C, MDs: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));

    // Use the full vector size (in bytes) as the alignment.
    LoadInst *LI = Builder.CreateAlignedLoad(
        Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0),
        Align: Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
    LI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
    Rep = LI;
  } else if (Name.starts_with(Prefix: "fma.vfmadd.") ||
             Name.starts_with(Prefix: "fma.vfmsub.") ||
             Name.starts_with(Prefix: "fma.vfnmadd.") ||
             Name.starts_with(Prefix: "fma.vfnmsub.")) {
    // Decode the operation from the intrinsic name. The character offsets
    // differ by one depending on whether the optional 'n' (negated product)
    // is present: 's' after the 'm' marks msub/nmsub (negated addend), and a
    // trailing scalar suffix starting with 's' marks the scalar forms.
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

    Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
                    CI->getArgOperand(i: 2)};

    // Scalar forms operate on element 0 only: extract, fma, then re-insert.
    if (IsScalar) {
      Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
    }

    // Negating either multiplicand negates the product, so the negation may
    // be applied to operand 0 (vector form) or operand 1 (scalar form).
    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(V: Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(V: Ops[1]);
    if (NegAcc)
      Ops[2] = Builder.CreateFNeg(V: Ops[2]);

    Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops);

    // The scalar result is placed into lane 0 of the original first operand,
    // preserving its upper elements.
    if (IsScalar)
      Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
4148 } else if (Name.starts_with(Prefix: "fma4.vfmadd.s")) {
4149 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4150 CI->getArgOperand(i: 2)};
4151
4152 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
4153 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
4154 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
4155
4156 Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops);
4157
4158 Rep = Builder.CreateInsertElement(Vec: Constant::getNullValue(Ty: CI->getType()),
4159 NewElt: Rep, Idx: (uint64_t)0);
4160 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.s") ||
4161 Name.starts_with(Prefix: "avx512.maskz.vfmadd.s") ||
4162 Name.starts_with(Prefix: "avx512.mask3.vfmadd.s") ||
4163 Name.starts_with(Prefix: "avx512.mask3.vfmsub.s") ||
4164 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.s")) {
4165 bool IsMask3 = Name[11] == '3';
4166 bool IsMaskZ = Name[11] == 'z';
4167 // Drop the "avx512.mask." to make it easier.
4168 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4169 bool NegMul = Name[2] == 'n';
4170 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4171
4172 Value *A = CI->getArgOperand(i: 0);
4173 Value *B = CI->getArgOperand(i: 1);
4174 Value *C = CI->getArgOperand(i: 2);
4175
4176 if (NegMul && (IsMask3 || IsMaskZ))
4177 A = Builder.CreateFNeg(V: A);
4178 if (NegMul && !(IsMask3 || IsMaskZ))
4179 B = Builder.CreateFNeg(V: B);
4180 if (NegAcc)
4181 C = Builder.CreateFNeg(V: C);
4182
4183 A = Builder.CreateExtractElement(Vec: A, Idx: (uint64_t)0);
4184 B = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
4185 C = Builder.CreateExtractElement(Vec: C, Idx: (uint64_t)0);
4186
4187 if (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
4188 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4) {
4189 Value *Ops[] = {A, B, C, CI->getArgOperand(i: 4)};
4190
4191 Intrinsic::ID IID;
4192 if (Name.back() == 'd')
4193 IID = Intrinsic::x86_avx512_vfmadd_f64;
4194 else
4195 IID = Intrinsic::x86_avx512_vfmadd_f32;
4196 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4197 } else {
4198 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
4199 }
4200
4201 Value *PassThru = IsMaskZ ? Constant::getNullValue(Ty: Rep->getType())
4202 : IsMask3 ? C
4203 : A;
4204
4205 // For Mask3 with NegAcc, we need to create a new extractelement that
4206 // avoids the negation above.
4207 if (NegAcc && IsMask3)
4208 PassThru =
4209 Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 2), Idx: (uint64_t)0);
4210
4211 Rep = emitX86ScalarSelect(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4212 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: IsMask3 ? 2 : 0), NewElt: Rep,
4213 Idx: (uint64_t)0);
4214 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.p") ||
4215 Name.starts_with(Prefix: "avx512.mask.vfnmadd.p") ||
4216 Name.starts_with(Prefix: "avx512.mask.vfnmsub.p") ||
4217 Name.starts_with(Prefix: "avx512.mask3.vfmadd.p") ||
4218 Name.starts_with(Prefix: "avx512.mask3.vfmsub.p") ||
4219 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.p") ||
4220 Name.starts_with(Prefix: "avx512.maskz.vfmadd.p")) {
4221 bool IsMask3 = Name[11] == '3';
4222 bool IsMaskZ = Name[11] == 'z';
4223 // Drop the "avx512.mask." to make it easier.
4224 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4225 bool NegMul = Name[2] == 'n';
4226 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4227
4228 Value *A = CI->getArgOperand(i: 0);
4229 Value *B = CI->getArgOperand(i: 1);
4230 Value *C = CI->getArgOperand(i: 2);
4231
4232 if (NegMul && (IsMask3 || IsMaskZ))
4233 A = Builder.CreateFNeg(V: A);
4234 if (NegMul && !(IsMask3 || IsMaskZ))
4235 B = Builder.CreateFNeg(V: B);
4236 if (NegAcc)
4237 C = Builder.CreateFNeg(V: C);
4238
4239 if (CI->arg_size() == 5 &&
4240 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
4241 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4)) {
4242 Intrinsic::ID IID;
4243 // Check the character before ".512" in string.
4244 if (Name[Name.size() - 5] == 's')
4245 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4246 else
4247 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4248
4249 Rep = Builder.CreateIntrinsic(ID: IID, Args: {A, B, C, CI->getArgOperand(i: 4)});
4250 } else {
4251 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
4252 }
4253
4254 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
4255 : IsMask3 ? CI->getArgOperand(i: 2)
4256 : CI->getArgOperand(i: 0);
4257
4258 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4259 } else if (Name.starts_with(Prefix: "fma.vfmsubadd.p")) {
4260 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4261 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4262 Intrinsic::ID IID;
4263 if (VecWidth == 128 && EltWidth == 32)
4264 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4265 else if (VecWidth == 256 && EltWidth == 32)
4266 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4267 else if (VecWidth == 128 && EltWidth == 64)
4268 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4269 else if (VecWidth == 256 && EltWidth == 64)
4270 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4271 else
4272 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4273
4274 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4275 CI->getArgOperand(i: 2)};
4276 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4277 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4278 } else if (Name.starts_with(Prefix: "avx512.mask.vfmaddsub.p") ||
4279 Name.starts_with(Prefix: "avx512.mask3.vfmaddsub.p") ||
4280 Name.starts_with(Prefix: "avx512.maskz.vfmaddsub.p") ||
4281 Name.starts_with(Prefix: "avx512.mask3.vfmsubadd.p")) {
4282 bool IsMask3 = Name[11] == '3';
4283 bool IsMaskZ = Name[11] == 'z';
4284 // Drop the "avx512.mask." to make it easier.
4285 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4286 bool IsSubAdd = Name[3] == 's';
4287 if (CI->arg_size() == 5) {
4288 Intrinsic::ID IID;
4289 // Check the character before ".512" in string.
4290 if (Name[Name.size() - 5] == 's')
4291 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4292 else
4293 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4294
4295 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4296 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
4297 if (IsSubAdd)
4298 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4299
4300 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4301 } else {
4302 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
4303
4304 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4305 CI->getArgOperand(i: 2)};
4306
4307 Function *FMA = Intrinsic::getOrInsertDeclaration(
4308 M: CI->getModule(), id: Intrinsic::fma, OverloadTys: Ops[0]->getType());
4309 Value *Odd = Builder.CreateCall(Callee: FMA, Args: Ops);
4310 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4311 Value *Even = Builder.CreateCall(Callee: FMA, Args: Ops);
4312
4313 if (IsSubAdd)
4314 std::swap(a&: Even, b&: Odd);
4315
4316 SmallVector<int, 32> Idxs(NumElts);
4317 for (int i = 0; i != NumElts; ++i)
4318 Idxs[i] = i + (i % 2) * NumElts;
4319
4320 Rep = Builder.CreateShuffleVector(V1: Even, V2: Odd, Mask: Idxs);
4321 }
4322
4323 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
4324 : IsMask3 ? CI->getArgOperand(i: 2)
4325 : CI->getArgOperand(i: 0);
4326
4327 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4328 } else if (Name.starts_with(Prefix: "avx512.mask.pternlog.") ||
4329 Name.starts_with(Prefix: "avx512.maskz.pternlog.")) {
4330 bool ZeroMask = Name[11] == 'z';
4331 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4332 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4333 Intrinsic::ID IID;
4334 if (VecWidth == 128 && EltWidth == 32)
4335 IID = Intrinsic::x86_avx512_pternlog_d_128;
4336 else if (VecWidth == 256 && EltWidth == 32)
4337 IID = Intrinsic::x86_avx512_pternlog_d_256;
4338 else if (VecWidth == 512 && EltWidth == 32)
4339 IID = Intrinsic::x86_avx512_pternlog_d_512;
4340 else if (VecWidth == 128 && EltWidth == 64)
4341 IID = Intrinsic::x86_avx512_pternlog_q_128;
4342 else if (VecWidth == 256 && EltWidth == 64)
4343 IID = Intrinsic::x86_avx512_pternlog_q_256;
4344 else if (VecWidth == 512 && EltWidth == 64)
4345 IID = Intrinsic::x86_avx512_pternlog_q_512;
4346 else
4347 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4348
4349 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4350 CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)};
4351 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4352 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4353 : CI->getArgOperand(i: 0);
4354 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: PassThru);
4355 } else if (Name.starts_with(Prefix: "avx512.mask.vpmadd52") ||
4356 Name.starts_with(Prefix: "avx512.maskz.vpmadd52")) {
4357 bool ZeroMask = Name[11] == 'z';
4358 bool High = Name[20] == 'h' || Name[21] == 'h';
4359 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4360 Intrinsic::ID IID;
4361 if (VecWidth == 128 && !High)
4362 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4363 else if (VecWidth == 256 && !High)
4364 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4365 else if (VecWidth == 512 && !High)
4366 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4367 else if (VecWidth == 128 && High)
4368 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4369 else if (VecWidth == 256 && High)
4370 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4371 else if (VecWidth == 512 && High)
4372 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4373 else
4374 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4375
4376 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4377 CI->getArgOperand(i: 2)};
4378 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4379 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4380 : CI->getArgOperand(i: 0);
4381 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4382 } else if (Name.starts_with(Prefix: "avx512.mask.vpermi2var.") ||
4383 Name.starts_with(Prefix: "avx512.mask.vpermt2var.") ||
4384 Name.starts_with(Prefix: "avx512.maskz.vpermt2var.")) {
4385 bool ZeroMask = Name[11] == 'z';
4386 bool IndexForm = Name[17] == 'i';
4387 Rep = upgradeX86VPERMT2Intrinsics(Builder, CI&: *CI, ZeroMask, IndexForm);
4388 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpbusd.") ||
4389 Name.starts_with(Prefix: "avx512.maskz.vpdpbusd.") ||
4390 Name.starts_with(Prefix: "avx512.mask.vpdpbusds.") ||
4391 Name.starts_with(Prefix: "avx512.maskz.vpdpbusds.")) {
4392 bool ZeroMask = Name[11] == 'z';
4393 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4394 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4395 Intrinsic::ID IID;
4396 if (VecWidth == 128 && !IsSaturating)
4397 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4398 else if (VecWidth == 256 && !IsSaturating)
4399 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4400 else if (VecWidth == 512 && !IsSaturating)
4401 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4402 else if (VecWidth == 128 && IsSaturating)
4403 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4404 else if (VecWidth == 256 && IsSaturating)
4405 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4406 else if (VecWidth == 512 && IsSaturating)
4407 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4408 else
4409 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4410
4411 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4412 CI->getArgOperand(i: 2)};
4413
4414 // Input arguments types were incorrectly set to vectors of i32 before but
4415 // they should be vectors of i8. Insert bit cast when encountering the old
4416 // types
4417 if (Args[1]->getType()->isVectorTy() &&
4418 cast<VectorType>(Val: Args[1]->getType())
4419 ->getElementType()
4420 ->isIntegerTy(Bitwidth: 32) &&
4421 Args[2]->getType()->isVectorTy() &&
4422 cast<VectorType>(Val: Args[2]->getType())
4423 ->getElementType()
4424 ->isIntegerTy(Bitwidth: 32)) {
4425 Type *NewArgType = nullptr;
4426 if (VecWidth == 128)
4427 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 16, Scalable: false);
4428 else if (VecWidth == 256)
4429 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 32, Scalable: false);
4430 else if (VecWidth == 512)
4431 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 64, Scalable: false);
4432 else
4433 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected vector bit width",
4434 CI);
4435
4436 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
4437 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
4438 }
4439
4440 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4441 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4442 : CI->getArgOperand(i: 0);
4443 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4444 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpwssd.") ||
4445 Name.starts_with(Prefix: "avx512.maskz.vpdpwssd.") ||
4446 Name.starts_with(Prefix: "avx512.mask.vpdpwssds.") ||
4447 Name.starts_with(Prefix: "avx512.maskz.vpdpwssds.")) {
4448 bool ZeroMask = Name[11] == 'z';
4449 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4450 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4451 Intrinsic::ID IID;
4452 if (VecWidth == 128 && !IsSaturating)
4453 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4454 else if (VecWidth == 256 && !IsSaturating)
4455 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4456 else if (VecWidth == 512 && !IsSaturating)
4457 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4458 else if (VecWidth == 128 && IsSaturating)
4459 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4460 else if (VecWidth == 256 && IsSaturating)
4461 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4462 else if (VecWidth == 512 && IsSaturating)
4463 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4464 else
4465 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4466
4467 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4468 CI->getArgOperand(i: 2)};
4469
4470 // Input arguments types were incorrectly set to vectors of i32 before but
4471 // they should be vectors of i16. Insert bit cast when encountering the old
4472 // types
4473 if (Args[1]->getType()->isVectorTy() &&
4474 cast<VectorType>(Val: Args[1]->getType())
4475 ->getElementType()
4476 ->isIntegerTy(Bitwidth: 32) &&
4477 Args[2]->getType()->isVectorTy() &&
4478 cast<VectorType>(Val: Args[2]->getType())
4479 ->getElementType()
4480 ->isIntegerTy(Bitwidth: 32)) {
4481 Type *NewArgType = nullptr;
4482 if (VecWidth == 128)
4483 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 8, Scalable: false);
4484 else if (VecWidth == 256)
4485 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 16, Scalable: false);
4486 else if (VecWidth == 512)
4487 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 32, Scalable: false);
4488 else
4489 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected vector bit width",
4490 CI);
4491
4492 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
4493 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
4494 }
4495
4496 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4497 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4498 : CI->getArgOperand(i: 0);
4499 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4500 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4501 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4502 Name == "subborrow.u32" || Name == "subborrow.u64") {
4503 Intrinsic::ID IID;
4504 if (Name[0] == 'a' && Name.back() == '2')
4505 IID = Intrinsic::x86_addcarry_32;
4506 else if (Name[0] == 'a' && Name.back() == '4')
4507 IID = Intrinsic::x86_addcarry_64;
4508 else if (Name[0] == 's' && Name.back() == '2')
4509 IID = Intrinsic::x86_subborrow_32;
4510 else if (Name[0] == 's' && Name.back() == '4')
4511 IID = Intrinsic::x86_subborrow_64;
4512 else
4513 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4514
4515 // Make a call with 3 operands.
4516 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4517 CI->getArgOperand(i: 2)};
4518 Value *NewCall = Builder.CreateIntrinsic(ID: IID, Args);
4519
4520 // Extract the second result and store it.
4521 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
4522 Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 3), Align: Align(1));
4523 // Replace the original call result with the first result of the new call.
4524 Value *CF = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
4525
4526 CI->replaceAllUsesWith(V: CF);
4527 Rep = nullptr;
4528 } else if (Name.starts_with(Prefix: "avx512.mask.") &&
4529 upgradeAVX512MaskToSelect(Name, Builder, CI&: *CI, Rep)) {
4530 // Rep will be updated by the call in the condition.
4531 } else
4532 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4533
4534 return Rep;
4535}
4536
4537static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4538 Function *F, IRBuilder<> &Builder) {
4539 if (Name.starts_with(Prefix: "neon.bfcvt")) {
4540 if (Name.starts_with(Prefix: "neon.bfcvtn2")) {
4541 SmallVector<int, 32> LoMask(4);
4542 std::iota(first: LoMask.begin(), last: LoMask.end(), value: 0);
4543 SmallVector<int, 32> ConcatMask(8);
4544 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4545 Value *Inactive = Builder.CreateShuffleVector(V: CI->getOperand(i_nocapture: 0), Mask: LoMask);
4546 Value *Trunc =
4547 Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 1), DestTy: Inactive->getType());
4548 return Builder.CreateShuffleVector(V1: Inactive, V2: Trunc, Mask: ConcatMask);
4549 } else if (Name.starts_with(Prefix: "neon.bfcvtn")) {
4550 SmallVector<int, 32> ConcatMask(8);
4551 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4552 Type *V4BF16 =
4553 FixedVectorType::get(ElementType: Type::getBFloatTy(C&: F->getContext()), NumElts: 4);
4554 Value *Trunc = Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0), DestTy: V4BF16);
4555 dbgs() << "Trunc: " << *Trunc << "\n";
4556 return Builder.CreateShuffleVector(
4557 V1: Trunc, V2: ConstantAggregateZero::get(Ty: V4BF16), Mask: ConcatMask);
4558 } else {
4559 return Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0),
4560 DestTy: Type::getBFloatTy(C&: F->getContext()));
4561 }
4562 } else if (Name.starts_with(Prefix: "sve.fcvt")) {
4563 Intrinsic::ID NewID =
4564 StringSwitch<Intrinsic::ID>(Name)
4565 .Case(S: "sve.fcvt.bf16f32", Value: Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4566 .Case(S: "sve.fcvtnt.bf16f32",
4567 Value: Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4568 .Default(Value: Intrinsic::not_intrinsic);
4569 if (NewID == Intrinsic::not_intrinsic)
4570 llvm_unreachable("Unhandled Intrinsic!");
4571
4572 SmallVector<Value *, 3> Args(CI->args());
4573
4574 // The original intrinsics incorrectly used a predicate based on the
4575 // smallest element type rather than the largest.
4576 Type *BadPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
4577 Type *GoodPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
4578
4579 if (Args[1]->getType() != BadPredTy)
4580 llvm_unreachable("Unexpected predicate type!");
4581
4582 Args[1] = Builder.CreateIntrinsic(ID: Intrinsic::aarch64_sve_convert_to_svbool,
4583 Types: BadPredTy, Args: Args[1]);
4584 Args[1] = Builder.CreateIntrinsic(
4585 ID: Intrinsic::aarch64_sve_convert_from_svbool, Types: GoodPredTy, Args: Args[1]);
4586
4587 return Builder.CreateIntrinsic(ID: NewID, Args, /*FMFSource=*/nullptr,
4588 Name: CI->getName());
4589 }
4590
4591 llvm_unreachable("Unhandled Intrinsic!");
4592}
4593
// Upgrade legacy ARM MVE/CDE intrinsics. The old 64-bit-element forms took a
// v4i1 predicate; the current intrinsics take v2i1 instead, so each predicate
// operand (and the corresponding overload type) must be rewritten.
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_vctp64, Types: {},
                                          Args: CI->getArgOperand(i: 0),
                                          /*FMFSource=*/nullptr, Name: CI->getName());
    // Round-trip v2i1 -> i32 -> v4i1 so the result matches the old call's
    // type; the verifier-visible value keeps the legacy v4i1 shape.
    Value *C1 = Builder.CreateIntrinsic(
        ID: Intrinsic::arm_mve_pred_v2i,
        Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 2, Scalable: false)}, Args: VCTP);
    return Builder.CreateIntrinsic(
        ID: Intrinsic::arm_mve_pred_i2v,
        Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // Build the overload-type list for the new declaration. The order and
    // contents must match each intrinsic's TableGen definition exactly; the
    // predicate slot is always the new v2i1 type.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts: 2);
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(),
             CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 1)->getType(),
             CI->getOperand(i_nocapture: 2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Forward all operands, narrowing any i1-vector (predicate) operand from
    // v4i1 to v2i1 via an arm_mve_pred_v2i / pred_i2v round trip.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            ID: Intrinsic::arm_mve_pred_v2i,
            Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: Op);
        Op = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_pred_i2v, Types: {V2I1Ty}, Args: C1);
      }
      Ops.push_back(x: Op);
    }

    // Re-emit the same intrinsic ID with the corrected overload types.
    return Builder.CreateIntrinsic(ID, Types: Tys, Args: Ops, /*FMFSource=*/nullptr,
                                   Name: CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
4678
4679// These are expected to have the arguments:
4680// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4681//
4682// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4683//
4684static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4685 Function *F, IRBuilder<> &Builder) {
4686 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4687 // for compatibility.
4688 auto UpgradeLegacyWMMAIUIntrinsicCall =
4689 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4690 ArrayRef<Type *> OverloadTys) -> Value * {
4691 // Prepare arguments, append clamp=0 for compatibility
4692 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4693 Args.push_back(Elt: Builder.getFalse());
4694
4695 // Insert the declaration for the right overload types
4696 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4697 M: F->getParent(), id: F->getIntrinsicID(), OverloadTys);
4698
4699 // Copy operand bundles if any
4700 SmallVector<OperandBundleDef, 1> Bundles;
4701 CI->getOperandBundlesAsDefs(Defs&: Bundles);
4702
4703 // Create the new call and copy calling properties
4704 auto *NewCall = cast<CallInst>(Val: Builder.CreateCall(Callee: NewDecl, Args, OpBundles: Bundles));
4705 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
4706 NewCall->setCallingConv(CI->getCallingConv());
4707 NewCall->setAttributes(CI->getAttributes());
4708 NewCall->setDebugLoc(CI->getDebugLoc());
4709 NewCall->copyMetadata(SrcInst: *CI);
4710 return NewCall;
4711 };
4712
4713 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4714 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4715 "intrinsic should have 7 arguments");
4716 Type *T1 = CI->getArgOperand(i: 4)->getType();
4717 Type *T2 = CI->getArgOperand(i: 1)->getType();
4718 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4719 }
4720 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4721 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4722 "intrinsic should have 8 arguments");
4723 Type *T1 = CI->getArgOperand(i: 4)->getType();
4724 Type *T2 = CI->getArgOperand(i: 1)->getType();
4725 Type *T3 = CI->getArgOperand(i: 3)->getType();
4726 Type *T4 = CI->getArgOperand(i: 5)->getType();
4727 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4728 }
4729
4730 switch (F->getIntrinsicID()) {
4731 default:
4732 break;
4733 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4734 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4735 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4736 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4737 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4738 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4739 // Drop src0 and src1 modifiers.
4740 const Value *Op0 = CI->getArgOperand(i: 0);
4741 const Value *Op2 = CI->getArgOperand(i: 2);
4742 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
4743 const ConstantInt *ModA = dyn_cast<ConstantInt>(Val: Op0);
4744 const ConstantInt *ModB = dyn_cast<ConstantInt>(Val: Op2);
4745 if (!ModA->isZero() || !ModB->isZero())
4746 reportFatalUsageError(reason: Name + " matrix A and B modifiers shall be zero");
4747
4748 SmallVector<Value *, 8> Args{CI->getArgOperand(i: 1), CI->getArgOperand(i: 3)};
4749 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4750 Args.push_back(Elt: CI->getArgOperand(i: I));
4751
4752 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4753 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4754 Overloads.push_back(Elt: Args[3]->getType());
4755 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4756 M: F->getParent(), id: F->getIntrinsicID(), OverloadTys: Overloads);
4757
4758 SmallVector<OperandBundleDef, 1> Bundles;
4759 CI->getOperandBundlesAsDefs(Defs&: Bundles);
4760
4761 auto *NewCall = cast<CallInst>(Val: Builder.CreateCall(Callee: NewDecl, Args, OpBundles: Bundles));
4762 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
4763 NewCall->setCallingConv(CI->getCallingConv());
4764 NewCall->setAttributes(CI->getAttributes());
4765 NewCall->setDebugLoc(CI->getDebugLoc());
4766 NewCall->copyMetadata(SrcInst: *CI);
4767 NewCall->takeName(V: CI);
4768 return NewCall;
4769 }
4770 }
4771
4772 AtomicRMWInst::BinOp RMWOp =
4773 StringSwitch<AtomicRMWInst::BinOp>(Name)
4774 .StartsWith(S: "ds.fadd", Value: AtomicRMWInst::FAdd)
4775 .StartsWith(S: "ds.fmin", Value: AtomicRMWInst::FMin)
4776 .StartsWith(S: "ds.fmax", Value: AtomicRMWInst::FMax)
4777 .StartsWith(S: "atomic.inc.", Value: AtomicRMWInst::UIncWrap)
4778 .StartsWith(S: "atomic.dec.", Value: AtomicRMWInst::UDecWrap)
4779 .StartsWith(S: "global.atomic.fadd", Value: AtomicRMWInst::FAdd)
4780 .StartsWith(S: "flat.atomic.fadd", Value: AtomicRMWInst::FAdd)
4781 .StartsWith(S: "global.atomic.fmin", Value: AtomicRMWInst::FMin)
4782 .StartsWith(S: "flat.atomic.fmin", Value: AtomicRMWInst::FMin)
4783 .StartsWith(S: "global.atomic.fmax", Value: AtomicRMWInst::FMax)
4784 .StartsWith(S: "flat.atomic.fmax", Value: AtomicRMWInst::FMax)
4785 .StartsWith(S: "atomic.cond.sub", Value: AtomicRMWInst::USubCond)
4786 .StartsWith(S: "atomic.csub", Value: AtomicRMWInst::USubSat);
4787
4788 unsigned NumOperands = CI->getNumOperands();
4789 if (NumOperands < 3) // Malformed bitcode.
4790 return nullptr;
4791
4792 Value *Ptr = CI->getArgOperand(i: 0);
4793 PointerType *PtrTy = dyn_cast<PointerType>(Val: Ptr->getType());
4794 if (!PtrTy) // Malformed.
4795 return nullptr;
4796
4797 Value *Val = CI->getArgOperand(i: 1);
4798 if (Val->getType() != CI->getType()) // Malformed.
4799 return nullptr;
4800
4801 ConstantInt *OrderArg = nullptr;
4802 bool IsVolatile = false;
4803
4804 // These should have 5 arguments (plus the callee). A separate version of the
4805 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4806 if (NumOperands > 3)
4807 OrderArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
4808
4809 // Ignore scope argument at 3
4810
4811 if (NumOperands > 5) {
4812 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 4));
4813 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4814 }
4815
4816 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4817 if (OrderArg && isValidAtomicOrdering(I: OrderArg->getZExtValue()))
4818 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4819 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4820 Order = AtomicOrdering::SequentiallyConsistent;
4821
4822 LLVMContext &Ctx = F->getContext();
4823
4824 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4825 Type *RetTy = CI->getType();
4826 if (VectorType *VT = dyn_cast<VectorType>(Val: RetTy)) {
4827 if (VT->getElementType()->isIntegerTy(Bitwidth: 16)) {
4828 VectorType *AsBF16 =
4829 VectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), EC: VT->getElementCount());
4830 Val = Builder.CreateBitCast(V: Val, DestTy: AsBF16);
4831 }
4832 }
4833
4834 // The scope argument never really worked correctly. Use agent as the most
4835 // conservative option which should still always produce the instruction.
4836 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID(SSN: "agent");
4837 AtomicRMWInst *RMW =
4838 Builder.CreateAtomicRMW(Op: RMWOp, Ptr, Val, Align: std::nullopt, Ordering: Order, SSID);
4839
4840 unsigned AddrSpace = PtrTy->getAddressSpace();
4841 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4842 MDNode *EmptyMD = MDNode::get(Context&: F->getContext(), MDs: {});
4843 RMW->setMetadata(Kind: "amdgpu.no.fine.grained.memory", Node: EmptyMD);
4844 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4845 RMW->setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: EmptyMD);
4846 }
4847
4848 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4849 MDBuilder MDB(F->getContext());
4850 MDNode *RangeNotPrivate =
4851 MDB.createRange(Lo: APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4852 Hi: APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4853 RMW->setMetadata(KindID: LLVMContext::MD_noalias_addrspace, Node: RangeNotPrivate);
4854 }
4855
4856 if (IsVolatile)
4857 RMW->setVolatile(true);
4858
4859 return Builder.CreateBitCast(V: RMW, DestTy: RetTy);
4860}
4861
4862/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4863/// plain MDNode, as it's the verifier's job to check these are the correct
4864/// types later.
4865static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4866 if (Op < CI->arg_size()) {
4867 if (MetadataAsValue *MAV =
4868 dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op))) {
4869 Metadata *MD = MAV->getMetadata();
4870 return dyn_cast_if_present<MDNode>(Val: MD);
4871 }
4872 }
4873 return nullptr;
4874}
4875
4876/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4877static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4878 if (Op < CI->arg_size())
4879 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op)))
4880 return MAV->getMetadata();
4881 return nullptr;
4882}
4883
4884static MDNode *getDebugLocSafe(const Instruction *I) {
4885 // The MDNode attached to this instruction might not be the correct type,
4886 // as the verifier has not yet be run. Fetch it as a bare MDNode.
4887 return I->getDebugLoc().getAsMDNode();
4888}
4889
4890/// Convert debug intrinsic calls to non-instruction debug records.
4891/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4892/// \p CI - The debug intrinsic call.
4893static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4894 DbgRecord *DR = nullptr;
4895 if (Name == "label") {
4896 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(Label: unwrapMAVOp(CI, Op: 0),
4897 DL: CI->getDebugLoc());
4898 } else if (Name == "assign") {
4899 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4900 Type: DbgVariableRecord::LocationType::Assign, Val: unwrapMAVMetadataOp(CI, Op: 0),
4901 Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: unwrapMAVOp(CI, Op: 3),
4902 Address: unwrapMAVMetadataOp(CI, Op: 4),
4903 /*The address is a Value ref, it will be stored as a Metadata */
4904 AddressExpression: unwrapMAVOp(CI, Op: 5), DI: getDebugLocSafe(I: CI));
4905 } else if (Name == "declare") {
4906 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4907 Type: DbgVariableRecord::LocationType::Declare, Val: unwrapMAVMetadataOp(CI, Op: 0),
4908 Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: nullptr, Address: nullptr, AddressExpression: nullptr,
4909 DI: getDebugLocSafe(I: CI));
4910 } else if (Name == "addr") {
4911 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4912 MDNode *ExprNode = unwrapMAVOp(CI, Op: 2);
4913 // Don't try to add something to the expression if it's not an expression.
4914 // Instead, allow the verifier to fail later.
4915 if (DIExpression *Expr = dyn_cast<DIExpression>(Val: ExprNode)) {
4916 ExprNode = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
4917 }
4918 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4919 Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
4920 Variable: unwrapMAVOp(CI, Op: 1), Expression: ExprNode, AssignID: nullptr, Address: nullptr, AddressExpression: nullptr,
4921 DI: getDebugLocSafe(I: CI));
4922 } else if (Name == "value") {
4923 // An old version of dbg.value had an extra offset argument.
4924 unsigned VarOp = 1;
4925 unsigned ExprOp = 2;
4926 if (CI->arg_size() == 4) {
4927 auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1));
4928 // Nonzero offset dbg.values get dropped without a replacement.
4929 if (!Offset || !Offset->isNullValue())
4930 return;
4931 VarOp = 2;
4932 ExprOp = 3;
4933 }
4934 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4935 Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
4936 Variable: unwrapMAVOp(CI, Op: VarOp), Expression: unwrapMAVOp(CI, Op: ExprOp), AssignID: nullptr, Address: nullptr,
4937 AddressExpression: nullptr, DI: getDebugLocSafe(I: CI));
4938 }
4939 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4940 CI->getParent()->insertDbgRecordBefore(DR, Here: CI->getIterator());
4941}
4942
4943static Value *upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder) {
4944 auto *Offset = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
4945 if (!Offset)
4946 reportFatalUsageError(reason: "Invalid llvm.vector.splice offset argument");
4947 int64_t OffsetVal = Offset->getSExtValue();
4948 return Builder.CreateIntrinsic(ID: OffsetVal >= 0
4949 ? Intrinsic::vector_splice_left
4950 : Intrinsic::vector_splice_right,
4951 Types: CI->getType(),
4952 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4953 Builder.getInt32(C: std::abs(i: OffsetVal))});
4954}
4955
4956static Value *upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI,
4957 Function *F, IRBuilder<> &Builder) {
4958 if (Name.starts_with(Prefix: "to.fp16")) {
4959 Value *Cast =
4960 Builder.CreateFPTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
4961 return Builder.CreateBitCast(V: Cast, DestTy: CI->getType());
4962 }
4963
4964 if (Name.starts_with(Prefix: "from.fp16")) {
4965 Value *Cast =
4966 Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
4967 return Builder.CreateFPExt(V: Cast, DestTy: CI->getType());
4968 }
4969
4970 return nullptr;
4971}
4972
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
///
/// \param CI the call to upgrade; on success it is replaced and erased.
/// \param NewFn the replacement declaration chosen earlier by the intrinsic
///        upgrade logic, or nullptr if the call must be expanded inline based
///        on its name alone.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
  // Note dyn_cast to Function is not quite the same as getCalledFunction, which
  // checks the callee's function type matches. It's likely we need to handle
  // type changes here.
  Function *F = dyn_cast<Function>(Val: CI->getCalledOperand());
  if (!F)
    return;

  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  // Carry the call's fast-math flags over to any replacement FP instructions.
  if (isa<FPMathOperator>(Val: CI))
    Builder.setFastMathFlags(CI->getFastMathFlags());
  Builder.SetInsertPoint(TheBB: CI->getParent(), IP: CI->getIterator());

  if (!NewFn) {
    // No replacement declaration exists: dispatch on the intrinsic name and
    // build the upgraded IR inline.
    // Get the Function's name.
    StringRef Name = F->getName();
    if (!Name.consume_front(Prefix: "llvm."))
      llvm_unreachable("intrinsic doesn't start with 'llvm.'");

    // Strip a target prefix (the prefixes are mutually exclusive, so at most
    // one consume_front fires) and hand the remainder to a per-target helper.
    bool IsX86 = Name.consume_front(Prefix: "x86.");
    bool IsNVVM = Name.consume_front(Prefix: "nvvm.");
    bool IsAArch64 = Name.consume_front(Prefix: "aarch64.");
    bool IsARM = Name.consume_front(Prefix: "arm.");
    bool IsAMDGCN = Name.consume_front(Prefix: "amdgcn.");
    bool IsDbg = Name.consume_front(Prefix: "dbg.");
    // Old-style vector.splice takes a signed offset; the new .left/.right
    // variants are excluded so already-upgraded calls pass through.
    bool IsOldSplice =
        (Name.consume_front(Prefix: "experimental.vector.splice") ||
         Name.consume_front(Prefix: "vector.splice")) &&
        !(Name.starts_with(Prefix: ".left") || Name.starts_with(Prefix: ".right"));
    Value *Rep = nullptr;

    if (!IsX86 && Name == "stackprotectorcheck") {
      // Dropped intrinsic: the call is simply erased below (Rep stays null).
      Rep = nullptr;
    } else if (IsNVVM) {
      Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsX86) {
      Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
    } else if (IsAArch64) {
      Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
    } else if (IsARM) {
      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsAMDGCN) {
      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
    } else if (IsDbg) {
      // Debug intrinsics become DbgRecords; they produce no value, so Rep
      // stays null and the original call is just erased.
      upgradeDbgIntrinsicToDbgRecord(Name, CI);
    } else if (IsOldSplice) {
      Rep = upgradeVectorSplice(CI, Builder);
    } else if (Name.consume_front(Prefix: "convert.")) {
      Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
    } else {
      llvm_unreachable("Unknown function for CallBase upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(V: Rep);
    CI->eraseFromParent();
    return;
  }

  // Fallback used by the switch below when a call does not need (or does not
  // match) a case-specific rewrite: retarget the call at NewFn as directly as
  // the types allow.
  const auto &DefaultCase = [&]() -> void {
    if (F == NewFn)
      return;

    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }

    // This must be an upgrade from a named to a literal struct.
    if (auto *OldST = dyn_cast<StructType>(Val: CI->getType())) {
      assert(OldST != NewFn->getReturnType() &&
             "Return type must have changed");
      assert(OldST->getNumElements() ==
                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
             "Must have same number of elements");

      // Rebuild the old named-struct result element by element from the new
      // literal-struct result.
      SmallVector<Value *> Args(CI->args());
      CallInst *NewCI = Builder.CreateCall(Callee: NewFn, Args);
      NewCI->setAttributes(CI->getAttributes());
      Value *Res = PoisonValue::get(T: OldST);
      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
        Value *Elem = Builder.CreateExtractValue(Agg: NewCI, Idxs: Idx);
        Res = Builder.CreateInsertValue(Agg: Res, Val: Elem, Idxs: Idx);
      }
      CI->replaceAllUsesWith(V: Res);
      CI->eraseFromParent();
      return;
    }

    // We're probably about to produce something invalid. Let the verifier catch
    // it instead of dying here.
    CI->setCalledOperand(
        ConstantExpr::getPointerCast(C: NewFn, Ty: CI->getCalledOperand()->getType()));
    return;
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    // Same arguments; only the declaration changed.
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // Rebuild the constant lane-index operand as an i32.
    LLVMContext &Ctx = F->getParent()->getContext();
    SmallVector<Value *, 4> Args(CI->args());
    Args[3] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx),
                               V: cast<ConstantInt>(Val: Args[3])->getZExtValue());
    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    // The new loads return a struct of N vectors. Reassemble the old single
    // wide scalable-vector return value by inserting each struct element at
    // its subvector offset.
    StringRef Name = F->getName();
    Name = Name.substr(Start: 5);
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith(S: "aarch64.sve.ld2", Value: 2)
                     .StartsWith(S: "aarch64.sve.ld3", Value: 3)
                     .StartsWith(S: "aarch64.sve.ld4", Value: 4)
                     .Default(Value: 0);
    auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements() / N;
    SmallVector<Value *, 2> Args(CI->args());
    Value *NewLdCall = Builder.CreateCall(Callee: NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(T: RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *SRet = Builder.CreateExtractValue(Agg: NewLdCall, Idxs: I);
      Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: SRet, Idx: I * MinElts);
    }
    // CreateInsertVector emits a vector.insert call, so Ret is a CallInst.
    NewCall = dyn_cast<CallInst>(Val: Ret);
    break;
  }

  case Intrinsic::coro_end: {
    // coro.end gained a trailing token argument; default it to 'none'.
    SmallVector<Value *, 3> Args(CI->args());
    Args.push_back(Elt: ConstantTokenNone::get(Context&: CI->getContext()));
    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    break;
  }

  case Intrinsic::vector_extract: {
    // Upgrade aarch64.sve.tuple.get into vector.extract with an element index
    // scaled to the subvector's minimum element count.
    StringRef Name = F->getName();
    Name = Name.substr(Start: 5); // Strip llvm
    if (!Name.starts_with(Prefix: "aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements();
    unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
    Value *NewIdx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts);
    NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), NewIdx});
    break;
  }

  case Intrinsic::vector_insert: {
    // Upgrade aarch64.sve.tuple.set/create into vector.insert operations.
    StringRef Name = F->getName();
    Name = Name.substr(Start: 5);
    if (!Name.starts_with(Prefix: "aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.starts_with(Prefix: "aarch64.sve.tuple.set")) {
      // tuple.set(tuple, i, vec) -> vector.insert(tuple, vec, i * MinElts).
      unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
      auto *Ty = cast<ScalableVectorType>(Val: CI->getArgOperand(i: 2)->getType());
      Value *NewIdx =
          ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * Ty->getMinNumElements());
      NewCall = Builder.CreateCall(
          Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), NewIdx});
      break;
    }
    if (Name.starts_with(Prefix: "aarch64.sve.tuple.create")) {
      // tuple.createN(v0..vN-1): insert each operand into a poison wide vector.
      unsigned N = StringSwitch<unsigned>(Name)
                       .StartsWith(S: "aarch64.sve.tuple.create2", Value: 2)
                       .StartsWith(S: "aarch64.sve.tuple.create3", Value: 3)
                       .StartsWith(S: "aarch64.sve.tuple.create4", Value: 4)
                       .Default(Value: 0);
      assert(N > 1 && "Create is expected to be between 2-4");
      auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(T: RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        Value *V = CI->getArgOperand(i: I);
        Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: V, Idx: I * MinElts);
      }
      NewCall = dyn_cast<CallInst>(Val: Ret);
    }
    break;
  }

  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    // The last two operands changed to bfloat vectors; bitcast them over.
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(i: 1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy = FixedVectorType::get(ElementType: Type::getBFloatTy(C), NumElts: OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(Elt: *Iter++);
    Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
    Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz: {
    // Old single-argument form: add the is_zero_poison flag, defaulted false.
    if (CI->arg_size() != 1) {
      DefaultCase();
      return;
    }

    NewCall =
        Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), Builder.getFalse()});
    break;
  }

  case Intrinsic::objectsize: {
    // Older forms lacked the nullunknown and/or dynamic flags; default any
    // missing trailing arguments to false.
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(i: 2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(i: 3);
    NewCall = Builder.CreateCall(
        Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), NullIsUnknownSize, Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
    break;
  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(Start: 5); // Strip llvm.
    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
    if (Name.starts_with(Prefix: "dbg.addr")) {
      DIExpression *Expr = cast<DIExpression>(
          Val: cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2))->getMetadata());
      Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
      NewCall =
          Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
                                     MetadataAsValue::get(Context&: C, MD: Expr)});
      break;
    }

    // Upgrade from the old version that had an extra offset argument.
    assert(CI->arg_size() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1)))
      if (Offset->isNullValue()) {
        NewCall = Builder.CreateCall(
            Callee: NewFn,
            Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)});
        break;
      }
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }

    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        Callee: NewFn,
        Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
         CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        Callee: NewFn,
        Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
         CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    Value *Arg2 = CI->getArgOperand(i: 2);
    // Already in the new form: nothing to do.
    if (Arg2->getType()->isIntegerTy(Bitwidth: 32) && !CI->getType()->isIntegerTy(Bitwidth: 64))
      return;

    Value *Arg0 = CI->getArgOperand(i: 0);
    Value *Arg1 = CI->getArgOperand(i: 1);
    if (CI->getType()->isIntegerTy(Bitwidth: 64)) {
      // The i64 overload was removed; operate on i32 and sign-extend back.
      Arg0 = Builder.CreateTrunc(V: Arg0, DestTy: Builder.getInt32Ty());
      Arg1 = Builder.CreateTrunc(V: Arg1, DestTy: Builder.getInt32Ty());
    }

    Arg2 = ConstantInt::get(Ty: Type::getInt32Ty(C),
                            V: cast<ConstantInt>(Val: Arg2)->getZExtValue());

    NewCall = Builder.CreateCall(Callee: NewFn, Args: {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    if (Res->getType() != CI->getType())
      Res = Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_mapa_shared_cluster: {
    // Create a new call with the correct address space.
    NewCall =
        Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
    Value *Res = NewCall;
    Res = Builder.CreateAddrSpaceCast(
        V: Res, DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED));
    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
    // Create a new call with the correct address space.
    SmallVector<Value *, 4> Args(CI->args());
    Args[0] = Builder.CreateAddrSpaceCast(
        V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: NewCall);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
    SmallVector<Value *, 16> Args(CI->args());

    // Create AddrSpaceCast to shared_cluster if needed.
    // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
    unsigned AS = CI->getArgOperand(i: 0)->getType()->getPointerAddressSpace();
    if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
      Args[0] = Builder.CreateAddrSpaceCast(
          V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));

    // Attach the flag argument for cta_group, with a
    // default value of 0. This handles case (2) in
    // shouldUpgradeNVPTXTMAG2SIntrinsics().
    size_t NumArgs = CI->arg_size();
    Value *FlagArg = CI->getArgOperand(i: NumArgs - 3);
    if (!FlagArg->getType()->isIntegerTy(Bitwidth: 1))
      Args.push_back(Elt: ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0));

    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: NewCall);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    if (!CI->getType()->isIntegerTy(Bitwidth: 64))
      return;

    // The i64 overload was removed: truncate, call, and sign-extend back.
    Value *Arg =
        Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getInt32Ty());

    NewCall = Builder.CreateCall(Callee: NewFn, Args: Arg);
    Value *Res =
        Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // The old form took an extra operand; only operand 1 is still used.
    NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    // The selector operand changed from an FP vector to the matching integer
    // vector; insert a bitcast.
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Val: Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(VTy: FltIdxTy);
    Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: IntIdxTy);
    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(i: 0);
    if (Arg0->getType() != FixedVectorType::get(ElementType: Type::getFloatTy(C), NumElts: 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(i: 1);

    auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);

    Value *BC0 = Builder.CreateBitCast(V: Arg0, DestTy: NewVecTy, Name: "cast");
    Value *BC1 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");

    NewCall = Builder.CreateCall(Callee: NewFn, Args: {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take 1 arguments. If we have no arguments, it is already
    // upgraded.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(Callee: NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
    Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);

    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: TSC);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->args());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(V: Args.back(), DestTy: Type::getInt8Ty(C), Name: "trunc");
    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    break;
  }

  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    // The mask operand became a vector of i1; convert it and the i1-vector
    // result back to the integer-mask representation callers expect.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Val: Args[0]->getType())->getNumElements();
    Args[3] = getX86MaskVec(Builder, Mask: Args[3], NumElts);

    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    Value *Res = applyX86MaskOn1BitsVec(Builder, Vec: NewCall, Mask: nullptr);

    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    // These used to model bf16 elements as i16; bitcast between the two.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        V: NewCall, DestTy: FixedVectorType::get(ElementType: Builder.getInt16Ty(), NumElts));

    NewCall->takeName(V: CI);
    CI->replaceAllUsesWith(V: Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
    // The source operands used to be i32 vectors; bitcast them to bf16
    // vectors with twice the element count.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Val: CI->getType())->getNumElements() * 2;
    Args[1] = Builder.CreateBitCast(
        V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
    Args[2] = Builder.CreateBitCast(
        V: Args[2], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    // Rebuild against the new declaration; the call takes no arguments.
    NewCall = Builder.CreateCall(Callee: NewFn, Args: {});
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type
    if (CI->arg_size() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
                      CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, FnAttrs: OldAttrs.getFnAttrs(), RetAttrs: OldAttrs.getRetAttrs(),
        ArgAttrs: {OldAttrs.getParamAttrs(ArgNo: 0), OldAttrs.getParamAttrs(ArgNo: 1),
         OldAttrs.getParamAttrs(ArgNo: 2), OldAttrs.getParamAttrs(ArgNo: 4)});
    NewCall->setAttributes(NewAttrs);
    auto *MemCI = cast<MemIntrinsic>(Val: NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(Val: CI->getArgOperand(i: 3));
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(Val: MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }

  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_store:
  case Intrinsic::masked_scatter: {
    // The alignment operand moved out of the argument list; rebuild through
    // the IRBuilder helpers, which take it as an attribute-style Align.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }

    // Decode the old constant-int alignment operand (0 means "unspecified").
    auto GetMaybeAlign = [](Value *Op) {
      if (auto *CI = dyn_cast<ConstantInt>(Val: Op)) {
        uint64_t Val = CI->getZExtValue();
        if (Val == 0)
          return MaybeAlign();
        if (isPowerOf2_64(Value: Val))
          return MaybeAlign(Val);
      }
      reportFatalUsageError(reason: "Invalid alignment argument");
    };
    auto GetAlign = [&](Value *Op) {
      MaybeAlign Align = GetMaybeAlign(Op);
      if (Align)
        return *Align;
      reportFatalUsageError(reason: "Invalid zero alignment argument");
    };

    const DataLayout &DL = CI->getDataLayout();
    switch (NewFn->getIntrinsicID()) {
    case Intrinsic::masked_load:
      NewCall = Builder.CreateMaskedLoad(
          Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0), Alignment: GetAlign(CI->getArgOperand(i: 1)),
          Mask: CI->getArgOperand(i: 2), PassThru: CI->getArgOperand(i: 3));
      break;
    case Intrinsic::masked_gather:
      NewCall = Builder.CreateMaskedGather(
          Ty: CI->getType(), Ptrs: CI->getArgOperand(i: 0),
          Alignment: DL.getValueOrABITypeAlignment(Alignment: GetMaybeAlign(CI->getArgOperand(i: 1)),
                                         Ty: CI->getType()->getScalarType()),
          Mask: CI->getArgOperand(i: 2), PassThru: CI->getArgOperand(i: 3));
      break;
    case Intrinsic::masked_store:
      NewCall = Builder.CreateMaskedStore(
          Val: CI->getArgOperand(i: 0), Ptr: CI->getArgOperand(i: 1),
          Alignment: GetAlign(CI->getArgOperand(i: 2)), Mask: CI->getArgOperand(i: 3));
      break;
    case Intrinsic::masked_scatter:
      NewCall = Builder.CreateMaskedScatter(
          Val: CI->getArgOperand(i: 0), Ptrs: CI->getArgOperand(i: 1),
          Alignment: DL.getValueOrABITypeAlignment(
              Alignment: GetMaybeAlign(CI->getArgOperand(i: 2)),
              Ty: CI->getArgOperand(i: 0)->getType()->getScalarType()),
          Mask: CI->getArgOperand(i: 3));
      break;
    default:
      llvm_unreachable("Unexpected intrinsic ID");
    }
    // Previous metadata is still valid.
    NewCall->copyMetadata(SrcInst: *CI);
    NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
    break;
  }

  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end: {
    if (CI->arg_size() != 2) {
      DefaultCase();
      return;
    }

    Value *Ptr = CI->getArgOperand(i: 1);
    // Try to strip pointer casts, such that the lifetime works on an alloca.
    Ptr = Ptr->stripPointerCasts();
    if (isa<AllocaInst>(Val: Ptr)) {
      // Don't use NewFn, as we might have looked through an addrspacecast.
      if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
        NewCall = Builder.CreateLifetimeStart(Ptr);
      else
        NewCall = Builder.CreateLifetimeEnd(Ptr);
      break;
    }

    // Otherwise remove the lifetime marker.
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512_vpdpbusd_128:
  case Intrinsic::x86_avx512_vpdpbusd_256:
  case Intrinsic::x86_avx512_vpdpbusd_512:
  case Intrinsic::x86_avx512_vpdpbusds_128:
  case Intrinsic::x86_avx512_vpdpbusds_256:
  case Intrinsic::x86_avx512_vpdpbusds_512:
  case Intrinsic::x86_avx2_vpdpbssd_128:
  case Intrinsic::x86_avx2_vpdpbssd_256:
  case Intrinsic::x86_avx10_vpdpbssd_512:
  case Intrinsic::x86_avx2_vpdpbssds_128:
  case Intrinsic::x86_avx2_vpdpbssds_256:
  case Intrinsic::x86_avx10_vpdpbssds_512:
  case Intrinsic::x86_avx2_vpdpbsud_128:
  case Intrinsic::x86_avx2_vpdpbsud_256:
  case Intrinsic::x86_avx10_vpdpbsud_512:
  case Intrinsic::x86_avx2_vpdpbsuds_128:
  case Intrinsic::x86_avx2_vpdpbsuds_256:
  case Intrinsic::x86_avx10_vpdpbsuds_512:
  case Intrinsic::x86_avx2_vpdpbuud_128:
  case Intrinsic::x86_avx2_vpdpbuud_256:
  case Intrinsic::x86_avx10_vpdpbuud_512:
  case Intrinsic::x86_avx2_vpdpbuuds_128:
  case Intrinsic::x86_avx2_vpdpbuuds_256:
  case Intrinsic::x86_avx10_vpdpbuuds_512: {
    // The byte-source operands changed element type; bitcast them to vectors
    // of i8 with the same overall bit width.
    unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
    Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
                     CI->getArgOperand(i: 2)};
    Type *NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: NumElts, Scalable: false);
    Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
    Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);

    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    break;
  }
  case Intrinsic::x86_avx512_vpdpwssd_128:
  case Intrinsic::x86_avx512_vpdpwssd_256:
  case Intrinsic::x86_avx512_vpdpwssd_512:
  case Intrinsic::x86_avx512_vpdpwssds_128:
  case Intrinsic::x86_avx512_vpdpwssds_256:
  case Intrinsic::x86_avx512_vpdpwssds_512:
  case Intrinsic::x86_avx2_vpdpwsud_128:
  case Intrinsic::x86_avx2_vpdpwsud_256:
  case Intrinsic::x86_avx10_vpdpwsud_512:
  case Intrinsic::x86_avx2_vpdpwsuds_128:
  case Intrinsic::x86_avx2_vpdpwsuds_256:
  case Intrinsic::x86_avx10_vpdpwsuds_512:
  case Intrinsic::x86_avx2_vpdpwusd_128:
  case Intrinsic::x86_avx2_vpdpwusd_256:
  case Intrinsic::x86_avx10_vpdpwusd_512:
  case Intrinsic::x86_avx2_vpdpwusds_128:
  case Intrinsic::x86_avx2_vpdpwusds_256:
  case Intrinsic::x86_avx10_vpdpwusds_512:
  case Intrinsic::x86_avx2_vpdpwuud_128:
  case Intrinsic::x86_avx2_vpdpwuud_256:
  case Intrinsic::x86_avx10_vpdpwuud_512:
  case Intrinsic::x86_avx2_vpdpwuuds_128:
  case Intrinsic::x86_avx2_vpdpwuuds_256:
  case Intrinsic::x86_avx10_vpdpwuuds_512:
    // Same as above, but the word-source operands become vectors of i16.
    unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
    Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
                     CI->getArgOperand(i: 2)};
    Type *NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: NumElts, Scalable: false);
    Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
    Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);

    NewCall = Builder.CreateCall(Callee: NewFn, Args);
    break;
  }
  // Cases that break out of the switch funnel here: transfer the name and
  // users to the rebuilt call, then drop the original.
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  NewCall->takeName(V: CI);
  CI->replaceAllUsesWith(V: NewCall);
  CI->eraseFromParent();
}
5738
5739void llvm::UpgradeCallsToIntrinsic(Function *F) {
5740 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5741
5742 // Check if this function should be upgraded and get the replacement function
5743 // if there is one.
5744 Function *NewFn;
5745 if (UpgradeIntrinsicFunction(F, NewFn)) {
5746 // Replace all users of the old function with the new function or new
5747 // instructions. This is not a range loop because the call is deleted.
5748 for (User *U : make_early_inc_range(Range: F->users()))
5749 if (CallBase *CB = dyn_cast<CallBase>(Val: U))
5750 UpgradeIntrinsicCall(CI: CB, NewFn);
5751
5752 // Remove old function, no longer used, from the module.
5753 if (F != NewFn)
5754 F->eraseFromParent();
5755 }
5756}
5757
5758MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5759 const unsigned NumOperands = MD.getNumOperands();
5760 if (NumOperands == 0)
5761 return &MD; // Invalid, punt to a verifier error.
5762
5763 // Check if the tag uses struct-path aware TBAA format.
5764 if (isa<MDNode>(Val: MD.getOperand(I: 0)) && NumOperands >= 3)
5765 return &MD;
5766
5767 auto &Context = MD.getContext();
5768 if (NumOperands == 3) {
5769 Metadata *Elts[] = {MD.getOperand(I: 0), MD.getOperand(I: 1)};
5770 MDNode *ScalarType = MDNode::get(Context, MDs: Elts);
5771 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5772 Metadata *Elts2[] = {ScalarType, ScalarType,
5773 ConstantAsMetadata::get(
5774 C: Constant::getNullValue(Ty: Type::getInt64Ty(C&: Context))),
5775 MD.getOperand(I: 2)};
5776 return MDNode::get(Context, MDs: Elts2);
5777 }
5778 // Create a MDNode <MD, MD, offset 0>
5779 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(C: Constant::getNullValue(
5780 Ty: Type::getInt64Ty(C&: Context)))};
5781 return MDNode::get(Context, MDs: Elts);
5782}
5783
5784Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5785 Instruction *&Temp) {
5786 if (Opc != Instruction::BitCast)
5787 return nullptr;
5788
5789 Temp = nullptr;
5790 Type *SrcTy = V->getType();
5791 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5792 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5793 LLVMContext &Context = V->getContext();
5794
5795 // We have no information about target data layout, so we assume that
5796 // the maximum pointer size is 64bit.
5797 Type *MidTy = Type::getInt64Ty(C&: Context);
5798 Temp = CastInst::Create(Instruction::PtrToInt, S: V, Ty: MidTy);
5799
5800 return CastInst::Create(Instruction::IntToPtr, S: Temp, Ty: DestTy);
5801 }
5802
5803 return nullptr;
5804}
5805
5806Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5807 if (Opc != Instruction::BitCast)
5808 return nullptr;
5809
5810 Type *SrcTy = C->getType();
5811 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5812 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5813 LLVMContext &Context = C->getContext();
5814
5815 // We have no information about target data layout, so we assume that
5816 // the maximum pointer size is 64bit.
5817 Type *MidTy = Type::getInt64Ty(C&: Context);
5818
5819 return ConstantExpr::getIntToPtr(C: ConstantExpr::getPtrToInt(C, Ty: MidTy),
5820 Ty: DestTy);
5821 }
5822
5823 return nullptr;
5824}
5825
5826/// Check the debug info version number, if it is out-dated, drop the debug
5827/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  if (DisableAutoUpgradeDebugInfo)
    return false;

  llvm::TimeTraceScope timeScope("Upgrade debug info");
  // We need to get metadata before the module is verified (i.e., getModuleFlag
  // makes assumptions that we haven't verified yet). Carefully extract the flag
  // from the metadata.
  unsigned Version = 0;
  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    // Scan the raw module-flag operands for the "Debug Info Version" entry;
    // each well-formed flag is a (behavior, key, value) triple.
    auto OpIt = find_if(Range: ModFlags->operands(), P: [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Val: Flag->getOperand(I: 1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      const MDOperand &ValOp = (*OpIt)->getOperand(I: 2);
      if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD: ValOp))
        Version = CI->getZExtValue();
    }
  }

  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    // A module broken outside of its debug info is unrecoverable here; a
    // module whose only problem is broken debug info falls through and gets
    // the debug info stripped below.
    if (verifyModule(M, OS: &llvm::errs(), BrokenDebugInfo: &BrokenDebugInfo))
      report_fatal_error(reason: "Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(DI: Diag);
    }
  }
  // Either the version is out of date or the debug info is malformed: drop
  // all debug info rather than keep inconsistent metadata.
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DI: DiagVersion);
  }
  return Modified;
}
5873
// Merge one dimension of a legacy NVVM vector annotation (e.g. "maxntidx")
// into the comma-separated "x[,y[,z]]" string attribute \p Attr on \p GV.
// \p DimC is the dimension suffix character ('x', 'y' or 'z'), and \p V is
// the ConstantInt metadata holding that dimension's value.
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
                                    GlobalValue *GV, const Metadata *V) {
  Function *F = cast<Function>(Val: GV);

  // Dimensions not present in an existing attribute default to "1".
  constexpr StringLiteral DefaultValue = "1";
  StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
  unsigned Length = 0;

  if (F->hasFnAttribute(Kind: Attr)) {
    // We expect the existing attribute to have the form "x[,y[,z]]". Here we
    // parse these elements placing them into Vect3
    StringRef S = F->getFnAttribute(Kind: Attr).getValueAsString();
    for (; Length < 3 && !S.empty(); Length++) {
      auto [Part, Rest] = S.split(Separator: ',');
      Vect3[Length] = Part.trim();
      S = Rest;
    }
  }

  const unsigned Dim = DimC - 'x';
  assert(Dim < 3 && "Unexpected dim char");

  const uint64_t VInt = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();

  // local variable required for StringRef in Vect3 to point to.
  const std::string VStr = llvm::utostr(X: VInt);
  Vect3[Dim] = VStr;
  // Make sure the emitted vector is long enough to include the dimension we
  // just wrote, even if the existing attribute was shorter.
  Length = std::max(a: Length, b: Dim + 1);

  const std::string NewAttr = llvm::join(R: ArrayRef(Vect3, Length), Separator: ",");
  F->addFnAttr(Kind: Attr, Val: NewAttr);
}
5906
5907static inline bool isXYZ(StringRef S) {
5908 return S == "x" || S == "y" || S == "z";
5909}
5910
// Upgrade a single ("key", value) pair from an nvvm.annotations entry for
// \p GV into the corresponding calling convention or function/parameter
// attribute. Returns true when the pair was consumed and should be dropped
// from the remaining annotation metadata.
bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
                                        const Metadata *V) {
  if (K == "kernel") {
    // A non-zero "kernel" marker maps to the PTX kernel calling convention.
    if (!mdconst::extract<ConstantInt>(MD&: V)->isZero())
      cast<Function>(Val: GV)->setCallingConv(CallingConv::PTX_Kernel);
    return true;
  }
  if (K == "align") {
    // V is a bitfeild specifying two 16-bit values. The alignment value is
    // specfied in low 16-bits, The index is specified in the high bits. For the
    // index, 0 indicates the return value while higher values correspond to
    // each parameter (idx = param + 1).
    const uint64_t AlignIdxValuePair =
        mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
    const unsigned Idx = (AlignIdxValuePair >> 16);
    const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
    cast<Function>(Val: GV)->addAttributeAtIndex(
        i: Idx, Attr: Attribute::getWithStackAlignment(Context&: GV->getContext(), Alignment: StackAlign));
    return true;
  }
  // The scalar annotations below all become string function attributes
  // holding the decimal value.
  if (K == "maxclusterrank" || K == "cluster_max_blocks") {
    const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
    cast<Function>(Val: GV)->addFnAttr(Kind: NVVMAttr::MaxClusterRank, Val: llvm::utostr(X: CV));
    return true;
  }
  if (K == "minctasm") {
    const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
    cast<Function>(Val: GV)->addFnAttr(Kind: NVVMAttr::MinCTASm, Val: llvm::utostr(X: CV));
    return true;
  }
  if (K == "maxnreg") {
    const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
    cast<Function>(Val: GV)->addFnAttr(Kind: NVVMAttr::MaxNReg, Val: llvm::utostr(X: CV));
    return true;
  }
  // For the per-dimension annotations, consume_front strips the prefix so
  // that K[0] is the remaining dimension character ('x', 'y' or 'z').
  if (K.consume_front(Prefix: "maxntid") && isXYZ(S: K)) {
    upgradeNVVMFnVectorAttr(Attr: NVVMAttr::MaxNTID, DimC: K[0], GV, V);
    return true;
  }
  if (K.consume_front(Prefix: "reqntid") && isXYZ(S: K)) {
    upgradeNVVMFnVectorAttr(Attr: NVVMAttr::ReqNTID, DimC: K[0], GV, V);
    return true;
  }
  if (K.consume_front(Prefix: "cluster_dim_") && isXYZ(S: K)) {
    upgradeNVVMFnVectorAttr(Attr: NVVMAttr::ClusterDim, DimC: K[0], GV, V);
    return true;
  }
  if (K == "grid_constant") {
    const auto Attr = Attribute::get(Context&: GV->getContext(), Kind: NVVMAttr::GridConstant);
    for (const auto &Op : cast<MDNode>(Val: V)->operands()) {
      // For some reason, the index is 1-based in the metadata. Good thing we're
      // able to auto-upgrade it!
      const auto Index = mdconst::extract<ConstantInt>(MD: Op)->getZExtValue() - 1;
      cast<Function>(Val: GV)->addParamAttr(ArgNo: Index, Attr);
    }
    return true;
  }

  // Unrecognized keys are left in place for downstream consumers.
  return false;
}
5971
/// Upgrade the module-level "nvvm.annotations" named metadata: convert the
/// recognized key/value pairs into attributes and rebuild the named metadata
/// with only the entries that could not be upgraded.
void llvm::UpgradeNVVMAnnotations(Module &M) {
  NamedMDNode *NamedMD = M.getNamedMetadata(Name: "nvvm.annotations");
  if (!NamedMD)
    return;

  SmallVector<MDNode *, 8> NewNodes;
  // Identical nodes carry no extra information; process each distinct node
  // only once.
  SmallPtrSet<const MDNode *, 8> SeenNodes;
  for (MDNode *MD : NamedMD->operands()) {
    if (!SeenNodes.insert(Ptr: MD).second)
      continue;

    // Entries whose first operand is not a global value are dropped.
    auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD: MD->getOperand(I: 0));
    if (!GV)
      continue;

    assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");

    SmallVector<Metadata *, 8> NewOperands{MD->getOperand(I: 0)};
    // Each nvvm.annotations metadata entry will be of the following form:
    //   !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
    // start index = 1, to skip the global variable key
    // increment = 2, to skip the value for each property-value pairs
    for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
      MDString *K = cast<MDString>(Val: MD->getOperand(I: j));
      const MDOperand &V = MD->getOperand(I: j + 1);
      bool Upgraded = upgradeSingleNVVMAnnotation(GV, K: K->getString(), V);
      // Keep pairs we could not upgrade so no information is lost.
      if (!Upgraded)
        NewOperands.append(IL: {K, V});
    }

    // Only re-emit the node if some key/value pair survived the upgrade.
    if (NewOperands.size() > 1)
      NewNodes.push_back(Elt: MDNode::get(Context&: M.getContext(), MDs: NewOperands));
  }

  NamedMD->clearOperands();
  for (MDNode *N : NewNodes)
    NamedMD->addOperand(M: N);
}
6010
6011/// This checks for objc retain release marker which should be upgraded. It
6012/// returns true if module is modified.
6013static bool upgradeRetainReleaseMarker(Module &M) {
6014 bool Changed = false;
6015 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6016 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(Name: MarkerKey);
6017 if (ModRetainReleaseMarker) {
6018 MDNode *Op = ModRetainReleaseMarker->getOperand(i: 0);
6019 if (Op) {
6020 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 0));
6021 if (ID) {
6022 SmallVector<StringRef, 4> ValueComp;
6023 ID->getString().split(A&: ValueComp, Separator: "#");
6024 if (ValueComp.size() == 2) {
6025 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6026 ID = MDString::get(Context&: M.getContext(), Str: NewValue);
6027 }
6028 M.addModuleFlag(Behavior: Module::Error, Key: MarkerKey, Val: ID);
6029 M.eraseNamedMetadata(NMD: ModRetainReleaseMarker);
6030 Changed = true;
6031 }
6032 }
6033 }
6034 return Changed;
6035}
6036
/// Upgrade direct calls to Objective-C ARC runtime functions (e.g.
/// "objc_retain") into calls to the corresponding llvm.objc.* intrinsics,
/// after upgrading the retain/release marker metadata.
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(Name: OldFunc);

    if (!Fn)
      return;

    Function *NewFn =
        llvm::Intrinsic::getOrInsertDeclaration(M: &M, id: IntrinsicFunc);

    // Visit with an early-increment range because each upgraded call is
    // erased below.
    for (User *U : make_early_inc_range(Range: Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(Val: U);
      // Skip uses that are not direct calls to the old runtime function.
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(op: Instruction::BitCast, S: CI,
                                 DstTy: NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(i: I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(op: Instruction::BitCast, S: Arg,
                                     DstTy: NewFuncTy->getParamType(i: I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(V: Arg, DestTy: NewFuncTy->getParamType(i: I));
        }
        Args.push_back(Elt: Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(FTy: NewFuncTy, Callee: NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
      NewCall->takeName(V: CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(V: NewCall, DestTy: CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(V: NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the old declaration once every call has been rewritten.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!upgradeRetainReleaseMarker(M))
    return;

  // Mapping from each legacy ARC runtime function name to its intrinsic.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
6158
/// Upgrade legacy module flags in place (behaviors, renamed keys, retyped
/// values) and synthesize flags that newer consumers expect. Returns true if
/// the module was modified.
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  // Only meaningful when HasSwiftVersionFlag is set below.
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(C&: M.getContext());
  auto Int32Ty = Type::getInt32Ty(C&: M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(i: I);
    // Well-formed module flags are (behavior, key, value) triples; skip
    // anything else.
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
    if (!ID)
      continue;
    // Rewrites the current flag's behavior operand to B, keeping key/value.
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      Metadata *Ops[3] = {ConstantAsMetadata::get(C: ConstantInt::get(
                              Ty: Type::getInt32Ty(C&: M.getContext()), V: B)),
                          MDString::get(Context&: M.getContext(), Str: ID->getString()),
                          Op->getOperand(I: 2)};
      ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
      Changed = true;
    };

    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC from Error/Max to Min.
    if (ID->getString() == "PIC Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error || V == Module::Max)
          SetBehavior(Module::Min);
      }
    }
    // Upgrade "PIE Level" from Error to Max.
    if (ID->getString() == "PIE Level")
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0)))
        if (Behavior->getLimitedValue() == Module::Error)
          SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields were Error and now they are Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().starts_with(Prefix: "sign-return-address")) {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Min)),
              Op->getOperand(I: 1), Op->getOperand(I: 2)};
          ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
          Changed = true;
        }
      }
    }

    // Upgrade Objective-C Image Info Section. Removed the whitespce in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that is functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(A&: ValueComp, Separator: " ");
        if (ValueComp.size() != 1) {
          // Re-join the components with the spaces removed.
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(I: 0), Op->getOperand(I: 1),
                              MDString::get(Context&: M.getContext(), Str: NewValue)};
          ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
          Changed = true;
        }
      }
    }

    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
    // If the higher bits are set, it adds new module flag for swift info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        // Already in the upgraded i8 form; nothing to do for this flag.
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        if ((Val & 0xff) != Val) {
          // High bits encode the Swift ABI/major/minor versions; remember
          // them so the corresponding flags can be added after the loop.
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        Metadata *Ops[3] = {
            ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty,V: Module::Error)),
            Op->getOperand(I: 1),
            ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int8Ty,V: Val & 0xff))};
        ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
        Changed = true;
      }
    }

    // Rename the AMDGPU code-object version flag to its current key.
    if (ID->getString() == "amdgpu_code_object_version") {
      Metadata *Ops[3] = {
          Op->getOperand(I: 0),
          MDString::get(Context&: M.getContext(), Str: "amdhsa_code_object_version"),
          Op->getOperand(I: 2)};
      ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
      Changed = true;
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correclty downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(Behavior: llvm::Module::Override, Key: "Objective-C Class Properties",
                    Val: (uint32_t)0);
    Changed = true;
  }

  // Emit the Swift version flags captured from the garbage-collection flag.
  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Behavior: Module::Error, Key: "Swift ABI Version",
                    Val: SwiftABIVersion);
    M.addModuleFlag(Behavior: Module::Error, Key: "Swift Major Version",
                    Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMajorVersion));
    M.addModuleFlag(Behavior: Module::Error, Key: "Swift Minor Version",
                    Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
6301
6302void llvm::UpgradeSectionAttributes(Module &M) {
6303 auto TrimSpaces = [](StringRef Section) -> std::string {
6304 SmallVector<StringRef, 5> Components;
6305 Section.split(A&: Components, Separator: ',');
6306
6307 SmallString<32> Buffer;
6308 raw_svector_ostream OS(Buffer);
6309
6310 for (auto Component : Components)
6311 OS << ',' << Component.trim();
6312
6313 return std::string(OS.str().substr(Start: 1));
6314 };
6315
6316 for (auto &GV : M.globals()) {
6317 if (!GV.hasSection())
6318 continue;
6319
6320 StringRef Section = GV.getSection();
6321
6322 if (!Section.starts_with(Prefix: "__DATA, __objc_catlist"))
6323 continue;
6324
6325 // __DATA, __objc_catlist, regular, no_dead_strip
6326 // __DATA,__objc_catlist,regular,no_dead_strip
6327 GV.setSection(TrimSpaces(Section));
6328 }
6329}
6330
namespace {
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;

  // Visits every call/invoke; only dangling strictfp call sites are touched.
  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
    // Constrained FP intrinsics legitimately carry strictfp; leave them be.
    if (isa<ConstrainedFPIntrinsic>(Val: &Call))
      return;
    // If we get here, the caller doesn't have the strictfp attribute
    // but this callsite does. Replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Kind: Attribute::StrictFP);
    Call.addFnAttr(Kind: Attribute::NoBuiltin);
  }
};

/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;

  // Tags every floating-point atomicrmw with the metadata that replaced the
  // old function-level attribute.
  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
    if (!RMW.isFloatingPointOperation())
      return;

    MDNode *Empty = MDNode::get(Context&: RMW.getContext(), MDs: {});
    RMW.setMetadata(Kind: "amdgpu.no.fine.grained.host.memory", Node: Empty);
    RMW.setMetadata(Kind: "amdgpu.no.remote.memory.access", Node: Empty);
    RMW.setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: Empty);
  }
};
} // namespace
6374
/// Upgrade legacy attributes on \p F: convert dangling strictfp call sites,
/// drop type-incompatible attributes, and translate old string attributes to
/// their modern equivalents. Removals and additions are batched and applied
/// at the end.
void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Kind: Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  // Remove all incompatibile attributes from function.
  F.removeRetAttrs(Attrs: AttributeFuncs::typeIncompatible(
      Ty: F.getReturnType(), AS: F.getAttributes().getRetAttrs()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(
        AM: AttributeFuncs::typeIncompatible(Ty: Arg.getType(), AS: Arg.getAttributes()));

  // Batched attribute changes, applied together at the bottom.
  bool AddingAttrs = false, RemovingAttrs = false;
  AttrBuilder AttrsToAdd(F.getContext());
  AttributeMask AttrsToRemove;

  // Older versions of LLVM treated an "implicit-section-name" attribute
  // similarly to directly setting the section on a Function.
  if (Attribute A = F.getFnAttribute(Kind: "implicit-section-name");
      A.isValid() && A.isStringAttribute()) {
    F.setSection(A.getValueAsString());
    AttrsToRemove.addAttribute(A: "implicit-section-name");
    RemovingAttrs = true;
  }

  // The string "nooutline" attribute became the enum NoOutline attribute.
  if (Attribute A = F.getFnAttribute(Kind: "nooutline");
      A.isValid() && A.isStringAttribute()) {
    AttrsToRemove.addAttribute(A: "nooutline");
    AttrsToAdd.addAttribute(Val: Attribute::NoOutline);
    AddingAttrs = RemovingAttrs = true;
  }

  // "uniform-work-group-size" becomes valueless: present means true.
  if (Attribute A = F.getFnAttribute(Kind: "uniform-work-group-size");
      A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
    AttrsToRemove.addAttribute(A: "uniform-work-group-size");
    RemovingAttrs = true;
    if (A.getValueAsString() == "true") {
      AttrsToAdd.addAttribute(A: "uniform-work-group-size");
      AddingAttrs = true;
    }
  }

  if (!F.empty()) {
    // For some reason this is called twice, and the first time is before any
    // instructions are loaded into the body.

    if (Attribute A = F.getFnAttribute(Kind: "amdgpu-unsafe-fp-atomics");
        A.isValid()) {

      // A true value is rewritten as per-instruction atomicrmw metadata.
      if (A.getValueAsBool()) {
        AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
        Visitor.visit(F);
      }

      // We will leave behind dead attribute uses on external declarations, but
      // clang never added these to declarations anyway.
      AttrsToRemove.addAttribute(A: "amdgpu-unsafe-fp-atomics");
      RemovingAttrs = true;
    }
  }

  // Merge the old split "denormal-fp-math"/"denormal-fp-math-f32" string
  // attributes into a single denormal FP environment attribute.
  DenormalMode DenormalFPMath = DenormalMode::getIEEE();
  DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();

  bool HandleDenormalMode = false;

  if (Attribute Attr = F.getFnAttribute(Kind: "denormal-fp-math"); Attr.isValid()) {
    DenormalMode ParsedMode = parseDenormalFPAttribute(Str: Attr.getValueAsString());
    // Unparseable modes are left in place untouched.
    if (ParsedMode.isValid()) {
      DenormalFPMath = ParsedMode;
      AttrsToRemove.addAttribute(A: "denormal-fp-math");
      AddingAttrs = RemovingAttrs = true;
      HandleDenormalMode = true;
    }
  }

  if (Attribute Attr = F.getFnAttribute(Kind: "denormal-fp-math-f32");
      Attr.isValid()) {
    DenormalMode ParsedMode = parseDenormalFPAttribute(Str: Attr.getValueAsString());
    if (ParsedMode.isValid()) {
      DenormalFPMathF32 = ParsedMode;
      AttrsToRemove.addAttribute(A: "denormal-fp-math-f32");
      AddingAttrs = RemovingAttrs = true;
      HandleDenormalMode = true;
    }
  }

  if (HandleDenormalMode)
    AttrsToAdd.addDenormalFPEnvAttr(
        Mode: DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));

  // Apply removals before additions so re-added attributes survive.
  if (RemovingAttrs)
    F.removeFnAttrs(Attrs: AttrsToRemove);

  if (AddingAttrs)
    F.addFnAttrs(Attrs: AttrsToAdd);
}
6475
6476// Check if the function attribute is not present and set it.
6477static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6478 StringRef Value) {
6479 if (!F.hasFnAttribute(Kind: FnAttrName))
6480 F.addFnAttr(Kind: FnAttrName, Val: Value);
6481}
6482
6483// Check if the function attribute is not present and set it if needed.
6484// If the attribute is "false" then removes it.
6485// If the attribute is "true" resets it to a valueless attribute.
6486static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6487 if (!F.hasFnAttribute(Kind: FnAttrName)) {
6488 if (Set)
6489 F.addFnAttr(Kind: FnAttrName);
6490 } else {
6491 auto A = F.getFnAttribute(Kind: FnAttrName);
6492 if ("false" == A.getValueAsString())
6493 F.removeFnAttr(Kind: FnAttrName);
6494 else if ("true" == A.getValueAsString()) {
6495 F.removeFnAttr(Kind: FnAttrName);
6496 F.addFnAttr(Kind: FnAttrName);
6497 }
6498 }
6499}
6500
/// Copy ARM/AArch64 branch-protection related module flags onto every
/// function definition as string attributes, then bump the module flags to 2.
/// A pre-existing flag value of 2 appears to mark a module that was already
/// processed, in which case this returns early without touching functions.
void llvm::copyModuleAttrToFunctions(Module &M) {
  // Only relevant for ARM-family targets.
  Triple T(M.getTargetTriple());
  if (!T.isThumb() && !T.isARM() && !T.isAArch64())
    return;

  uint64_t BTEValue = 0;
  uint64_t BPPLRValue = 0;
  uint64_t GCSValue = 0;
  uint64_t SRAValue = 0;
  uint64_t SRAALLValue = 0;
  uint64_t SRABKeyValue = 0;

  // Collect the current values of the six recognized module flags.
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (ModFlags) {
    for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
      MDNode *Op = ModFlags->getOperand(i: I);
      if (Op->getNumOperands() != 3)
        continue;

      MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
      auto *CI = mdconst::dyn_extract<ConstantInt>(MD: Op->getOperand(I: 2));
      if (!ID || !CI)
        continue;

      // Map the flag name to the variable that records its value.
      StringRef IDStr = ID->getString();
      uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
                         : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
                         : IDStr == "guarded-control-stack" ? &GCSValue
                         : IDStr == "sign-return-address" ? &SRAValue
                         : IDStr == "sign-return-address-all" ? &SRAALLValue
                         : IDStr == "sign-return-address-with-bkey"
                             ? &SRABKeyValue
                             : nullptr;
      if (!ValPtr)
        continue;

      *ValPtr = CI->getZExtValue();
      // A value of 2 means there is nothing further to do here.
      if (*ValPtr == 2)
        return;
    }
  }

  // A flag is "enabled" only when its value is exactly 1.
  bool BTE = BTEValue == 1;
  bool BPPLR = BPPLRValue == 1;
  bool GCS = GCSValue == 1;
  bool SRA = SRAValue == 1;

  StringRef SignTypeValue = "non-leaf";
  if (SRA && SRAALLValue == 1)
    SignTypeValue = "all";

  StringRef SignKeyValue = "a_key";
  if (SRA && SRABKeyValue == 1)
    SignKeyValue = "b_key";

  for (Function &F : M.getFunctionList()) {
    if (F.isDeclaration())
      continue;

    if (SRA) {
      // Existing per-function values take precedence over the module flags.
      setFunctionAttrIfNotSet(F, FnAttrName: "sign-return-address", Value: SignTypeValue);
      setFunctionAttrIfNotSet(F, FnAttrName: "sign-return-address-key", Value: SignKeyValue);
    } else {
      // If the module does not sign return addresses, drop an explicit
      // per-function "none" marker (and its now-meaningless key).
      if (auto A = F.getFnAttribute(Kind: "sign-return-address");
          A.isValid() && "none" == A.getValueAsString()) {
        F.removeFnAttr(Kind: "sign-return-address");
        F.removeFnAttr(Kind: "sign-return-address-key");
      }
    }
    ConvertFunctionAttr(F, Set: BTE, FnAttrName: "branch-target-enforcement");
    ConvertFunctionAttr(F, Set: BPPLR, FnAttrName: "branch-protection-pauth-lr");
    ConvertFunctionAttr(F, Set: GCS, FnAttrName: "guarded-control-stack");
  }

  // Record that the copy has happened by raising each enabled flag to 2.
  if (BTE)
    M.setModuleFlag(Behavior: llvm::Module::Min, Key: "branch-target-enforcement", Val: 2);
  if (BPPLR)
    M.setModuleFlag(Behavior: llvm::Module::Min, Key: "branch-protection-pauth-lr", Val: 2);
  if (GCS)
    M.setModuleFlag(Behavior: llvm::Module::Min, Key: "guarded-control-stack", Val: 2);
  if (SRA) {
    M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address", Val: 2);
    if (SRAALLValue == 1)
      M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address-all", Val: 2);
    if (SRABKeyValue == 1)
      M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address-with-bkey", Val: 2);
  }
}
6589
6590static bool isOldLoopArgument(Metadata *MD) {
6591 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
6592 if (!T)
6593 return false;
6594 if (T->getNumOperands() < 1)
6595 return false;
6596 auto *S = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
6597 if (!S)
6598 return false;
6599 return S->getString().starts_with(Prefix: "llvm.vectorizer.");
6600}
6601
6602static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6603 StringRef OldPrefix = "llvm.vectorizer.";
6604 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6605
6606 if (OldTag == "llvm.vectorizer.unroll")
6607 return MDString::get(Context&: C, Str: "llvm.loop.interleave.count");
6608
6609 return MDString::get(
6610 Context&: C, Str: (Twine("llvm.loop.vectorize.") + OldTag.drop_front(N: OldPrefix.size()))
6611 .str());
6612}
6613
6614static Metadata *upgradeLoopArgument(Metadata *MD) {
6615 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
6616 if (!T)
6617 return MD;
6618 if (T->getNumOperands() < 1)
6619 return MD;
6620 auto *OldTag = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
6621 if (!OldTag)
6622 return MD;
6623 if (!OldTag->getString().starts_with(Prefix: "llvm.vectorizer."))
6624 return MD;
6625
6626 // This has an old tag. Upgrade it.
6627 SmallVector<Metadata *, 8> Ops;
6628 Ops.reserve(N: T->getNumOperands());
6629 Ops.push_back(Elt: upgradeLoopTag(C&: T->getContext(), OldTag: OldTag->getString()));
6630 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6631 Ops.push_back(Elt: T->getOperand(I));
6632
6633 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
6634}
6635
6636MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6637 auto *T = dyn_cast<MDTuple>(Val: &N);
6638 if (!T)
6639 return &N;
6640
6641 if (none_of(Range: T->operands(), P: isOldLoopArgument))
6642 return &N;
6643
6644 SmallVector<Metadata *, 8> Ops;
6645 Ops.reserve(N: T->getNumOperands());
6646 for (Metadata *MD : T->operands())
6647 Ops.push_back(Elt: upgradeLoopArgument(MD));
6648
6649 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
6650}
6651
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  // Upgrades a textual data layout string \p DL for target triple \p TT to
  // the current expectations, returning the (possibly rewritten) string.
  // The edits below are order-dependent string surgery; each target branch
  // either returns early or falls through to later shared fixups.
  Triple T(TT);
  // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
  // the address space of globals to 1. This does not apply to SPIRV Logical.
  if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
      !DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isLoongArch64() || T.isRISCV64()) {
    // Make i32 a native type for 64-bit LoongArch and RISC-V.
    // Splice "-n32:64-" in place of the old 5-character "-n64-" component.
    auto I = DL.find(Str: "-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(N: I) + "-n32:64-" + DL.drop_front(N: I + 5)).str();
    return DL.str();
  }

  // AMDGPU data layout upgrades.
  std::string Res = DL.str();
  if (T.isAMDGPU()) {
    // Define address spaces for constants.
    if (!DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G"))
      Res.append(s: Res.empty() ? "G1" : "-G1");

    // AMDGCN data layout upgrades.
    if (T.isAMDGCN()) {

      // Add missing non-integral declarations.
      // This goes before adding new address spaces to prevent incoherent string
      // values.
      if (!DL.contains(Other: "-ni") && !DL.starts_with(Prefix: "ni"))
        Res.append(s: "-ni:7:8:9");
      // Update ni:7 to ni:7:8:9.
      // Note: these suffix checks test the original DL, so they cannot both
      // fire, and cannot fire after the "-ni:7:8:9" append above.
      if (DL.ends_with(Suffix: "ni:7"))
        Res.append(s: ":8:9");
      if (DL.ends_with(Suffix: "ni:7:8"))
        Res.append(s: ":9");

      // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
      // resources) An empty data layout has already been upgraded to G1 by now.
      if (!DL.contains(Other: "-p7") && !DL.starts_with(Prefix: "p7"))
        Res.append(s: "-p7:160:256:256:32");
      if (!DL.contains(Other: "-p8") && !DL.starts_with(Prefix: "p8"))
        Res.append(s: "-p8:128:128:128:48");
      // An existing p8 spec without the index-size fields gets them added.
      constexpr StringRef OldP8("-p8:128:128-");
      if (DL.contains(Other: OldP8))
        Res.replace(pos: Res.find(svt: OldP8), n1: OldP8.size(), s: "-p8:128:128:128:48-");
      if (!DL.contains(Other: "-p9") && !DL.starts_with(Prefix: "p9"))
        Res.append(s: "-p9:192:256:256:32");
    }

    // Upgrade the ELF mangling mode.
    if (!DL.contains(Other: "m:e"))
      Res = Res.empty() ? "m:e" : "m:e-" + Res;

    return Res;
  }

  if (T.isSystemZ() && !DL.empty()) {
    // Make sure the stack alignment is present.
    // SystemZ layouts start with the big-endian "E" marker; rebuild the
    // prefix with "-S64" inserted right after it.
    if (!DL.contains(Other: "-S64"))
      return "E-S64" + DL.drop_front(N: 1).str();
    return DL.str();
  }

  // Shared helper for X86 and AArch64 below: mutates Res in place.
  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
    // If the datalayout matches the expected format, add pointer size address
    // spaces to the datalayout.
    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
    if (!DL.contains(Other: AddrSpaces)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
      if (R.match(String: Res, Matches: &Groups))
        Res = (Groups[1] + AddrSpaces + Groups[3]).str();
    }
  };

  // AArch64 data layout upgrades.
  if (T.isAArch64()) {
    // Add "-Fn32"
    if (!DL.empty() && !DL.contains(Other: "-Fn32"))
      Res.append(s: "-Fn32");
    AddPtr32Ptr64AddrSpaces();
    return Res;
  }

  if (T.isSPARC() || (T.isMIPS64() && !DL.contains(Other: "m:m")) || T.isPPC64() ||
      T.isWasm()) {
    // Mips64 with o32 ABI did not add "-i128:128".
    // Add "-i128:128"
    // i128 alignment is inserted immediately after the "-i64:64" component.
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    if (!StringRef(Res).contains(Other: I128)) {
      size_t Pos = Res.find(str: I64);
      if (Pos != size_t(-1))
        Res.insert(pos1: Pos + I64.size(), str: I128);
    }
  }

  // AIX PowerPC: f64 is 4-byte aligned in aggregates; insert the spec before
  // the stack-alignment component (or append when "-S128" is absent).
  if (T.isPPC() && T.isOSAIX() && !DL.contains(Other: "f64:32:64") && !DL.empty()) {
    size_t Pos = Res.find(s: "-S128");
    if (Pos == StringRef::npos)
      Pos = Res.size();
    Res.insert(pos: Pos, s: "-f64:32:64");
  }

  // Everything below this point is X86-only.
  if (!T.isX86())
    return Res;

  AddPtr32Ptr64AddrSpaces();

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(Other: I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(String: Res, Matches: &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find(Str: "-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(N: I) + "-f80:128-" + Ref.drop_front(N: I + 8)).str();
  }

  return Res;
}
6791
6792void llvm::UpgradeAttributes(AttrBuilder &B) {
6793 StringRef FramePointer;
6794 Attribute A = B.getAttribute(Kind: "no-frame-pointer-elim");
6795 if (A.isValid()) {
6796 // The value can be "true" or "false".
6797 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6798 B.removeAttribute(A: "no-frame-pointer-elim");
6799 }
6800 if (B.contains(A: "no-frame-pointer-elim-non-leaf")) {
6801 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6802 if (FramePointer != "all")
6803 FramePointer = "non-leaf";
6804 B.removeAttribute(A: "no-frame-pointer-elim-non-leaf");
6805 }
6806 if (!FramePointer.empty())
6807 B.addAttribute(A: "frame-pointer", V: FramePointer);
6808
6809 A = B.getAttribute(Kind: "null-pointer-is-valid");
6810 if (A.isValid()) {
6811 // The value can be "true" or "false".
6812 bool NullPointerIsValid = A.getValueAsString() == "true";
6813 B.removeAttribute(A: "null-pointer-is-valid");
6814 if (NullPointerIsValid)
6815 B.addAttribute(Val: Attribute::NullPointerIsValid);
6816 }
6817
6818 A = B.getAttribute(Kind: "uniform-work-group-size");
6819 if (A.isValid()) {
6820 StringRef Val = A.getValueAsString();
6821 if (!Val.empty()) {
6822 bool IsTrue = Val == "true";
6823 B.removeAttribute(A: "uniform-work-group-size");
6824 if (IsTrue)
6825 B.addAttribute(A: "uniform-work-group-size");
6826 }
6827 }
6828}
6829
6830void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6831 // clang.arc.attachedcall bundles are now required to have an operand.
6832 // If they don't, it's okay to drop them entirely: when there is an operand,
6833 // the "attachedcall" is meaningful and required, but without an operand,
6834 // it's just a marker NOP. Dropping it merely prevents an optimization.
6835 erase_if(C&: Bundles, P: [&](OperandBundleDef &OBD) {
6836 return OBD.getTag() == "clang.arc.attachedcall" &&
6837 OBD.inputs().empty();
6838 });
6839}
6840