//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/BinaryFormat/Dwarf.h"
21#include "llvm/IR/AttributeMask.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DebugInfo.h"
26#include "llvm/IR/DebugInfoMetadata.h"
27#include "llvm/IR/DiagnosticInfo.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/InstVisitor.h"
31#include "llvm/IR/Instruction.h"
32#include "llvm/IR/IntrinsicInst.h"
33#include "llvm/IR/Intrinsics.h"
34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
41#include "llvm/IR/LLVMContext.h"
42#include "llvm/IR/MDBuilder.h"
43#include "llvm/IR/Metadata.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/Value.h"
46#include "llvm/IR/Verifier.h"
47#include "llvm/Support/AMDGPUAddrSpace.h"
48#include "llvm/Support/CommandLine.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/NVPTXAddrSpace.h"
51#include "llvm/Support/Regex.h"
52#include "llvm/Support/TimeProfiler.h"
53#include "llvm/TargetParser/Triple.h"
54#include <cstdint>
55#include <cstring>
56#include <numeric>
57
58using namespace llvm;
59
60static cl::opt<bool>
61 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
62 cl::desc("Disable autoupgrade of debug info"));
63
64static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
65
66// Report a fatal error along with the
67// Call Instruction which caused the error
68[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
69 CallBase *CI) {
70 CI->print(O&: llvm::errs());
71 llvm::errs() << "\n";
72 reportFatalUsageError(reason);
73}
74
75// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
76// changed their type from v4f32 to v2i64.
77static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
78 Function *&NewFn) {
79 // Check whether this is an old version of the function, which received
80 // v4f32 arguments.
81 Type *Arg0Type = F->getFunctionType()->getParamType(i: 0);
82 if (Arg0Type != FixedVectorType::get(ElementType: Type::getFloatTy(C&: F->getContext()), NumElts: 4))
83 return false;
84
85 // Yes, it's old, replace it with new version.
86 rename(GV: F);
87 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
88 return true;
89}
90
91// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
92// arguments have changed their type from i32 to i8.
93static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
94 Function *&NewFn) {
95 // Check that the last argument is an i32.
96 Type *LastArgType = F->getFunctionType()->getParamType(
97 i: F->getFunctionType()->getNumParams() - 1);
98 if (!LastArgType->isIntegerTy(Bitwidth: 32))
99 return false;
100
101 // Move this function aside and map down.
102 rename(GV: F);
103 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
104 return true;
105}
106
107// Upgrade the declaration of fp compare intrinsics that change return type
108// from scalar to vXi1 mask.
109static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
110 Function *&NewFn) {
111 // Check if the return type is a vector.
112 if (F->getReturnType()->isVectorTy())
113 return false;
114
115 rename(GV: F);
116 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
117 return true;
118}
119
120// Upgrade the declaration of multiply and add bytes intrinsics whose input
121// arguments' types have changed from vectors of i32 to vectors of i8
122static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
123 Function *&NewFn) {
124 // check if input argument type is a vector of i8
125 Type *Arg1Type = F->getFunctionType()->getParamType(i: 1);
126 Type *Arg2Type = F->getFunctionType()->getParamType(i: 2);
127 if (Arg1Type->isVectorTy() &&
128 cast<VectorType>(Val: Arg1Type)->getElementType()->isIntegerTy(Bitwidth: 8) &&
129 Arg2Type->isVectorTy() &&
130 cast<VectorType>(Val: Arg2Type)->getElementType()->isIntegerTy(Bitwidth: 8))
131 return false;
132
133 rename(GV: F);
134 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
135 return true;
136}
137
138// Upgrade the declaration of multipy and add words intrinsics whose input
139// arguments' types have changed to vectors of i32 to vectors of i16
140static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
141 Function *&NewFn) {
142 // check if input argument type is a vector of i16
143 Type *Arg1Type = F->getFunctionType()->getParamType(i: 1);
144 Type *Arg2Type = F->getFunctionType()->getParamType(i: 2);
145 if (Arg1Type->isVectorTy() &&
146 cast<VectorType>(Val: Arg1Type)->getElementType()->isIntegerTy(Bitwidth: 16) &&
147 Arg2Type->isVectorTy() &&
148 cast<VectorType>(Val: Arg2Type)->getElementType()->isIntegerTy(Bitwidth: 16))
149 return false;
150
151 rename(GV: F);
152 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
153 return true;
154}
155
156static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
157 Function *&NewFn) {
158 if (F->getReturnType()->getScalarType()->isBFloatTy())
159 return false;
160
161 rename(GV: F);
162 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
163 return true;
164}
165
166static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
167 Function *&NewFn) {
168 if (F->getFunctionType()->getParamType(i: 1)->getScalarType()->isBFloatTy())
169 return false;
170
171 rename(GV: F);
172 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
173 return true;
174}
175
176static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
177 // All of the intrinsics matches below should be marked with which llvm
178 // version started autoupgrading them. At some point in the future we would
179 // like to use this information to remove upgrade code for some older
180 // intrinsics. It is currently undecided how we will determine that future
181 // point.
182 if (Name.consume_front(Prefix: "avx."))
183 return (Name.starts_with(Prefix: "blend.p") || // Added in 3.7
184 Name == "cvt.ps2.pd.256" || // Added in 3.9
185 Name == "cvtdq2.pd.256" || // Added in 3.9
186 Name == "cvtdq2.ps.256" || // Added in 7.0
187 Name.starts_with(Prefix: "movnt.") || // Added in 3.2
188 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
189 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
190 Name.starts_with(Prefix: "vbroadcast.s") || // Added in 3.5
191 Name.starts_with(Prefix: "vbroadcastf128") || // Added in 4.0
192 Name.starts_with(Prefix: "vextractf128.") || // Added in 3.7
193 Name.starts_with(Prefix: "vinsertf128.") || // Added in 3.7
194 Name.starts_with(Prefix: "vperm2f128.") || // Added in 6.0
195 Name.starts_with(Prefix: "vpermil.")); // Added in 3.1
196
197 if (Name.consume_front(Prefix: "avx2."))
198 return (Name == "movntdqa" || // Added in 5.0
199 Name.starts_with(Prefix: "pabs.") || // Added in 6.0
200 Name.starts_with(Prefix: "padds.") || // Added in 8.0
201 Name.starts_with(Prefix: "paddus.") || // Added in 8.0
202 Name.starts_with(Prefix: "pblendd.") || // Added in 3.7
203 Name == "pblendw" || // Added in 3.7
204 Name.starts_with(Prefix: "pbroadcast") || // Added in 3.8
205 Name.starts_with(Prefix: "pcmpeq.") || // Added in 3.1
206 Name.starts_with(Prefix: "pcmpgt.") || // Added in 3.1
207 Name.starts_with(Prefix: "pmax") || // Added in 3.9
208 Name.starts_with(Prefix: "pmin") || // Added in 3.9
209 Name.starts_with(Prefix: "pmovsx") || // Added in 3.9
210 Name.starts_with(Prefix: "pmovzx") || // Added in 3.9
211 Name == "pmul.dq" || // Added in 7.0
212 Name == "pmulu.dq" || // Added in 7.0
213 Name.starts_with(Prefix: "psll.dq") || // Added in 3.7
214 Name.starts_with(Prefix: "psrl.dq") || // Added in 3.7
215 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
216 Name.starts_with(Prefix: "psubus.") || // Added in 8.0
217 Name.starts_with(Prefix: "vbroadcast") || // Added in 3.8
218 Name == "vbroadcasti128" || // Added in 3.7
219 Name == "vextracti128" || // Added in 3.7
220 Name == "vinserti128" || // Added in 3.7
221 Name == "vperm2i128"); // Added in 6.0
222
223 if (Name.consume_front(Prefix: "avx512.")) {
224 if (Name.consume_front(Prefix: "mask."))
225 // 'avx512.mask.*'
226 return (Name.starts_with(Prefix: "add.p") || // Added in 7.0. 128/256 in 4.0
227 Name.starts_with(Prefix: "and.") || // Added in 3.9
228 Name.starts_with(Prefix: "andn.") || // Added in 3.9
229 Name.starts_with(Prefix: "broadcast.s") || // Added in 3.9
230 Name.starts_with(Prefix: "broadcastf32x4.") || // Added in 6.0
231 Name.starts_with(Prefix: "broadcastf32x8.") || // Added in 6.0
232 Name.starts_with(Prefix: "broadcastf64x2.") || // Added in 6.0
233 Name.starts_with(Prefix: "broadcastf64x4.") || // Added in 6.0
234 Name.starts_with(Prefix: "broadcasti32x4.") || // Added in 6.0
235 Name.starts_with(Prefix: "broadcasti32x8.") || // Added in 6.0
236 Name.starts_with(Prefix: "broadcasti64x2.") || // Added in 6.0
237 Name.starts_with(Prefix: "broadcasti64x4.") || // Added in 6.0
238 Name.starts_with(Prefix: "cmp.b") || // Added in 5.0
239 Name.starts_with(Prefix: "cmp.d") || // Added in 5.0
240 Name.starts_with(Prefix: "cmp.q") || // Added in 5.0
241 Name.starts_with(Prefix: "cmp.w") || // Added in 5.0
242 Name.starts_with(Prefix: "compress.b") || // Added in 9.0
243 Name.starts_with(Prefix: "compress.d") || // Added in 9.0
244 Name.starts_with(Prefix: "compress.p") || // Added in 9.0
245 Name.starts_with(Prefix: "compress.q") || // Added in 9.0
246 Name.starts_with(Prefix: "compress.store.") || // Added in 7.0
247 Name.starts_with(Prefix: "compress.w") || // Added in 9.0
248 Name.starts_with(Prefix: "conflict.") || // Added in 9.0
249 Name.starts_with(Prefix: "cvtdq2pd.") || // Added in 4.0
250 Name.starts_with(Prefix: "cvtdq2ps.") || // Added in 7.0 updated 9.0
251 Name == "cvtpd2dq.256" || // Added in 7.0
252 Name == "cvtpd2ps.256" || // Added in 7.0
253 Name == "cvtps2pd.128" || // Added in 7.0
254 Name == "cvtps2pd.256" || // Added in 7.0
255 Name.starts_with(Prefix: "cvtqq2pd.") || // Added in 7.0 updated 9.0
256 Name == "cvtqq2ps.256" || // Added in 9.0
257 Name == "cvtqq2ps.512" || // Added in 9.0
258 Name == "cvttpd2dq.256" || // Added in 7.0
259 Name == "cvttps2dq.128" || // Added in 7.0
260 Name == "cvttps2dq.256" || // Added in 7.0
261 Name.starts_with(Prefix: "cvtudq2pd.") || // Added in 4.0
262 Name.starts_with(Prefix: "cvtudq2ps.") || // Added in 7.0 updated 9.0
263 Name.starts_with(Prefix: "cvtuqq2pd.") || // Added in 7.0 updated 9.0
264 Name == "cvtuqq2ps.256" || // Added in 9.0
265 Name == "cvtuqq2ps.512" || // Added in 9.0
266 Name.starts_with(Prefix: "dbpsadbw.") || // Added in 7.0
267 Name.starts_with(Prefix: "div.p") || // Added in 7.0. 128/256 in 4.0
268 Name.starts_with(Prefix: "expand.b") || // Added in 9.0
269 Name.starts_with(Prefix: "expand.d") || // Added in 9.0
270 Name.starts_with(Prefix: "expand.load.") || // Added in 7.0
271 Name.starts_with(Prefix: "expand.p") || // Added in 9.0
272 Name.starts_with(Prefix: "expand.q") || // Added in 9.0
273 Name.starts_with(Prefix: "expand.w") || // Added in 9.0
274 Name.starts_with(Prefix: "fpclass.p") || // Added in 7.0
275 Name.starts_with(Prefix: "insert") || // Added in 4.0
276 Name.starts_with(Prefix: "load.") || // Added in 3.9
277 Name.starts_with(Prefix: "loadu.") || // Added in 3.9
278 Name.starts_with(Prefix: "lzcnt.") || // Added in 5.0
279 Name.starts_with(Prefix: "max.p") || // Added in 7.0. 128/256 in 5.0
280 Name.starts_with(Prefix: "min.p") || // Added in 7.0. 128/256 in 5.0
281 Name.starts_with(Prefix: "movddup") || // Added in 3.9
282 Name.starts_with(Prefix: "move.s") || // Added in 4.0
283 Name.starts_with(Prefix: "movshdup") || // Added in 3.9
284 Name.starts_with(Prefix: "movsldup") || // Added in 3.9
285 Name.starts_with(Prefix: "mul.p") || // Added in 7.0. 128/256 in 4.0
286 Name.starts_with(Prefix: "or.") || // Added in 3.9
287 Name.starts_with(Prefix: "pabs.") || // Added in 6.0
288 Name.starts_with(Prefix: "packssdw.") || // Added in 5.0
289 Name.starts_with(Prefix: "packsswb.") || // Added in 5.0
290 Name.starts_with(Prefix: "packusdw.") || // Added in 5.0
291 Name.starts_with(Prefix: "packuswb.") || // Added in 5.0
292 Name.starts_with(Prefix: "padd.") || // Added in 4.0
293 Name.starts_with(Prefix: "padds.") || // Added in 8.0
294 Name.starts_with(Prefix: "paddus.") || // Added in 8.0
295 Name.starts_with(Prefix: "palignr.") || // Added in 3.9
296 Name.starts_with(Prefix: "pand.") || // Added in 3.9
297 Name.starts_with(Prefix: "pandn.") || // Added in 3.9
298 Name.starts_with(Prefix: "pavg") || // Added in 6.0
299 Name.starts_with(Prefix: "pbroadcast") || // Added in 6.0
300 Name.starts_with(Prefix: "pcmpeq.") || // Added in 3.9
301 Name.starts_with(Prefix: "pcmpgt.") || // Added in 3.9
302 Name.starts_with(Prefix: "perm.df.") || // Added in 3.9
303 Name.starts_with(Prefix: "perm.di.") || // Added in 3.9
304 Name.starts_with(Prefix: "permvar.") || // Added in 7.0
305 Name.starts_with(Prefix: "pmaddubs.w.") || // Added in 7.0
306 Name.starts_with(Prefix: "pmaddw.d.") || // Added in 7.0
307 Name.starts_with(Prefix: "pmax") || // Added in 4.0
308 Name.starts_with(Prefix: "pmin") || // Added in 4.0
309 Name == "pmov.qd.256" || // Added in 9.0
310 Name == "pmov.qd.512" || // Added in 9.0
311 Name == "pmov.wb.256" || // Added in 9.0
312 Name == "pmov.wb.512" || // Added in 9.0
313 Name.starts_with(Prefix: "pmovsx") || // Added in 4.0
314 Name.starts_with(Prefix: "pmovzx") || // Added in 4.0
315 Name.starts_with(Prefix: "pmul.dq.") || // Added in 4.0
316 Name.starts_with(Prefix: "pmul.hr.sw.") || // Added in 7.0
317 Name.starts_with(Prefix: "pmulh.w.") || // Added in 7.0
318 Name.starts_with(Prefix: "pmulhu.w.") || // Added in 7.0
319 Name.starts_with(Prefix: "pmull.") || // Added in 4.0
320 Name.starts_with(Prefix: "pmultishift.qb.") || // Added in 8.0
321 Name.starts_with(Prefix: "pmulu.dq.") || // Added in 4.0
322 Name.starts_with(Prefix: "por.") || // Added in 3.9
323 Name.starts_with(Prefix: "prol.") || // Added in 8.0
324 Name.starts_with(Prefix: "prolv.") || // Added in 8.0
325 Name.starts_with(Prefix: "pror.") || // Added in 8.0
326 Name.starts_with(Prefix: "prorv.") || // Added in 8.0
327 Name.starts_with(Prefix: "pshuf.b.") || // Added in 4.0
328 Name.starts_with(Prefix: "pshuf.d.") || // Added in 3.9
329 Name.starts_with(Prefix: "pshufh.w.") || // Added in 3.9
330 Name.starts_with(Prefix: "pshufl.w.") || // Added in 3.9
331 Name.starts_with(Prefix: "psll.d") || // Added in 4.0
332 Name.starts_with(Prefix: "psll.q") || // Added in 4.0
333 Name.starts_with(Prefix: "psll.w") || // Added in 4.0
334 Name.starts_with(Prefix: "pslli") || // Added in 4.0
335 Name.starts_with(Prefix: "psllv") || // Added in 4.0
336 Name.starts_with(Prefix: "psra.d") || // Added in 4.0
337 Name.starts_with(Prefix: "psra.q") || // Added in 4.0
338 Name.starts_with(Prefix: "psra.w") || // Added in 4.0
339 Name.starts_with(Prefix: "psrai") || // Added in 4.0
340 Name.starts_with(Prefix: "psrav") || // Added in 4.0
341 Name.starts_with(Prefix: "psrl.d") || // Added in 4.0
342 Name.starts_with(Prefix: "psrl.q") || // Added in 4.0
343 Name.starts_with(Prefix: "psrl.w") || // Added in 4.0
344 Name.starts_with(Prefix: "psrli") || // Added in 4.0
345 Name.starts_with(Prefix: "psrlv") || // Added in 4.0
346 Name.starts_with(Prefix: "psub.") || // Added in 4.0
347 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
348 Name.starts_with(Prefix: "psubus.") || // Added in 8.0
349 Name.starts_with(Prefix: "pternlog.") || // Added in 7.0
350 Name.starts_with(Prefix: "punpckh") || // Added in 3.9
351 Name.starts_with(Prefix: "punpckl") || // Added in 3.9
352 Name.starts_with(Prefix: "pxor.") || // Added in 3.9
353 Name.starts_with(Prefix: "shuf.f") || // Added in 6.0
354 Name.starts_with(Prefix: "shuf.i") || // Added in 6.0
355 Name.starts_with(Prefix: "shuf.p") || // Added in 4.0
356 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
357 Name.starts_with(Prefix: "store.b.") || // Added in 3.9
358 Name.starts_with(Prefix: "store.d.") || // Added in 3.9
359 Name.starts_with(Prefix: "store.p") || // Added in 3.9
360 Name.starts_with(Prefix: "store.q.") || // Added in 3.9
361 Name.starts_with(Prefix: "store.w.") || // Added in 3.9
362 Name == "store.ss" || // Added in 7.0
363 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
364 Name.starts_with(Prefix: "sub.p") || // Added in 7.0. 128/256 in 4.0
365 Name.starts_with(Prefix: "ucmp.") || // Added in 5.0
366 Name.starts_with(Prefix: "unpckh.") || // Added in 3.9
367 Name.starts_with(Prefix: "unpckl.") || // Added in 3.9
368 Name.starts_with(Prefix: "valign.") || // Added in 4.0
369 Name == "vcvtph2ps.128" || // Added in 11.0
370 Name == "vcvtph2ps.256" || // Added in 11.0
371 Name.starts_with(Prefix: "vextract") || // Added in 4.0
372 Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
373 Name.starts_with(Prefix: "vfmaddsub.") || // Added in 7.0
374 Name.starts_with(Prefix: "vfnmadd.") || // Added in 7.0
375 Name.starts_with(Prefix: "vfnmsub.") || // Added in 7.0
376 Name.starts_with(Prefix: "vpdpbusd.") || // Added in 7.0
377 Name.starts_with(Prefix: "vpdpbusds.") || // Added in 7.0
378 Name.starts_with(Prefix: "vpdpwssd.") || // Added in 7.0
379 Name.starts_with(Prefix: "vpdpwssds.") || // Added in 7.0
380 Name.starts_with(Prefix: "vpermi2var.") || // Added in 7.0
381 Name.starts_with(Prefix: "vpermil.p") || // Added in 3.9
382 Name.starts_with(Prefix: "vpermilvar.") || // Added in 4.0
383 Name.starts_with(Prefix: "vpermt2var.") || // Added in 7.0
384 Name.starts_with(Prefix: "vpmadd52") || // Added in 7.0
385 Name.starts_with(Prefix: "vpshld.") || // Added in 7.0
386 Name.starts_with(Prefix: "vpshldv.") || // Added in 8.0
387 Name.starts_with(Prefix: "vpshrd.") || // Added in 7.0
388 Name.starts_with(Prefix: "vpshrdv.") || // Added in 8.0
389 Name.starts_with(Prefix: "vpshufbitqmb.") || // Added in 8.0
390 Name.starts_with(Prefix: "xor.")); // Added in 3.9
391
392 if (Name.consume_front(Prefix: "mask3."))
393 // 'avx512.mask3.*'
394 return (Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
395 Name.starts_with(Prefix: "vfmaddsub.") || // Added in 7.0
396 Name.starts_with(Prefix: "vfmsub.") || // Added in 7.0
397 Name.starts_with(Prefix: "vfmsubadd.") || // Added in 7.0
398 Name.starts_with(Prefix: "vfnmsub.")); // Added in 7.0
399
400 if (Name.consume_front(Prefix: "maskz."))
401 // 'avx512.maskz.*'
402 return (Name.starts_with(Prefix: "pternlog.") || // Added in 7.0
403 Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
404 Name.starts_with(Prefix: "vfmaddsub.") || // Added in 7.0
405 Name.starts_with(Prefix: "vpdpbusd.") || // Added in 7.0
406 Name.starts_with(Prefix: "vpdpbusds.") || // Added in 7.0
407 Name.starts_with(Prefix: "vpdpwssd.") || // Added in 7.0
408 Name.starts_with(Prefix: "vpdpwssds.") || // Added in 7.0
409 Name.starts_with(Prefix: "vpermt2var.") || // Added in 7.0
410 Name.starts_with(Prefix: "vpmadd52") || // Added in 7.0
411 Name.starts_with(Prefix: "vpshldv.") || // Added in 8.0
412 Name.starts_with(Prefix: "vpshrdv.")); // Added in 8.0
413
414 // 'avx512.*'
415 return (Name == "movntdqa" || // Added in 5.0
416 Name == "pmul.dq.512" || // Added in 7.0
417 Name == "pmulu.dq.512" || // Added in 7.0
418 Name.starts_with(Prefix: "broadcastm") || // Added in 6.0
419 Name.starts_with(Prefix: "cmp.p") || // Added in 12.0
420 Name.starts_with(Prefix: "cvtb2mask.") || // Added in 7.0
421 Name.starts_with(Prefix: "cvtd2mask.") || // Added in 7.0
422 Name.starts_with(Prefix: "cvtmask2") || // Added in 5.0
423 Name.starts_with(Prefix: "cvtq2mask.") || // Added in 7.0
424 Name == "cvtusi2sd" || // Added in 7.0
425 Name.starts_with(Prefix: "cvtw2mask.") || // Added in 7.0
426 Name == "kand.w" || // Added in 7.0
427 Name == "kandn.w" || // Added in 7.0
428 Name == "knot.w" || // Added in 7.0
429 Name == "kor.w" || // Added in 7.0
430 Name == "kortestc.w" || // Added in 7.0
431 Name == "kortestz.w" || // Added in 7.0
432 Name.starts_with(Prefix: "kunpck") || // added in 6.0
433 Name == "kxnor.w" || // Added in 7.0
434 Name == "kxor.w" || // Added in 7.0
435 Name.starts_with(Prefix: "padds.") || // Added in 8.0
436 Name.starts_with(Prefix: "pbroadcast") || // Added in 3.9
437 Name.starts_with(Prefix: "prol") || // Added in 8.0
438 Name.starts_with(Prefix: "pror") || // Added in 8.0
439 Name.starts_with(Prefix: "psll.dq") || // Added in 3.9
440 Name.starts_with(Prefix: "psrl.dq") || // Added in 3.9
441 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
442 Name.starts_with(Prefix: "ptestm") || // Added in 6.0
443 Name.starts_with(Prefix: "ptestnm") || // Added in 6.0
444 Name.starts_with(Prefix: "storent.") || // Added in 3.9
445 Name.starts_with(Prefix: "vbroadcast.s") || // Added in 7.0
446 Name.starts_with(Prefix: "vpshld.") || // Added in 8.0
447 Name.starts_with(Prefix: "vpshrd.")); // Added in 8.0
448 }
449
450 if (Name.consume_front(Prefix: "fma."))
451 return (Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
452 Name.starts_with(Prefix: "vfmsub.") || // Added in 7.0
453 Name.starts_with(Prefix: "vfmsubadd.") || // Added in 7.0
454 Name.starts_with(Prefix: "vfnmadd.") || // Added in 7.0
455 Name.starts_with(Prefix: "vfnmsub.")); // Added in 7.0
456
457 if (Name.consume_front(Prefix: "fma4."))
458 return Name.starts_with(Prefix: "vfmadd.s"); // Added in 7.0
459
460 if (Name.consume_front(Prefix: "sse."))
461 return (Name == "add.ss" || // Added in 4.0
462 Name == "cvtsi2ss" || // Added in 7.0
463 Name == "cvtsi642ss" || // Added in 7.0
464 Name == "div.ss" || // Added in 4.0
465 Name == "mul.ss" || // Added in 4.0
466 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
467 Name == "sqrt.ss" || // Added in 7.0
468 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
469 Name == "sub.ss"); // Added in 4.0
470
471 if (Name.consume_front(Prefix: "sse2."))
472 return (Name == "add.sd" || // Added in 4.0
473 Name == "cvtdq2pd" || // Added in 3.9
474 Name == "cvtdq2ps" || // Added in 7.0
475 Name == "cvtps2pd" || // Added in 3.9
476 Name == "cvtsi2sd" || // Added in 7.0
477 Name == "cvtsi642sd" || // Added in 7.0
478 Name == "cvtss2sd" || // Added in 7.0
479 Name == "div.sd" || // Added in 4.0
480 Name == "mul.sd" || // Added in 4.0
481 Name.starts_with(Prefix: "padds.") || // Added in 8.0
482 Name.starts_with(Prefix: "paddus.") || // Added in 8.0
483 Name.starts_with(Prefix: "pcmpeq.") || // Added in 3.1
484 Name.starts_with(Prefix: "pcmpgt.") || // Added in 3.1
485 Name == "pmaxs.w" || // Added in 3.9
486 Name == "pmaxu.b" || // Added in 3.9
487 Name == "pmins.w" || // Added in 3.9
488 Name == "pminu.b" || // Added in 3.9
489 Name == "pmulu.dq" || // Added in 7.0
490 Name.starts_with(Prefix: "pshuf") || // Added in 3.9
491 Name.starts_with(Prefix: "psll.dq") || // Added in 3.7
492 Name.starts_with(Prefix: "psrl.dq") || // Added in 3.7
493 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
494 Name.starts_with(Prefix: "psubus.") || // Added in 8.0
495 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
496 Name == "sqrt.sd" || // Added in 7.0
497 Name == "storel.dq" || // Added in 3.9
498 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
499 Name == "sub.sd"); // Added in 4.0
500
501 if (Name.consume_front(Prefix: "sse41."))
502 return (Name.starts_with(Prefix: "blendp") || // Added in 3.7
503 Name == "movntdqa" || // Added in 5.0
504 Name == "pblendw" || // Added in 3.7
505 Name == "pmaxsb" || // Added in 3.9
506 Name == "pmaxsd" || // Added in 3.9
507 Name == "pmaxud" || // Added in 3.9
508 Name == "pmaxuw" || // Added in 3.9
509 Name == "pminsb" || // Added in 3.9
510 Name == "pminsd" || // Added in 3.9
511 Name == "pminud" || // Added in 3.9
512 Name == "pminuw" || // Added in 3.9
513 Name.starts_with(Prefix: "pmovsx") || // Added in 3.8
514 Name.starts_with(Prefix: "pmovzx") || // Added in 3.9
515 Name == "pmuldq"); // Added in 7.0
516
517 if (Name.consume_front(Prefix: "sse42."))
518 return Name == "crc32.64.8"; // Added in 3.4
519
520 if (Name.consume_front(Prefix: "sse4a."))
521 return Name.starts_with(Prefix: "movnt."); // Added in 3.9
522
523 if (Name.consume_front(Prefix: "ssse3."))
524 return (Name == "pabs.b.128" || // Added in 6.0
525 Name == "pabs.d.128" || // Added in 6.0
526 Name == "pabs.w.128"); // Added in 6.0
527
528 if (Name.consume_front(Prefix: "xop."))
529 return (Name == "vpcmov" || // Added in 3.8
530 Name == "vpcmov.256" || // Added in 5.0
531 Name.starts_with(Prefix: "vpcom") || // Added in 3.2, Updated in 9.0
532 Name.starts_with(Prefix: "vprot")); // Added in 8.0
533
534 return (Name == "addcarry.u32" || // Added in 8.0
535 Name == "addcarry.u64" || // Added in 8.0
536 Name == "addcarryx.u32" || // Added in 8.0
537 Name == "addcarryx.u64" || // Added in 8.0
538 Name == "subborrow.u32" || // Added in 8.0
539 Name == "subborrow.u64" || // Added in 8.0
540 Name.starts_with(Prefix: "vcvtph2ps.")); // Added in 11.0
541}
542
543static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
544 Function *&NewFn) {
545 // Only handle intrinsics that start with "x86.".
546 if (!Name.consume_front(Prefix: "x86."))
547 return false;
548
549 if (shouldUpgradeX86Intrinsic(F, Name)) {
550 NewFn = nullptr;
551 return true;
552 }
553
554 if (Name == "rdtscp") { // Added in 8.0
555 // If this intrinsic has 0 operands, it's the new version.
556 if (F->getFunctionType()->getNumParams() == 0)
557 return false;
558
559 rename(GV: F);
560 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
561 id: Intrinsic::x86_rdtscp);
562 return true;
563 }
564
565 Intrinsic::ID ID;
566
567 // SSE4.1 ptest functions may have an old signature.
568 if (Name.consume_front(Prefix: "sse41.ptest")) { // Added in 3.2
569 ID = StringSwitch<Intrinsic::ID>(Name)
570 .Case(S: "c", Value: Intrinsic::x86_sse41_ptestc)
571 .Case(S: "z", Value: Intrinsic::x86_sse41_ptestz)
572 .Case(S: "nzc", Value: Intrinsic::x86_sse41_ptestnzc)
573 .Default(Value: Intrinsic::not_intrinsic);
574 if (ID != Intrinsic::not_intrinsic)
575 return upgradePTESTIntrinsic(F, IID: ID, NewFn);
576
577 return false;
578 }
579
580 // Several blend and other instructions with masks used the wrong number of
581 // bits.
582
583 // Added in 3.6
584 ID = StringSwitch<Intrinsic::ID>(Name)
585 .Case(S: "sse41.insertps", Value: Intrinsic::x86_sse41_insertps)
586 .Case(S: "sse41.dppd", Value: Intrinsic::x86_sse41_dppd)
587 .Case(S: "sse41.dpps", Value: Intrinsic::x86_sse41_dpps)
588 .Case(S: "sse41.mpsadbw", Value: Intrinsic::x86_sse41_mpsadbw)
589 .Case(S: "avx.dp.ps.256", Value: Intrinsic::x86_avx_dp_ps_256)
590 .Case(S: "avx2.mpsadbw", Value: Intrinsic::x86_avx2_mpsadbw)
591 .Default(Value: Intrinsic::not_intrinsic);
592 if (ID != Intrinsic::not_intrinsic)
593 return upgradeX86IntrinsicsWith8BitMask(F, IID: ID, NewFn);
594
595 if (Name.consume_front(Prefix: "avx512.")) {
596 if (Name.consume_front(Prefix: "mask.cmp.")) {
597 // Added in 7.0
598 ID = StringSwitch<Intrinsic::ID>(Name)
599 .Case(S: "pd.128", Value: Intrinsic::x86_avx512_mask_cmp_pd_128)
600 .Case(S: "pd.256", Value: Intrinsic::x86_avx512_mask_cmp_pd_256)
601 .Case(S: "pd.512", Value: Intrinsic::x86_avx512_mask_cmp_pd_512)
602 .Case(S: "ps.128", Value: Intrinsic::x86_avx512_mask_cmp_ps_128)
603 .Case(S: "ps.256", Value: Intrinsic::x86_avx512_mask_cmp_ps_256)
604 .Case(S: "ps.512", Value: Intrinsic::x86_avx512_mask_cmp_ps_512)
605 .Default(Value: Intrinsic::not_intrinsic);
606 if (ID != Intrinsic::not_intrinsic)
607 return upgradeX86MaskedFPCompare(F, IID: ID, NewFn);
608 } else if (Name.starts_with(Prefix: "vpdpbusd.") ||
609 Name.starts_with(Prefix: "vpdpbusds.")) {
610 // Added in 21.1
611 ID = StringSwitch<Intrinsic::ID>(Name)
612 .Case(S: "vpdpbusd.128", Value: Intrinsic::x86_avx512_vpdpbusd_128)
613 .Case(S: "vpdpbusd.256", Value: Intrinsic::x86_avx512_vpdpbusd_256)
614 .Case(S: "vpdpbusd.512", Value: Intrinsic::x86_avx512_vpdpbusd_512)
615 .Case(S: "vpdpbusds.128", Value: Intrinsic::x86_avx512_vpdpbusds_128)
616 .Case(S: "vpdpbusds.256", Value: Intrinsic::x86_avx512_vpdpbusds_256)
617 .Case(S: "vpdpbusds.512", Value: Intrinsic::x86_avx512_vpdpbusds_512)
618 .Default(Value: Intrinsic::not_intrinsic);
619 if (ID != Intrinsic::not_intrinsic)
620 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
621 } else if (Name.starts_with(Prefix: "vpdpwssd.") ||
622 Name.starts_with(Prefix: "vpdpwssds.")) {
623 // Added in 21.1
624 ID = StringSwitch<Intrinsic::ID>(Name)
625 .Case(S: "vpdpwssd.128", Value: Intrinsic::x86_avx512_vpdpwssd_128)
626 .Case(S: "vpdpwssd.256", Value: Intrinsic::x86_avx512_vpdpwssd_256)
627 .Case(S: "vpdpwssd.512", Value: Intrinsic::x86_avx512_vpdpwssd_512)
628 .Case(S: "vpdpwssds.128", Value: Intrinsic::x86_avx512_vpdpwssds_128)
629 .Case(S: "vpdpwssds.256", Value: Intrinsic::x86_avx512_vpdpwssds_256)
630 .Case(S: "vpdpwssds.512", Value: Intrinsic::x86_avx512_vpdpwssds_512)
631 .Default(Value: Intrinsic::not_intrinsic);
632 if (ID != Intrinsic::not_intrinsic)
633 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
634 }
635 return false; // No other 'x86.avx512.*'.
636 }
637
638 if (Name.consume_front(Prefix: "avx2.")) {
639 if (Name.consume_front(Prefix: "vpdpb")) {
640 // Added in 21.1
641 ID = StringSwitch<Intrinsic::ID>(Name)
642 .Case(S: "ssd.128", Value: Intrinsic::x86_avx2_vpdpbssd_128)
643 .Case(S: "ssd.256", Value: Intrinsic::x86_avx2_vpdpbssd_256)
644 .Case(S: "ssds.128", Value: Intrinsic::x86_avx2_vpdpbssds_128)
645 .Case(S: "ssds.256", Value: Intrinsic::x86_avx2_vpdpbssds_256)
646 .Case(S: "sud.128", Value: Intrinsic::x86_avx2_vpdpbsud_128)
647 .Case(S: "sud.256", Value: Intrinsic::x86_avx2_vpdpbsud_256)
648 .Case(S: "suds.128", Value: Intrinsic::x86_avx2_vpdpbsuds_128)
649 .Case(S: "suds.256", Value: Intrinsic::x86_avx2_vpdpbsuds_256)
650 .Case(S: "uud.128", Value: Intrinsic::x86_avx2_vpdpbuud_128)
651 .Case(S: "uud.256", Value: Intrinsic::x86_avx2_vpdpbuud_256)
652 .Case(S: "uuds.128", Value: Intrinsic::x86_avx2_vpdpbuuds_128)
653 .Case(S: "uuds.256", Value: Intrinsic::x86_avx2_vpdpbuuds_256)
654 .Default(Value: Intrinsic::not_intrinsic);
655 if (ID != Intrinsic::not_intrinsic)
656 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
657 } else if (Name.consume_front(Prefix: "vpdpw")) {
658 // Added in 21.1
659 ID = StringSwitch<Intrinsic::ID>(Name)
660 .Case(S: "sud.128", Value: Intrinsic::x86_avx2_vpdpwsud_128)
661 .Case(S: "sud.256", Value: Intrinsic::x86_avx2_vpdpwsud_256)
662 .Case(S: "suds.128", Value: Intrinsic::x86_avx2_vpdpwsuds_128)
663 .Case(S: "suds.256", Value: Intrinsic::x86_avx2_vpdpwsuds_256)
664 .Case(S: "usd.128", Value: Intrinsic::x86_avx2_vpdpwusd_128)
665 .Case(S: "usd.256", Value: Intrinsic::x86_avx2_vpdpwusd_256)
666 .Case(S: "usds.128", Value: Intrinsic::x86_avx2_vpdpwusds_128)
667 .Case(S: "usds.256", Value: Intrinsic::x86_avx2_vpdpwusds_256)
668 .Case(S: "uud.128", Value: Intrinsic::x86_avx2_vpdpwuud_128)
669 .Case(S: "uud.256", Value: Intrinsic::x86_avx2_vpdpwuud_256)
670 .Case(S: "uuds.128", Value: Intrinsic::x86_avx2_vpdpwuuds_128)
671 .Case(S: "uuds.256", Value: Intrinsic::x86_avx2_vpdpwuuds_256)
672 .Default(Value: Intrinsic::not_intrinsic);
673 if (ID != Intrinsic::not_intrinsic)
674 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
675 }
676 return false; // No other 'x86.avx2.*'
677 }
678
679 if (Name.consume_front(Prefix: "avx10.")) {
680 if (Name.consume_front(Prefix: "vpdpb")) {
681 // Added in 21.1
682 ID = StringSwitch<Intrinsic::ID>(Name)
683 .Case(S: "ssd.512", Value: Intrinsic::x86_avx10_vpdpbssd_512)
684 .Case(S: "ssds.512", Value: Intrinsic::x86_avx10_vpdpbssds_512)
685 .Case(S: "sud.512", Value: Intrinsic::x86_avx10_vpdpbsud_512)
686 .Case(S: "suds.512", Value: Intrinsic::x86_avx10_vpdpbsuds_512)
687 .Case(S: "uud.512", Value: Intrinsic::x86_avx10_vpdpbuud_512)
688 .Case(S: "uuds.512", Value: Intrinsic::x86_avx10_vpdpbuuds_512)
689 .Default(Value: Intrinsic::not_intrinsic);
690 if (ID != Intrinsic::not_intrinsic)
691 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
692 } else if (Name.consume_front(Prefix: "vpdpw")) {
693 ID = StringSwitch<Intrinsic::ID>(Name)
694 .Case(S: "sud.512", Value: Intrinsic::x86_avx10_vpdpwsud_512)
695 .Case(S: "suds.512", Value: Intrinsic::x86_avx10_vpdpwsuds_512)
696 .Case(S: "usd.512", Value: Intrinsic::x86_avx10_vpdpwusd_512)
697 .Case(S: "usds.512", Value: Intrinsic::x86_avx10_vpdpwusds_512)
698 .Case(S: "uud.512", Value: Intrinsic::x86_avx10_vpdpwuud_512)
699 .Case(S: "uuds.512", Value: Intrinsic::x86_avx10_vpdpwuuds_512)
700 .Default(Value: Intrinsic::not_intrinsic);
701 if (ID != Intrinsic::not_intrinsic)
702 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
703 }
704 return false; // No other 'x86.avx10.*'
705 }
706
707 if (Name.consume_front(Prefix: "avx512bf16.")) {
708 // Added in 9.0
709 ID = StringSwitch<Intrinsic::ID>(Name)
710 .Case(S: "cvtne2ps2bf16.128",
711 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
712 .Case(S: "cvtne2ps2bf16.256",
713 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
714 .Case(S: "cvtne2ps2bf16.512",
715 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
716 .Case(S: "mask.cvtneps2bf16.128",
717 Value: Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
718 .Case(S: "cvtneps2bf16.256",
719 Value: Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
720 .Case(S: "cvtneps2bf16.512",
721 Value: Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
722 .Default(Value: Intrinsic::not_intrinsic);
723 if (ID != Intrinsic::not_intrinsic)
724 return upgradeX86BF16Intrinsic(F, IID: ID, NewFn);
725
726 // Added in 9.0
727 ID = StringSwitch<Intrinsic::ID>(Name)
728 .Case(S: "dpbf16ps.128", Value: Intrinsic::x86_avx512bf16_dpbf16ps_128)
729 .Case(S: "dpbf16ps.256", Value: Intrinsic::x86_avx512bf16_dpbf16ps_256)
730 .Case(S: "dpbf16ps.512", Value: Intrinsic::x86_avx512bf16_dpbf16ps_512)
731 .Default(Value: Intrinsic::not_intrinsic);
732 if (ID != Intrinsic::not_intrinsic)
733 return upgradeX86BF16DPIntrinsic(F, IID: ID, NewFn);
734 return false; // No other 'x86.avx512bf16.*'.
735 }
736
737 if (Name.consume_front(Prefix: "xop.")) {
738 Intrinsic::ID ID = Intrinsic::not_intrinsic;
739 if (Name.starts_with(Prefix: "vpermil2")) { // Added in 3.9
740 // Upgrade any XOP PERMIL2 index operand still using a float/double
741 // vector.
742 auto Idx = F->getFunctionType()->getParamType(i: 2);
743 if (Idx->isFPOrFPVectorTy()) {
744 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
745 unsigned EltSize = Idx->getScalarSizeInBits();
746 if (EltSize == 64 && IdxSize == 128)
747 ID = Intrinsic::x86_xop_vpermil2pd;
748 else if (EltSize == 32 && IdxSize == 128)
749 ID = Intrinsic::x86_xop_vpermil2ps;
750 else if (EltSize == 64 && IdxSize == 256)
751 ID = Intrinsic::x86_xop_vpermil2pd_256;
752 else
753 ID = Intrinsic::x86_xop_vpermil2ps_256;
754 }
755 } else if (F->arg_size() == 2)
756 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
757 ID = StringSwitch<Intrinsic::ID>(Name)
758 .Case(S: "vfrcz.ss", Value: Intrinsic::x86_xop_vfrcz_ss)
759 .Case(S: "vfrcz.sd", Value: Intrinsic::x86_xop_vfrcz_sd)
760 .Default(Value: Intrinsic::not_intrinsic);
761
762 if (ID != Intrinsic::not_intrinsic) {
763 rename(GV: F);
764 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
765 return true;
766 }
767 return false; // No other 'x86.xop.*'
768 }
769
770 if (Name == "seh.recoverfp") {
771 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
772 id: Intrinsic::eh_recoverfp);
773 return true;
774 }
775
776 return false;
777}
778
// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
// On success NewFn receives the replacement declaration; some upgrades (e.g.
// 'arm.mve.vctp64') only rename F here and are rewritten later in
// UpgradeIntrinsicCall, in which case NewFn is left untouched.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with(Prefix: "rbit")) {
    // '(arm|aarch64).rbit'.
    // Target-specific bit reversal became the generic llvm.bitreverse.
    NewFn = Intrinsic::getOrInsertDeclaration(
        M: F->getParent(), id: Intrinsic::bitreverse, Tys: F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    // Became the generic llvm.thread.pointer, overloaded on the return type.
    NewFn = Intrinsic::getOrInsertDeclaration(
        M: F->getParent(), id: Intrinsic::thread_pointer, Tys: F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front(Prefix: "neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front(Prefix: "bfdot.")) {
      // (arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases(CaseStrings: {"v2f32.v8i8", "v4f32.v16i8"},
                     Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Value: Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        // Derive the new bf16 operand vector type from the return width:
        // 64-bit result -> v4bf16, 128-bit result -> v8bf16.
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            ._M_elems: {F->getReturnType(),
             FixedVectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), NumElts: OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front(Prefix: "bfm")) {
      // (arm|aarch64).neon.bfm*'.
      if (Name.consume_back(Suffix: ".v4f32.v16i8")) {
        // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case(S: "mla",
                      Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case(S: "lalb",
                      Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case(S: "lalt",
                      Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Value: Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      // Target-specific ops that became generic intrinsics, overloaded on
      // the (vector) argument type.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith(S: "vclz.", Value: Intrinsic::ctlz)
                             .StartsWith(S: "vcnt.", Value: Intrinsic::ctpop)
                             .StartsWith(S: "vqadds.", Value: Intrinsic::sadd_sat)
                             .StartsWith(S: "vqaddu.", Value: Intrinsic::uadd_sat)
                             .StartsWith(S: "vqsubs.", Value: Intrinsic::ssub_sat)
                             .StartsWith(S: "vqsubu.", Value: Intrinsic::usub_sat)
                             .StartsWith(S: "vrinta.", Value: Intrinsic::round)
                             .StartsWith(S: "vrintn.", Value: Intrinsic::roundeven)
                             .StartsWith(S: "vrintm.", Value: Intrinsic::floor)
                             .StartsWith(S: "vrintp.", Value: Intrinsic::ceil)
                             .StartsWith(S: "vrintx.", Value: Intrinsic::rint)
                             .StartsWith(S: "vrintz.", Value: Intrinsic::trunc)
                             .Default(Value: Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
                                                  Tys: F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front(Prefix: "vst")) {
        // 'arm.neon.vst*'.
        // Groups[1] captures either a plain interleave factor ('1'..'4')
        // or '<N>lane' for the per-lane store variants.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(String: Name, Matches: &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          // NOTE(review): the arithmetic below assumes vstN takes N+2
          // parameters and vstNlane takes N+3, so the parameter count
          // recovers N for indexing — verify against the intrinsic defs.
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                M: F->getParent(), id: StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                M: F->getParent(), id: StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front(Prefix: "mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(Val: F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(GV: F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with(Prefix: "vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            M: F->getParent(), id: Intrinsic::roundeven, Tys: F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      // They only need renaming (true with NewFn left unset); the calls are
      // fixed up later.
      if (Name.consume_back(Suffix: ".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(Suffix: ".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(Suffix: ".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front(Prefix: "vldr.gather.");
          if (IsGather || Name.consume_front(Prefix: "vstr.scatter.")) {
            if (Name.consume_front(Prefix: "base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front(Prefix: "wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front(Prefix: "offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front(Prefix: "cde.vcx")) {
      // 'arm.cde.vcx*'.
      // Like the MVE predicated ops above: v4i1 predicate became v2i1, so
      // these just report "needs upgrade" and are handled at the call site.
      if (Name.consume_back(Suffix: ".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith(S: "frintn", Value: Intrinsic::roundeven)
                             .StartsWith(S: "rbit", Value: Intrinsic::bitreverse)
                             .Default(Value: Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
                                                  Tys: F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with(Prefix: "addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(Val: F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          // Floating-point pairwise add was split out into faddp.
          NewFn = Intrinsic::getOrInsertDeclaration(
              M: F->getParent(), id: Intrinsic::aarch64_neon_faddp, Tys: Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
      if (Name.starts_with(Prefix: "bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front(Prefix: "sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front(Prefix: "bf")) {
        if (Name.consume_back(Suffix: ".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case(S: "dot", Value: Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case(S: "mlalb", Value: Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case(S: "mlalt", Value: Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Value: Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front(Prefix: "addqv")) {
        // 'aarch64.sve.addqv'.
        // Only the floating-point form is upgraded (to faddqv).
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            M: F->getParent(), id: Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front(Prefix: "ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(String: Name)) {
          Type *ScalarTy =
              cast<VectorType>(Val: F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(Val: F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ElementType: ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          // Name[0] is '2'..'4' per LdRegex; index the matching ldN_sret.
          NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
                                                    id: LoadIDs[Name[0] - '2'], Tys: Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front(Prefix: "tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with(Prefix: "get")) {
          // 'aarch64.sve.tuple.get*' -> generic llvm.vector.extract.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              M: F->getParent(), id: Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with(Prefix: "set")) {
          // 'aarch64.sve.tuple.set*' -> generic llvm.vector.insert.
          // NOTE(review): Args are reordered {tuple, inserted, index} to
          // match vector_insert's overload order — confirm against the
          // intrinsic definition.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              M: F->getParent(), id: Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(String: Name)) {
          // 'aarch64.sve.tuple.create*' -> generic llvm.vector.insert.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              M: F->getParent(), id: Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }

      if (Name.starts_with(Prefix: "rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>' -> generic llvm.vector.reverse.
        NewFn = Intrinsic::getOrInsertDeclaration(
            M: F->getParent(), id: Intrinsic::vector_reverse, Tys: F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
1105
1106static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
1107 StringRef Name) {
1108 if (Name.consume_front(Prefix: "cp.async.bulk.tensor.g2s.")) {
1109 Intrinsic::ID ID =
1110 StringSwitch<Intrinsic::ID>(Name)
1111 .Case(S: "im2col.3d",
1112 Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1113 .Case(S: "im2col.4d",
1114 Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1115 .Case(S: "im2col.5d",
1116 Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1117 .Case(S: "tile.1d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1118 .Case(S: "tile.2d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1119 .Case(S: "tile.3d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1120 .Case(S: "tile.4d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1121 .Case(S: "tile.5d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1122 .Default(Value: Intrinsic::not_intrinsic);
1123
1124 if (ID == Intrinsic::not_intrinsic)
1125 return ID;
1126
1127 // These intrinsics may need upgrade for two reasons:
1128 // (1) When the address-space of the first argument is shared[AS=3]
1129 // (and we upgrade it to use shared_cluster address-space[AS=7])
1130 if (F->getArg(i: 0)->getType()->getPointerAddressSpace() ==
1131 NVPTXAS::ADDRESS_SPACE_SHARED)
1132 return ID;
1133
1134 // (2) When there are only two boolean flag arguments at the end:
1135 //
1136 // The last three parameters of the older version of these
1137 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1138 //
1139 // The newer version reads as:
1140 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1141 //
1142 // So, when the type of the [N-3]rd argument is "not i1", then
1143 // it is the older version and we need to upgrade.
1144 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1145 Type *ArgType = F->getFunctionType()->getParamType(i: FlagStartIndex);
1146 if (!ArgType->isIntegerTy(Bitwidth: 1))
1147 return ID;
1148 }
1149
1150 return Intrinsic::not_intrinsic;
1151}
1152
1153static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
1154 StringRef Name) {
1155 if (Name.consume_front(Prefix: "mapa.shared.cluster"))
1156 if (F->getReturnType()->getPointerAddressSpace() ==
1157 NVPTXAS::ADDRESS_SPACE_SHARED)
1158 return Intrinsic::nvvm_mapa_shared_cluster;
1159
1160 if (Name.consume_front(Prefix: "cp.async.bulk.")) {
1161 Intrinsic::ID ID =
1162 StringSwitch<Intrinsic::ID>(Name)
1163 .Case(S: "global.to.shared.cluster",
1164 Value: Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1165 .Case(S: "shared.cta.to.cluster",
1166 Value: Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1167 .Default(Value: Intrinsic::not_intrinsic);
1168
1169 if (ID != Intrinsic::not_intrinsic)
1170 if (F->getArg(i: 0)->getType()->getPointerAddressSpace() ==
1171 NVPTXAS::ADDRESS_SPACE_SHARED)
1172 return ID;
1173 }
1174
1175 return Intrinsic::not_intrinsic;
1176}
1177
// Map a legacy 'nvvm.*' bf16 math intrinsic name (with the leading 'nvvm.'
// already stripped by the caller) to its current Intrinsic::ID. Covers the
// fma.rn, fmax, fmin and neg bf16/bf16x2 families; returns
// Intrinsic::not_intrinsic for every other name.
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  // 'nvvm.fma.rn.*' bf16 variants.
  if (Name.consume_front(Prefix: "fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case(S: "bf16", Value: Intrinsic::nvvm_fma_rn_bf16)
        .Case(S: "bf16x2", Value: Intrinsic::nvvm_fma_rn_bf16x2)
        .Case(S: "relu.bf16", Value: Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case(S: "relu.bf16x2", Value: Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Default(Value: Intrinsic::not_intrinsic);

  // 'nvvm.fmax.*' bf16 variants (incl. ftz/nan/xorsign.abs modifiers).
  if (Name.consume_front(Prefix: "fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case(S: "bf16", Value: Intrinsic::nvvm_fmax_bf16)
        .Case(S: "bf16x2", Value: Intrinsic::nvvm_fmax_bf16x2)
        .Case(S: "ftz.bf16", Value: Intrinsic::nvvm_fmax_ftz_bf16)
        .Case(S: "ftz.bf16x2", Value: Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case(S: "ftz.nan.bf16", Value: Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case(S: "ftz.nan.bf16x2", Value: Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case(S: "ftz.nan.xorsign.abs.bf16",
              Value: Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case(S: "ftz.nan.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case(S: "ftz.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case(S: "ftz.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case(S: "nan.bf16", Value: Intrinsic::nvvm_fmax_nan_bf16)
        .Case(S: "nan.bf16x2", Value: Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case(S: "nan.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case(S: "nan.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case(S: "xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case(S: "xorsign.abs.bf16x2", Value: Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Value: Intrinsic::not_intrinsic);

  // 'nvvm.fmin.*' bf16 variants — the mirror image of the fmax table above.
  if (Name.consume_front(Prefix: "fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case(S: "bf16", Value: Intrinsic::nvvm_fmin_bf16)
        .Case(S: "bf16x2", Value: Intrinsic::nvvm_fmin_bf16x2)
        .Case(S: "ftz.bf16", Value: Intrinsic::nvvm_fmin_ftz_bf16)
        .Case(S: "ftz.bf16x2", Value: Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case(S: "ftz.nan.bf16", Value: Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case(S: "ftz.nan.bf16x2", Value: Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case(S: "ftz.nan.xorsign.abs.bf16",
              Value: Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case(S: "ftz.nan.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case(S: "ftz.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case(S: "ftz.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case(S: "nan.bf16", Value: Intrinsic::nvvm_fmin_nan_bf16)
        .Case(S: "nan.bf16x2", Value: Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case(S: "nan.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case(S: "nan.xorsign.abs.bf16x2",
              Value: Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case(S: "xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case(S: "xorsign.abs.bf16x2", Value: Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Value: Intrinsic::not_intrinsic);

  // 'nvvm.neg.*' bf16 variants.
  if (Name.consume_front(Prefix: "neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case(S: "bf16", Value: Intrinsic::nvvm_neg_bf16)
        .Case(S: "bf16x2", Value: Intrinsic::nvvm_neg_bf16x2)
        .Default(Value: Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}
1243
1244static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
1245 return Name.consume_front(Prefix: "local") || Name.consume_front(Prefix: "shared") ||
1246 Name.consume_front(Prefix: "global") || Name.consume_front(Prefix: "constant") ||
1247 Name.consume_front(Prefix: "param");
1248}
1249
1250static bool convertIntrinsicValidType(StringRef Name,
1251 const FunctionType *FuncTy) {
1252 Type *HalfTy = Type::getHalfTy(C&: FuncTy->getContext());
1253 if (Name.starts_with(Prefix: "to.fp16")) {
1254 return CastInst::castIsValid(op: Instruction::FPTrunc, SrcTy: FuncTy->getParamType(i: 0),
1255 DstTy: HalfTy) &&
1256 CastInst::castIsValid(op: Instruction::BitCast, SrcTy: HalfTy,
1257 DstTy: FuncTy->getReturnType());
1258 }
1259
1260 if (Name.starts_with(Prefix: "from.fp16")) {
1261 return CastInst::castIsValid(op: Instruction::BitCast, SrcTy: FuncTy->getParamType(i: 0),
1262 DstTy: HalfTy) &&
1263 CastInst::castIsValid(op: Instruction::FPExt, SrcTy: HalfTy,
1264 DstTy: FuncTy->getReturnType());
1265 }
1266
1267 return false;
1268}
1269
1270static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
1271 bool CanUpgradeDebugIntrinsicsToRecords) {
1272 assert(F && "Illegal to upgrade a non-existent Function.");
1273
1274 StringRef Name = F->getName();
1275
1276 // Quickly eliminate it, if it's not a candidate.
1277 if (!Name.consume_front(Prefix: "llvm.") || Name.empty())
1278 return false;
1279
1280 switch (Name[0]) {
1281 default: break;
1282 case 'a': {
1283 bool IsArm = Name.consume_front(Prefix: "arm.");
1284 if (IsArm || Name.consume_front(Prefix: "aarch64.")) {
1285 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1286 return true;
1287 break;
1288 }
1289
1290 if (Name.consume_front(Prefix: "amdgcn.")) {
1291 if (Name == "alignbit") {
1292 // Target specific intrinsic became redundant
1293 NewFn = Intrinsic::getOrInsertDeclaration(
1294 M: F->getParent(), id: Intrinsic::fshr, Tys: {F->getReturnType()});
1295 return true;
1296 }
1297
1298 if (Name.consume_front(Prefix: "atomic.")) {
1299 if (Name.starts_with(Prefix: "inc") || Name.starts_with(Prefix: "dec") ||
1300 Name.starts_with(Prefix: "cond.sub") || Name.starts_with(Prefix: "csub")) {
1301 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1302 // and usub_sat so there's no new declaration.
1303 NewFn = nullptr;
1304 return true;
1305 }
1306 break; // No other 'amdgcn.atomic.*'
1307 }
1308
1309 // Legacy wmma iu intrinsics without the optional clamp operand.
1310 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
1311 F->arg_size() == 7) {
1312 NewFn = nullptr;
1313 return true;
1314 }
1315 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
1316 F->arg_size() == 8) {
1317 NewFn = nullptr;
1318 return true;
1319 }
1320
1321 if (Name.consume_front(Prefix: "ds.") || Name.consume_front(Prefix: "global.atomic.") ||
1322 Name.consume_front(Prefix: "flat.atomic.")) {
1323 if (Name.starts_with(Prefix: "fadd") ||
1324 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1325 (Name.starts_with(Prefix: "fmin") && !Name.starts_with(Prefix: "fmin.num")) ||
1326 (Name.starts_with(Prefix: "fmax") && !Name.starts_with(Prefix: "fmax.num"))) {
1327 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1328 // declaration.
1329 NewFn = nullptr;
1330 return true;
1331 }
1332 }
1333
1334 if (Name.starts_with(Prefix: "ldexp.")) {
1335 // Target specific intrinsic became redundant
1336 NewFn = Intrinsic::getOrInsertDeclaration(
1337 M: F->getParent(), id: Intrinsic::ldexp,
1338 Tys: {F->getReturnType(), F->getArg(i: 1)->getType()});
1339 return true;
1340 }
1341 break; // No other 'amdgcn.*'
1342 }
1343
1344 break;
1345 }
1346 case 'c': {
1347 if (F->arg_size() == 1) {
1348 if (Name.consume_front(Prefix: "convert.")) {
1349 if (convertIntrinsicValidType(Name, FuncTy: F->getFunctionType())) {
1350 NewFn = nullptr;
1351 return true;
1352 }
1353 }
1354
1355 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1356 .StartsWith(S: "ctlz.", Value: Intrinsic::ctlz)
1357 .StartsWith(S: "cttz.", Value: Intrinsic::cttz)
1358 .Default(Value: Intrinsic::not_intrinsic);
1359 if (ID != Intrinsic::not_intrinsic) {
1360 rename(GV: F);
1361 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1362 Tys: F->arg_begin()->getType());
1363 return true;
1364 }
1365 }
1366
1367 if (F->arg_size() == 2 && Name == "coro.end") {
1368 rename(GV: F);
1369 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1370 id: Intrinsic::coro_end);
1371 return true;
1372 }
1373
1374 break;
1375 }
1376 case 'd':
1377 if (Name.consume_front(Prefix: "dbg.")) {
1378 // Mark debug intrinsics for upgrade to new debug format.
1379 if (CanUpgradeDebugIntrinsicsToRecords) {
1380 if (Name == "addr" || Name == "value" || Name == "assign" ||
1381 Name == "declare" || Name == "label") {
1382 // There's no function to replace these with.
1383 NewFn = nullptr;
1384 // But we do want these to get upgraded.
1385 return true;
1386 }
1387 }
1388 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1389 // converted to DbgVariableRecords later.
1390 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1391 rename(GV: F);
1392 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1393 id: Intrinsic::dbg_value);
1394 return true;
1395 }
1396 break; // No other 'dbg.*'.
1397 }
1398 break;
1399 case 'e':
1400 if (Name.consume_front(Prefix: "experimental.vector.")) {
1401 Intrinsic::ID ID =
1402 StringSwitch<Intrinsic::ID>(Name)
1403 // Skip over extract.last.active, otherwise it will be 'upgraded'
1404 // to a regular vector extract which is a different operation.
1405 .StartsWith(S: "extract.last.active.", Value: Intrinsic::not_intrinsic)
1406 .StartsWith(S: "extract.", Value: Intrinsic::vector_extract)
1407 .StartsWith(S: "insert.", Value: Intrinsic::vector_insert)
1408 .StartsWith(S: "reverse.", Value: Intrinsic::vector_reverse)
1409 .StartsWith(S: "interleave2.", Value: Intrinsic::vector_interleave2)
1410 .StartsWith(S: "deinterleave2.", Value: Intrinsic::vector_deinterleave2)
1411 .StartsWith(S: "partial.reduce.add",
1412 Value: Intrinsic::vector_partial_reduce_add)
1413 .Default(Value: Intrinsic::not_intrinsic);
1414 if (ID != Intrinsic::not_intrinsic) {
1415 const auto *FT = F->getFunctionType();
1416 SmallVector<Type *, 2> Tys;
1417 if (ID == Intrinsic::vector_extract ||
1418 ID == Intrinsic::vector_interleave2)
1419 // Extracting overloads the return type.
1420 Tys.push_back(Elt: FT->getReturnType());
1421 if (ID != Intrinsic::vector_interleave2)
1422 Tys.push_back(Elt: FT->getParamType(i: 0));
1423 if (ID == Intrinsic::vector_insert ||
1424 ID == Intrinsic::vector_partial_reduce_add)
1425 // Inserting overloads the inserted type.
1426 Tys.push_back(Elt: FT->getParamType(i: 1));
1427 rename(GV: F);
1428 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys);
1429 return true;
1430 }
1431
1432 if (Name.consume_front(Prefix: "reduce.")) {
1433 SmallVector<StringRef, 2> Groups;
1434 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1435 if (R.match(String: Name, Matches: &Groups))
1436 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1437 .Case(S: "add", Value: Intrinsic::vector_reduce_add)
1438 .Case(S: "mul", Value: Intrinsic::vector_reduce_mul)
1439 .Case(S: "and", Value: Intrinsic::vector_reduce_and)
1440 .Case(S: "or", Value: Intrinsic::vector_reduce_or)
1441 .Case(S: "xor", Value: Intrinsic::vector_reduce_xor)
1442 .Case(S: "smax", Value: Intrinsic::vector_reduce_smax)
1443 .Case(S: "smin", Value: Intrinsic::vector_reduce_smin)
1444 .Case(S: "umax", Value: Intrinsic::vector_reduce_umax)
1445 .Case(S: "umin", Value: Intrinsic::vector_reduce_umin)
1446 .Case(S: "fmax", Value: Intrinsic::vector_reduce_fmax)
1447 .Case(S: "fmin", Value: Intrinsic::vector_reduce_fmin)
1448 .Default(Value: Intrinsic::not_intrinsic);
1449
1450 bool V2 = false;
1451 if (ID == Intrinsic::not_intrinsic) {
1452 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1453 Groups.clear();
1454 V2 = true;
1455 if (R2.match(String: Name, Matches: &Groups))
1456 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1457 .Case(S: "fadd", Value: Intrinsic::vector_reduce_fadd)
1458 .Case(S: "fmul", Value: Intrinsic::vector_reduce_fmul)
1459 .Default(Value: Intrinsic::not_intrinsic);
1460 }
1461 if (ID != Intrinsic::not_intrinsic) {
1462 rename(GV: F);
1463 auto Args = F->getFunctionType()->params();
1464 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1465 Tys: {Args[V2 ? 1 : 0]});
1466 return true;
1467 }
1468 break; // No other 'expermental.vector.reduce.*'.
1469 }
1470
1471 if (Name.consume_front(Prefix: "splice"))
1472 return true;
1473 break; // No other 'experimental.vector.*'.
1474 }
1475 if (Name.consume_front(Prefix: "experimental.stepvector.")) {
1476 Intrinsic::ID ID = Intrinsic::stepvector;
1477 rename(GV: F);
1478 NewFn = Intrinsic::getOrInsertDeclaration(
1479 M: F->getParent(), id: ID, Tys: F->getFunctionType()->getReturnType());
1480 return true;
1481 }
1482 break; // No other 'e*'.
1483 case 'f':
1484 if (Name.starts_with(Prefix: "flt.rounds")) {
1485 rename(GV: F);
1486 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1487 id: Intrinsic::get_rounding);
1488 return true;
1489 }
1490 break;
1491 case 'i':
1492 if (Name.starts_with(Prefix: "invariant.group.barrier")) {
1493 // Rename invariant.group.barrier to launder.invariant.group
1494 auto Args = F->getFunctionType()->params();
1495 Type* ObjectPtr[1] = {Args[0]};
1496 rename(GV: F);
1497 NewFn = Intrinsic::getOrInsertDeclaration(
1498 M: F->getParent(), id: Intrinsic::launder_invariant_group, Tys: ObjectPtr);
1499 return true;
1500 }
1501 break;
1502 case 'l':
1503 if ((Name.starts_with(Prefix: "lifetime.start") ||
1504 Name.starts_with(Prefix: "lifetime.end")) &&
1505 F->arg_size() == 2) {
1506 Intrinsic::ID IID = Name.starts_with(Prefix: "lifetime.start")
1507 ? Intrinsic::lifetime_start
1508 : Intrinsic::lifetime_end;
1509 rename(GV: F);
1510 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1511 Tys: F->getArg(i: 0)->getType());
1512 return true;
1513 }
1514 break;
1515 case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to instead embed the alignment as an attribute of
    // the pointer args.
1519 if (unsigned ID = StringSwitch<unsigned>(Name)
1520 .StartsWith(S: "memcpy.", Value: Intrinsic::memcpy)
1521 .StartsWith(S: "memmove.", Value: Intrinsic::memmove)
1522 .Default(Value: 0)) {
1523 if (F->arg_size() == 5) {
1524 rename(GV: F);
1525 // Get the types of dest, src, and len
1526 ArrayRef<Type *> ParamTypes =
1527 F->getFunctionType()->params().slice(N: 0, M: 3);
1528 NewFn =
1529 Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys: ParamTypes);
1530 return true;
1531 }
1532 }
1533 if (Name.starts_with(Prefix: "memset.") && F->arg_size() == 5) {
1534 rename(GV: F);
1535 // Get the types of dest, and len
1536 const auto *FT = F->getFunctionType();
1537 Type *ParamTypes[2] = {
1538 FT->getParamType(i: 0), // Dest
1539 FT->getParamType(i: 2) // len
1540 };
1541 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1542 id: Intrinsic::memset, Tys: ParamTypes);
1543 return true;
1544 }
1545
1546 unsigned MaskedID =
1547 StringSwitch<unsigned>(Name)
1548 .StartsWith(S: "masked.load", Value: Intrinsic::masked_load)
1549 .StartsWith(S: "masked.gather", Value: Intrinsic::masked_gather)
1550 .StartsWith(S: "masked.store", Value: Intrinsic::masked_store)
1551 .StartsWith(S: "masked.scatter", Value: Intrinsic::masked_scatter)
1552 .Default(Value: 0);
1553 if (MaskedID && F->arg_size() == 4) {
1554 rename(GV: F);
1555 if (MaskedID == Intrinsic::masked_load ||
1556 MaskedID == Intrinsic::masked_gather) {
1557 NewFn = Intrinsic::getOrInsertDeclaration(
1558 M: F->getParent(), id: MaskedID,
1559 Tys: {F->getReturnType(), F->getArg(i: 0)->getType()});
1560 return true;
1561 }
1562 NewFn = Intrinsic::getOrInsertDeclaration(
1563 M: F->getParent(), id: MaskedID,
1564 Tys: {F->getArg(i: 0)->getType(), F->getArg(i: 1)->getType()});
1565 return true;
1566 }
1567 break;
1568 }
1569 case 'n': {
1570 if (Name.consume_front(Prefix: "nvvm.")) {
1571 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1572 if (F->arg_size() == 1) {
1573 Intrinsic::ID IID =
1574 StringSwitch<Intrinsic::ID>(Name)
1575 .Cases(CaseStrings: {"brev32", "brev64"}, Value: Intrinsic::bitreverse)
1576 .Case(S: "clz.i", Value: Intrinsic::ctlz)
1577 .Case(S: "popc.i", Value: Intrinsic::ctpop)
1578 .Default(Value: Intrinsic::not_intrinsic);
1579 if (IID != Intrinsic::not_intrinsic) {
1580 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1581 Tys: {F->getReturnType()});
1582 return true;
1583 }
1584 } else if (F->arg_size() == 2) {
1585 Intrinsic::ID IID =
1586 StringSwitch<Intrinsic::ID>(Name)
1587 .Cases(CaseStrings: {"max.s", "max.i", "max.ll"}, Value: Intrinsic::smax)
1588 .Cases(CaseStrings: {"min.s", "min.i", "min.ll"}, Value: Intrinsic::smin)
1589 .Cases(CaseStrings: {"max.us", "max.ui", "max.ull"}, Value: Intrinsic::umax)
1590 .Cases(CaseStrings: {"min.us", "min.ui", "min.ull"}, Value: Intrinsic::umin)
1591 .Default(Value: Intrinsic::not_intrinsic);
1592 if (IID != Intrinsic::not_intrinsic) {
1593 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1594 Tys: {F->getReturnType()});
1595 return true;
1596 }
1597 }
1598
1599 // Check for nvvm intrinsics that need a return type adjustment.
1600 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1601 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1602 if (IID != Intrinsic::not_intrinsic) {
1603 NewFn = nullptr;
1604 return true;
1605 }
1606 }
1607
1608 // Upgrade Distributed Shared Memory Intrinsics
1609 Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
1610 if (IID != Intrinsic::not_intrinsic) {
1611 rename(GV: F);
1612 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1613 return true;
1614 }
1615
1616 // Upgrade TMA copy G2S Intrinsics
1617 IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
1618 if (IID != Intrinsic::not_intrinsic) {
1619 rename(GV: F);
1620 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1621 return true;
1622 }
1623
1624 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1625 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1626 //
1627 // TODO: We could add lohi.i2d.
1628 bool Expand = false;
1629 if (Name.consume_front(Prefix: "abs."))
1630 // nvvm.abs.{i,ii}
1631 Expand =
1632 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1633 else if (Name.consume_front(Prefix: "fabs."))
1634 // nvvm.fabs.{f,ftz.f,d}
1635 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1636 else if (Name.consume_front(Prefix: "ex2.approx."))
1637 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1638 Expand =
1639 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1640 else if (Name.consume_front(Prefix: "atomic.load."))
1641 // nvvm.atomic.load.add.{f32,f64}.p
1642 // nvvm.atomic.load.{inc,dec}.32.p
1643 Expand = StringSwitch<bool>(Name)
1644 .StartsWith(S: "add.f32.p", Value: true)
1645 .StartsWith(S: "add.f64.p", Value: true)
1646 .StartsWith(S: "inc.32.p", Value: true)
1647 .StartsWith(S: "dec.32.p", Value: true)
1648 .Default(Value: false);
1649 else if (Name.consume_front(Prefix: "bitcast."))
1650 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1651 Expand =
1652 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1653 else if (Name.consume_front(Prefix: "rotate."))
1654 // nvvm.rotate.{b32,b64,right.b64}
1655 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1656 else if (Name.consume_front(Prefix: "ptr.gen.to."))
1657 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1658 Expand = consumeNVVMPtrAddrSpace(Name);
1659 else if (Name.consume_front(Prefix: "ptr."))
1660 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1661 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(Prefix: ".to.gen");
1662 else if (Name.consume_front(Prefix: "ldg.global."))
1663 // nvvm.ldg.global.{i,p,f}
1664 Expand = (Name.starts_with(Prefix: "i.") || Name.starts_with(Prefix: "f.") ||
1665 Name.starts_with(Prefix: "p."));
1666 else
1667 Expand = StringSwitch<bool>(Name)
1668 .Case(S: "barrier0", Value: true)
1669 .Case(S: "barrier.n", Value: true)
1670 .Case(S: "barrier.sync.cnt", Value: true)
1671 .Case(S: "barrier.sync", Value: true)
1672 .Case(S: "barrier", Value: true)
1673 .Case(S: "bar.sync", Value: true)
1674 .Case(S: "barrier0.popc", Value: true)
1675 .Case(S: "barrier0.and", Value: true)
1676 .Case(S: "barrier0.or", Value: true)
1677 .Case(S: "clz.ll", Value: true)
1678 .Case(S: "popc.ll", Value: true)
1679 .Case(S: "h2f", Value: true)
1680 .Case(S: "swap.lo.hi.b64", Value: true)
1681 .Case(S: "tanh.approx.f32", Value: true)
1682 .Default(Value: false);
1683
1684 if (Expand) {
1685 NewFn = nullptr;
1686 return true;
1687 }
1688 break; // No other 'nvvm.*'.
1689 }
1690 break;
1691 }
1692 case 'o':
1693 if (Name.starts_with(Prefix: "objectsize.")) {
1694 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1695 if (F->arg_size() == 2 || F->arg_size() == 3) {
1696 rename(GV: F);
1697 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1698 id: Intrinsic::objectsize, Tys);
1699 return true;
1700 }
1701 }
1702 break;
1703
1704 case 'p':
1705 if (Name.starts_with(Prefix: "ptr.annotation.") && F->arg_size() == 4) {
1706 rename(GV: F);
1707 NewFn = Intrinsic::getOrInsertDeclaration(
1708 M: F->getParent(), id: Intrinsic::ptr_annotation,
1709 Tys: {F->arg_begin()->getType(), F->getArg(i: 1)->getType()});
1710 return true;
1711 }
1712 break;
1713
1714 case 'r': {
1715 if (Name.consume_front(Prefix: "riscv.")) {
1716 Intrinsic::ID ID;
1717 ID = StringSwitch<Intrinsic::ID>(Name)
1718 .Case(S: "aes32dsi", Value: Intrinsic::riscv_aes32dsi)
1719 .Case(S: "aes32dsmi", Value: Intrinsic::riscv_aes32dsmi)
1720 .Case(S: "aes32esi", Value: Intrinsic::riscv_aes32esi)
1721 .Case(S: "aes32esmi", Value: Intrinsic::riscv_aes32esmi)
1722 .Default(Value: Intrinsic::not_intrinsic);
1723 if (ID != Intrinsic::not_intrinsic) {
1724 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32)) {
1725 rename(GV: F);
1726 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1727 return true;
1728 }
1729 break; // No other applicable upgrades.
1730 }
1731
1732 ID = StringSwitch<Intrinsic::ID>(Name)
1733 .StartsWith(S: "sm4ks", Value: Intrinsic::riscv_sm4ks)
1734 .StartsWith(S: "sm4ed", Value: Intrinsic::riscv_sm4ed)
1735 .Default(Value: Intrinsic::not_intrinsic);
1736 if (ID != Intrinsic::not_intrinsic) {
1737 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32) ||
1738 F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) {
1739 rename(GV: F);
1740 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1741 return true;
1742 }
1743 break; // No other applicable upgrades.
1744 }
1745
1746 ID = StringSwitch<Intrinsic::ID>(Name)
1747 .StartsWith(S: "sha256sig0", Value: Intrinsic::riscv_sha256sig0)
1748 .StartsWith(S: "sha256sig1", Value: Intrinsic::riscv_sha256sig1)
1749 .StartsWith(S: "sha256sum0", Value: Intrinsic::riscv_sha256sum0)
1750 .StartsWith(S: "sha256sum1", Value: Intrinsic::riscv_sha256sum1)
1751 .StartsWith(S: "sm3p0", Value: Intrinsic::riscv_sm3p0)
1752 .StartsWith(S: "sm3p1", Value: Intrinsic::riscv_sm3p1)
1753 .Default(Value: Intrinsic::not_intrinsic);
1754 if (ID != Intrinsic::not_intrinsic) {
1755 if (F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) {
1756 rename(GV: F);
1757 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1758 return true;
1759 }
1760 break; // No other applicable upgrades.
1761 }
1762
1763 // Replace llvm.riscv.clmul with llvm.clmul.
1764 if (Name == "clmul.i32" || Name == "clmul.i64") {
1765 NewFn = Intrinsic::getOrInsertDeclaration(
1766 M: F->getParent(), id: Intrinsic::clmul, Tys: {F->getReturnType()});
1767 return true;
1768 }
1769
1770 break; // No other 'riscv.*' intrinsics
1771 }
1772 } break;
1773
1774 case 's':
1775 if (Name == "stackprotectorcheck") {
1776 NewFn = nullptr;
1777 return true;
1778 }
1779 break;
1780
1781 case 't':
1782 if (Name == "thread.pointer") {
1783 NewFn = Intrinsic::getOrInsertDeclaration(
1784 M: F->getParent(), id: Intrinsic::thread_pointer, Tys: F->getReturnType());
1785 return true;
1786 }
1787 break;
1788
1789 case 'v': {
1790 if (Name == "var.annotation" && F->arg_size() == 4) {
1791 rename(GV: F);
1792 NewFn = Intrinsic::getOrInsertDeclaration(
1793 M: F->getParent(), id: Intrinsic::var_annotation,
1794 Tys: {{F->arg_begin()->getType(), F->getArg(i: 1)->getType()}});
1795 return true;
1796 }
1797 if (Name.consume_front(Prefix: "vector.splice")) {
1798 if (Name.starts_with(Prefix: ".left") || Name.starts_with(Prefix: ".right"))
1799 break;
1800 return true;
1801 }
1802 break;
1803 }
1804
1805 case 'w':
1806 if (Name.consume_front(Prefix: "wasm.")) {
1807 Intrinsic::ID ID =
1808 StringSwitch<Intrinsic::ID>(Name)
1809 .StartsWith(S: "fma.", Value: Intrinsic::wasm_relaxed_madd)
1810 .StartsWith(S: "fms.", Value: Intrinsic::wasm_relaxed_nmadd)
1811 .StartsWith(S: "laneselect.", Value: Intrinsic::wasm_relaxed_laneselect)
1812 .Default(Value: Intrinsic::not_intrinsic);
1813 if (ID != Intrinsic::not_intrinsic) {
1814 rename(GV: F);
1815 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1816 Tys: F->getReturnType());
1817 return true;
1818 }
1819
1820 if (Name.consume_front(Prefix: "dot.i8x16.i7x16.")) {
1821 ID = StringSwitch<Intrinsic::ID>(Name)
1822 .Case(S: "signed", Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1823 .Case(S: "add.signed",
1824 Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1825 .Default(Value: Intrinsic::not_intrinsic);
1826 if (ID != Intrinsic::not_intrinsic) {
1827 rename(GV: F);
1828 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1829 return true;
1830 }
1831 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1832 }
1833 break; // No other 'wasm.*'.
1834 }
1835 break;
1836
1837 case 'x':
1838 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1839 return true;
1840 }
1841
1842 auto *ST = dyn_cast<StructType>(Val: F->getReturnType());
1843 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1844 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1845 // Replace return type with literal non-packed struct. Only do this for
1846 // intrinsics declared to return a struct, not for intrinsics with
1847 // overloaded return type, in which case the exact struct type will be
1848 // mangled into the name.
1849 SmallVector<Intrinsic::IITDescriptor> Desc;
1850 Intrinsic::getIntrinsicInfoTableEntries(id: F->getIntrinsicID(), T&: Desc);
1851 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1852 auto *FT = F->getFunctionType();
1853 auto *NewST = StructType::get(Context&: ST->getContext(), Elements: ST->elements());
1854 auto *NewFT = FunctionType::get(Result: NewST, Params: FT->params(), isVarArg: FT->isVarArg());
1855 std::string Name = F->getName().str();
1856 rename(GV: F);
1857 NewFn = Function::Create(Ty: NewFT, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(),
1858 N: Name, M: F->getParent());
1859
1860 // The new function may also need remangling.
1861 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F: NewFn))
1862 NewFn = *Result;
1863 return true;
1864 }
1865 }
1866
1867 // Remangle our intrinsic since we upgrade the mangling
1868 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1869 if (Result != std::nullopt) {
1870 NewFn = *Result;
1871 return true;
1872 }
1873
1874 // This may not belong here. This function is effectively being overloaded
1875 // to both detect an intrinsic which needs upgrading, and to provide the
1876 // upgraded form of the intrinsic. We should perhaps have two separate
1877 // functions for this.
1878 return false;
1879}
1880
1881bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1882 bool CanUpgradeDebugIntrinsicsToRecords) {
1883 NewFn = nullptr;
1884 bool Upgraded =
1885 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1886
1887 // Upgrade intrinsic attributes. This does not change the function.
1888 if (NewFn)
1889 F = NewFn;
1890 if (Intrinsic::ID id = F->getIntrinsicID()) {
1891 // Only do this if the intrinsic signature is valid.
1892 SmallVector<Type *> OverloadTys;
1893 if (Intrinsic::getIntrinsicSignature(id, FT: F->getFunctionType(), ArgTys&: OverloadTys))
1894 F->setAttributes(
1895 Intrinsic::getAttributes(C&: F->getContext(), id, FT: F->getFunctionType()));
1896 }
1897 return Upgraded;
1898}
1899
1900GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1901 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1902 GV->getName() == "llvm.global_dtors")) ||
1903 !GV->hasInitializer())
1904 return nullptr;
1905 ArrayType *ATy = dyn_cast<ArrayType>(Val: GV->getValueType());
1906 if (!ATy)
1907 return nullptr;
1908 StructType *STy = dyn_cast<StructType>(Val: ATy->getElementType());
1909 if (!STy || STy->getNumElements() != 2)
1910 return nullptr;
1911
1912 LLVMContext &C = GV->getContext();
1913 IRBuilder<> IRB(C);
1914 auto EltTy = StructType::get(elt1: STy->getElementType(N: 0), elts: STy->getElementType(N: 1),
1915 elts: IRB.getPtrTy());
1916 Constant *Init = GV->getInitializer();
1917 unsigned N = Init->getNumOperands();
1918 std::vector<Constant *> NewCtors(N);
1919 for (unsigned i = 0; i != N; ++i) {
1920 auto Ctor = cast<Constant>(Val: Init->getOperand(i));
1921 NewCtors[i] = ConstantStruct::get(T: EltTy, Vs: Ctor->getAggregateElement(Elt: 0u),
1922 Vs: Ctor->getAggregateElement(Elt: 1),
1923 Vs: ConstantPointerNull::get(T: IRB.getPtrTy()));
1924 }
1925 Constant *NewInit = ConstantArray::get(T: ArrayType::get(ElementType: EltTy, NumElements: N), V: NewCtors);
1926
1927 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1928 NewInit, GV->getName());
1929}
1930
1931// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1932// to byte shuffles.
1933static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1934 unsigned Shift) {
1935 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1936 unsigned NumElts = ResultTy->getNumElements() * 8;
1937
1938 // Bitcast from a 64-bit element type to a byte element type.
1939 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1940 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1941
1942 // We'll be shuffling in zeroes.
1943 Value *Res = Constant::getNullValue(Ty: VecTy);
1944
1945 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1946 // we'll just return the zero vector.
1947 if (Shift < 16) {
1948 int Idxs[64];
1949 // 256/512-bit version is split into 2/4 16-byte lanes.
1950 for (unsigned l = 0; l != NumElts; l += 16)
1951 for (unsigned i = 0; i != 16; ++i) {
1952 unsigned Idx = NumElts + i - Shift;
1953 if (Idx < NumElts)
1954 Idx -= NumElts - 16; // end of lane, switch operand.
1955 Idxs[l + i] = Idx + l;
1956 }
1957
1958 Res = Builder.CreateShuffleVector(V1: Res, V2: Op, Mask: ArrayRef(Idxs, NumElts));
1959 }
1960
1961 // Bitcast back to a 64-bit element type.
1962 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
1963}
1964
1965// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1966// to byte shuffles.
1967static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1968 unsigned Shift) {
1969 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1970 unsigned NumElts = ResultTy->getNumElements() * 8;
1971
1972 // Bitcast from a 64-bit element type to a byte element type.
1973 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1974 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1975
1976 // We'll be shuffling in zeroes.
1977 Value *Res = Constant::getNullValue(Ty: VecTy);
1978
1979 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1980 // we'll just return the zero vector.
1981 if (Shift < 16) {
1982 int Idxs[64];
1983 // 256/512-bit version is split into 2/4 16-byte lanes.
1984 for (unsigned l = 0; l != NumElts; l += 16)
1985 for (unsigned i = 0; i != 16; ++i) {
1986 unsigned Idx = i + Shift;
1987 if (Idx >= 16)
1988 Idx += NumElts - 16; // end of lane, switch operand.
1989 Idxs[l + i] = Idx + l;
1990 }
1991
1992 Res = Builder.CreateShuffleVector(V1: Op, V2: Res, Mask: ArrayRef(Idxs, NumElts));
1993 }
1994
1995 // Bitcast back to a 64-bit element type.
1996 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
1997}
1998
1999static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2000 unsigned NumElts) {
2001 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2002 llvm::VectorType *MaskTy = FixedVectorType::get(
2003 ElementType: Builder.getInt1Ty(), NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth());
2004 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
2005
2006 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2007 // i8 and we need to extract down to the right number of elements.
2008 if (NumElts <= 4) {
2009 int Indices[4];
2010 for (unsigned i = 0; i != NumElts; ++i)
2011 Indices[i] = i;
2012 Mask = Builder.CreateShuffleVector(V1: Mask, V2: Mask, Mask: ArrayRef(Indices, NumElts),
2013 Name: "extract");
2014 }
2015
2016 return Mask;
2017}
2018
2019static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2020 Value *Op1) {
2021 // If the mask is all ones just emit the first operation.
2022 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2023 if (C->isAllOnesValue())
2024 return Op0;
2025
2026 Mask = getX86MaskVec(Builder, Mask,
2027 NumElts: cast<FixedVectorType>(Val: Op0->getType())->getNumElements());
2028 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
2029}
2030
2031static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2032 Value *Op1) {
2033 // If the mask is all ones just emit the first operation.
2034 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2035 if (C->isAllOnesValue())
2036 return Op0;
2037
2038 auto *MaskTy = FixedVectorType::get(ElementType: Builder.getInt1Ty(),
2039 NumElts: Mask->getType()->getIntegerBitWidth());
2040 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
2041 Mask = Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0);
2042 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
2043}
2044
2045// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2046// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2047// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2048static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
2049 Value *Op1, Value *Shift,
2050 Value *Passthru, Value *Mask,
2051 bool IsVALIGN) {
2052 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Shift)->getZExtValue();
2053
2054 unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
2055 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2056 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2057 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2058
2059 // Mask the immediate for VALIGN.
2060 if (IsVALIGN)
2061 ShiftVal &= (NumElts - 1);
2062
2063 // If palignr is shifting the pair of vectors more than the size of two
2064 // lanes, emit zero.
2065 if (ShiftVal >= 32)
2066 return llvm::Constant::getNullValue(Ty: Op0->getType());
2067
2068 // If palignr is shifting the pair of input vectors more than one lane,
2069 // but less than two lanes, convert to shifting in zeroes.
2070 if (ShiftVal > 16) {
2071 ShiftVal -= 16;
2072 Op1 = Op0;
2073 Op0 = llvm::Constant::getNullValue(Ty: Op0->getType());
2074 }
2075
2076 int Indices[64];
2077 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2078 for (unsigned l = 0; l < NumElts; l += 16) {
2079 for (unsigned i = 0; i != 16; ++i) {
2080 unsigned Idx = ShiftVal + i;
2081 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2082 Idx += NumElts - 16; // End of lane, switch operand.
2083 Indices[l + i] = Idx + l;
2084 }
2085 }
2086
2087 Value *Align = Builder.CreateShuffleVector(
2088 V1: Op1, V2: Op0, Mask: ArrayRef(Indices, NumElts), Name: "palignr");
2089
2090 return emitX86Select(Builder, Mask, Op0: Align, Op1: Passthru);
2091}
2092
2093static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2094 bool ZeroMask, bool IndexForm) {
2095 Type *Ty = CI.getType();
2096 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2097 unsigned EltWidth = Ty->getScalarSizeInBits();
2098 bool IsFloat = Ty->isFPOrFPVectorTy();
2099 Intrinsic::ID IID;
2100 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2101 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2102 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2103 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2104 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2105 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2106 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2107 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2108 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2109 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2110 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2111 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2112 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2113 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2114 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2115 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2116 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2117 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2118 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2119 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2120 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2121 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2122 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2123 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2124 else if (VecWidth == 128 && EltWidth == 16)
2125 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2126 else if (VecWidth == 256 && EltWidth == 16)
2127 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2128 else if (VecWidth == 512 && EltWidth == 16)
2129 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2130 else if (VecWidth == 128 && EltWidth == 8)
2131 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2132 else if (VecWidth == 256 && EltWidth == 8)
2133 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2134 else if (VecWidth == 512 && EltWidth == 8)
2135 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2136 else
2137 llvm_unreachable("Unexpected intrinsic");
2138
2139 Value *Args[] = { CI.getArgOperand(i: 0) , CI.getArgOperand(i: 1),
2140 CI.getArgOperand(i: 2) };
2141
2142 // If this isn't index form we need to swap operand 0 and 1.
2143 if (!IndexForm)
2144 std::swap(a&: Args[0], b&: Args[1]);
2145
2146 Value *V = Builder.CreateIntrinsic(ID: IID, Args);
2147 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2148 : Builder.CreateBitCast(V: CI.getArgOperand(i: 1),
2149 DestTy: Ty);
2150 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: V, Op1: PassThru);
2151}
2152
2153static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2154 Intrinsic::ID IID) {
2155 Type *Ty = CI.getType();
2156 Value *Op0 = CI.getOperand(i_nocapture: 0);
2157 Value *Op1 = CI.getOperand(i_nocapture: 1);
2158 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1});
2159
2160 if (CI.arg_size() == 4) { // For masked intrinsics.
2161 Value *VecSrc = CI.getOperand(i_nocapture: 2);
2162 Value *Mask = CI.getOperand(i_nocapture: 3);
2163 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2164 }
2165 return Res;
2166}
2167
2168static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2169 bool IsRotateRight) {
2170 Type *Ty = CI.getType();
2171 Value *Src = CI.getArgOperand(i: 0);
2172 Value *Amt = CI.getArgOperand(i: 1);
2173
2174 // Amount may be scalar immediate, in which case create a splat vector.
2175 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2176 // we only care about the lowest log2 bits anyway.
2177 if (Amt->getType() != Ty) {
2178 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
2179 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
2180 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
2181 }
2182
2183 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2184 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Src, Src, Amt});
2185
2186 if (CI.arg_size() == 4) { // For masked intrinsics.
2187 Value *VecSrc = CI.getOperand(i_nocapture: 2);
2188 Value *Mask = CI.getOperand(i_nocapture: 3);
2189 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2190 }
2191 return Res;
2192}
2193
2194static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2195 bool IsSigned) {
2196 Type *Ty = CI.getType();
2197 Value *LHS = CI.getArgOperand(i: 0);
2198 Value *RHS = CI.getArgOperand(i: 1);
2199
2200 CmpInst::Predicate Pred;
2201 switch (Imm) {
2202 case 0x0:
2203 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2204 break;
2205 case 0x1:
2206 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2207 break;
2208 case 0x2:
2209 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2210 break;
2211 case 0x3:
2212 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2213 break;
2214 case 0x4:
2215 Pred = ICmpInst::ICMP_EQ;
2216 break;
2217 case 0x5:
2218 Pred = ICmpInst::ICMP_NE;
2219 break;
2220 case 0x6:
2221 return Constant::getNullValue(Ty); // FALSE
2222 case 0x7:
2223 return Constant::getAllOnesValue(Ty); // TRUE
2224 default:
2225 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2226 }
2227
2228 Value *Cmp = Builder.CreateICmp(P: Pred, LHS, RHS);
2229 Value *Ext = Builder.CreateSExt(V: Cmp, DestTy: Ty);
2230 return Ext;
2231}
2232
2233static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2234 bool IsShiftRight, bool ZeroMask) {
2235 Type *Ty = CI.getType();
2236 Value *Op0 = CI.getArgOperand(i: 0);
2237 Value *Op1 = CI.getArgOperand(i: 1);
2238 Value *Amt = CI.getArgOperand(i: 2);
2239
2240 if (IsShiftRight)
2241 std::swap(a&: Op0, b&: Op1);
2242
2243 // Amount may be scalar immediate, in which case create a splat vector.
2244 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2245 // we only care about the lowest log2 bits anyway.
2246 if (Amt->getType() != Ty) {
2247 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
2248 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
2249 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
2250 }
2251
2252 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2253 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1, Amt});
2254
2255 unsigned NumArgs = CI.arg_size();
2256 if (NumArgs >= 4) { // For masked intrinsics.
2257 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(i: 3) :
2258 ZeroMask ? ConstantAggregateZero::get(Ty: CI.getType()) :
2259 CI.getArgOperand(i: 0);
2260 Value *Mask = CI.getOperand(i_nocapture: NumArgs - 1);
2261 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2262 }
2263 return Res;
2264}
2265
2266static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2267 Value *Mask, bool Aligned) {
2268 const Align Alignment =
2269 Aligned
2270 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2271 : Align(1);
2272
2273 // If the mask is all ones just emit a regular store.
2274 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2275 if (C->isAllOnesValue())
2276 return Builder.CreateAlignedStore(Val: Data, Ptr, Align: Alignment);
2277
2278 // Convert the mask from an integer type to a vector of i1.
2279 unsigned NumElts = cast<FixedVectorType>(Val: Data->getType())->getNumElements();
2280 Mask = getX86MaskVec(Builder, Mask, NumElts);
2281 return Builder.CreateMaskedStore(Val: Data, Ptr, Alignment, Mask);
2282}
2283
2284static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2285 Value *Passthru, Value *Mask, bool Aligned) {
2286 Type *ValTy = Passthru->getType();
2287 const Align Alignment =
2288 Aligned
2289 ? Align(
2290 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2291 8)
2292 : Align(1);
2293
2294 // If the mask is all ones just emit a regular store.
2295 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2296 if (C->isAllOnesValue())
2297 return Builder.CreateAlignedLoad(Ty: ValTy, Ptr, Align: Alignment);
2298
2299 // Convert the mask from an integer type to a vector of i1.
2300 unsigned NumElts = cast<FixedVectorType>(Val: ValTy)->getNumElements();
2301 Mask = getX86MaskVec(Builder, Mask, NumElts);
2302 return Builder.CreateMaskedLoad(Ty: ValTy, Ptr, Alignment, Mask, PassThru: Passthru);
2303}
2304
2305static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2306 Type *Ty = CI.getType();
2307 Value *Op0 = CI.getArgOperand(i: 0);
2308 Value *Res = Builder.CreateIntrinsic(ID: Intrinsic::abs, Types: Ty,
2309 Args: {Op0, Builder.getInt1(V: false)});
2310 if (CI.arg_size() == 3)
2311 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 2), Op0: Res, Op1: CI.getArgOperand(i: 1));
2312 return Res;
2313}
2314
// Upgrade pmuldq/pmuludq intrinsics: multiply the low 32 bits of each 64-bit
// lane, producing a full 64-bit product. The inputs arrive as vXi32 vectors;
// they are reinterpreted as vXi64 and the low halves are extended in-register.
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 0), DestTy: Ty);
  Value *RHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 1), DestTy: Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    // This sign-extends the low 32 bits of each 64-bit lane into the full
    // lane before the multiply.
    Constant *ShiftAmt = ConstantInt::get(Ty, V: 32);
    LHS = Builder.CreateShl(LHS, RHS: ShiftAmt);
    LHS = Builder.CreateAShr(LHS, RHS: ShiftAmt);
    RHS = Builder.CreateShl(LHS: RHS, RHS: ShiftAmt);
    RHS = Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt);
  } else {
    // Clear the upper bits.
    // Unsigned variant: zero-extend the low 32 bits of each lane instead.
    Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff);
    LHS = Builder.CreateAnd(LHS, RHS: Mask);
    RHS = Builder.CreateAnd(LHS: RHS, RHS: Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  // Masked forms carry (a, b, passthru, mask): blend under the writemask.
  if (CI.arg_size() == 4)
    Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Res, Op1: CI.getArgOperand(i: 2));

  return Res;
}
2343
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// Returns the masked i1 vector reinterpreted as a single iN integer (N >= 8).
static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
  if (Mask) {
    // A constant all-ones mask is a no-op; otherwise AND lane-wise with the
    // mask expanded to a <NumElts x i1> vector.
    const auto *C = dyn_cast<Constant>(Val: Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(LHS: Vec, RHS: getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Widen to 8 lanes by shuffling in elements from a zero vector so the
    // result can be bitcast to an i8 below.
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts; // Indexes into the zero vector.
    Vec = Builder.CreateShuffleVector(V1: Vec,
                                      V2: Constant::getNullValue(Ty: Vec->getType()),
                                      Mask: Indices);
  }
  // Reinterpret the i1 vector as one integer mask of at least 8 bits.
  return Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: std::max(a: NumElts, b: 8U)));
}
2366
// Upgrade an avx512 masked integer compare to an icmp instruction (or to a
// constant for the always-false / always-true condition codes), then apply
// the writemask and widen the result to at least 8 bits.
// \p CC is the immediate condition code; \p Signed selects signed vs.
// unsigned predicates for the ordering comparisons.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(i: 0);
  unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();

  Value *Cmp;
  if (CC == 3) {
    // CC 3 is "false": result is all zeros regardless of the operands.
    Cmp = Constant::getNullValue(
        Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    // CC 7 is "true": result is all ones regardless of the operands.
    Cmp = Constant::getAllOnesValue(
        Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(P: Pred, LHS: Op0, RHS: CI.getArgOperand(i: 1));
  }

  // The writemask is always the trailing argument.
  Value *Mask = CI.getArgOperand(i: CI.arg_size() - 1);

  return applyX86MaskOn1BitsVec(Builder, Vec: Cmp, Mask);
}
2397
2398// Replace a masked intrinsic with an older unmasked intrinsic.
2399static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2400 Intrinsic::ID IID) {
2401 Value *Rep =
2402 Builder.CreateIntrinsic(ID: IID, Args: {CI.getArgOperand(i: 0), CI.getArgOperand(i: 1)});
2403 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Rep, Op1: CI.getArgOperand(i: 2));
2404}
2405
2406static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2407 Value* A = CI.getArgOperand(i: 0);
2408 Value* B = CI.getArgOperand(i: 1);
2409 Value* Src = CI.getArgOperand(i: 2);
2410 Value* Mask = CI.getArgOperand(i: 3);
2411
2412 Value* AndNode = Builder.CreateAnd(LHS: Mask, RHS: APInt(8, 1));
2413 Value* Cmp = Builder.CreateIsNotNull(Arg: AndNode);
2414 Value* Extract1 = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
2415 Value* Extract2 = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0);
2416 Value* Select = Builder.CreateSelect(C: Cmp, True: Extract1, False: Extract2);
2417 return Builder.CreateInsertElement(Vec: A, NewElt: Select, Idx: (uint64_t)0);
2418}
2419
2420static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2421 Value* Op = CI.getArgOperand(i: 0);
2422 Type* ReturnOp = CI.getType();
2423 unsigned NumElts = cast<FixedVectorType>(Val: CI.getType())->getNumElements();
2424 Value *Mask = getX86MaskVec(Builder, Mask: Op, NumElts);
2425 return Builder.CreateSExt(V: Mask, DestTy: ReturnOp, Name: "vpmovm2");
2426}
2427
// Replace intrinsic with unmasked version and a select.
// Handles "avx512.mask.*" intrinsics whose unmasked operation already exists
// as an older intrinsic: the name (after the "avx512.mask." prefix) plus the
// vector/element widths select the legacy intrinsic ID, which is called with
// the original operands minus the trailing passthru and mask; the result is
// then blended with the passthru under the mask. Returns false, leaving
// \p Rep untouched, when \p Name is not one of the handled intrinsics.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallBase &CI, Value *&Rep) {
  Name = Name.substr(Start: 12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.starts_with(Prefix: "max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with(Prefix: "permvar.")) {
    // permvar also dispatches on float vs. integer element type.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "conflict.")) {
    // Name[9] distinguishes the dword ('d') and qword ('q') variants.
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with(Prefix: "pavg.")) {
    // Name[5] distinguishes the byte ('b') and word ('w') variants.
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Call the unmasked intrinsic with the operands minus the trailing
  // passthru and mask arguments ...
  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateIntrinsic(ID: IID, Args);
  // ... then blend the result with the passthru (second-to-last argument)
  // under the mask (last argument).
  unsigned NumArgs = CI.arg_size();
  Rep = emitX86Select(Builder, Mask: CI.getArgOperand(i: NumArgs - 1), Op0: Rep,
                      Op1: CI.getArgOperand(i: NumArgs - 2));
  return true;
}
2665
2666/// Upgrade comment in call to inline asm that represents an objc retain release
2667/// marker.
2668void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2669 size_t Pos;
2670 if (AsmStr->find(s: "mov\tfp") == 0 &&
2671 AsmStr->find(s: "objc_retainAutoreleaseReturnValue") != std::string::npos &&
2672 (Pos = AsmStr->find(s: "# marker")) != std::string::npos) {
2673 AsmStr->replace(pos: Pos, n1: 1, s: ";");
2674 }
2675}
2676
// Upgrade a legacy "llvm.nvvm.*" intrinsic call (\p Name has the "nvvm."
// prefix already stripped) to current intrinsics or to plain IR.
// Returns the replacement value, or nullptr when no upgrade applies.
static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
                                       Function *F, IRBuilder<> &Builder) {
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    // Integer abs: expand to neg + icmp + select.
    Value *Arg = CI->getArgOperand(i: 0);
    Value *Neg = Builder.CreateNeg(V: Arg, Name: "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        LHS: Arg, RHS: llvm::Constant::getNullValue(Ty: Arg->getType()), Name: "abs.cond");
    Rep = Builder.CreateSelect(C: Cmp, True: Arg, False: Neg, Name: "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    // Old form used integer types; bitcast through bfloat for nvvm.fabs.
    Type *Ty = (Name == "abs.bf16")
                   ? Builder.getBFloatTy()
                   : FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 2);
    Value *Arg = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Ty);
    Value *Abs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::nvvm_fabs, V: Arg);
    Rep = Builder.CreateBitCast(V: Abs, DestTy: CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(ID: IID, V: CI->getArgOperand(i: 0));
  } else if (Name.consume_front(Prefix: "ex2.approx.")) {
    // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
    Intrinsic::ID IID = Name.starts_with(Prefix: "ftz") ? Intrinsic::nvvm_ex2_approx_ftz
                                               : Intrinsic::nvvm_ex2_approx;
    Rep = Builder.CreateUnaryIntrinsic(ID: IID, V: CI->getArgOperand(i: 0));
  } else if (Name.starts_with(Prefix: "atomic.load.add.f32.p") ||
             Name.starts_with(Prefix: "atomic.load.add.f64.p")) {
    // Legacy atomic float add -> atomicrmw fadd.
    Value *Ptr = CI->getArgOperand(i: 0);
    Value *Val = CI->getArgOperand(i: 1);
    Rep = Builder.CreateAtomicRMW(Op: AtomicRMWInst::FAdd, Ptr, Val, Align: MaybeAlign(),
                                  Ordering: AtomicOrdering::SequentiallyConsistent);
  } else if (Name.starts_with(Prefix: "atomic.load.inc.32.p") ||
             Name.starts_with(Prefix: "atomic.load.dec.32.p")) {
    // Legacy atomic inc/dec -> atomicrmw uinc_wrap/udec_wrap.
    Value *Ptr = CI->getArgOperand(i: 0);
    Value *Val = CI->getArgOperand(i: 1);
    auto Op = Name.starts_with(Prefix: "atomic.load.inc") ? AtomicRMWInst::UIncWrap
                                                  : AtomicRMWInst::UDecWrap;
    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, Align: MaybeAlign(),
                                  Ordering: AtomicOrdering::SequentiallyConsistent);
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(i: 0);
    Value *Ctlz = Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: {Arg->getType()},
                                          Args: {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, Name: "ctlz");
    Rep = Builder.CreateTrunc(V: Ctlz, DestTy: Builder.getInt32Ty(), Name: "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
    // i64.
    Value *Arg = CI->getArgOperand(i: 0);
    Value *Popc = Builder.CreateIntrinsic(ID: Intrinsic::ctpop, Types: {Arg->getType()},
                                          Args: Arg, /*FMFSource=*/nullptr, Name: "ctpop");
    Rep = Builder.CreateTrunc(V: Popc, DestTy: Builder.getInt32Ty(), Name: "ctpop.trunc");
  } else if (Name == "h2f") {
    // Half-to-float conversion: bitcast to half then fpext.
    Value *Cast =
        Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
    Rep = Builder.CreateFPExt(V: Cast, DestTy: Builder.getFloatTy());
  } else if (Name.consume_front(Prefix: "bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    Rep = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
  } else if (Name == "rotate.b32") {
    // Rotates become funnel shifts with both vector operands equal.
    Value *Arg = CI->getOperand(i_nocapture: 0);
    Value *ShiftAmt = CI->getOperand(i_nocapture: 1);
    Rep = Builder.CreateIntrinsic(RetTy: Builder.getInt32Ty(), ID: Intrinsic::fshl,
                                  Args: {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    // The legacy 64-bit rotate took an i32 amount; zext it for fshl.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(i_nocapture: 0);
    Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
    Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
                                  Args: {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(i_nocapture: 0);
    Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
    Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshr,
                                  Args: {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {
    // Swapping halves of an i64 is a rotate by 32.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(i_nocapture: 0);
    Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
                                  Args: {Arg, Arg, Builder.getInt64(C: 32)});
  } else if ((Name.consume_front(Prefix: "ptr.gen.to.") &&
              consumeNVVMPtrAddrSpace(Name)) ||
             (Name.consume_front(Prefix: "ptr.") && consumeNVVMPtrAddrSpace(Name) &&
              Name.starts_with(Prefix: ".to.gen"))) {
    // Pointer-space conversion intrinsics become addrspacecast instructions.
    Rep = Builder.CreateAddrSpaceCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
  } else if (Name.consume_front(Prefix: "ldg.global")) {
    // ldg becomes an invariant load from the global address space.
    Value *Ptr = CI->getArgOperand(i: 0);
    Align PtrAlign = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getAlignValue();
    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
    Value *ASC = Builder.CreateAddrSpaceCast(V: Ptr, DestTy: Builder.getPtrTy(AddrSpace: 1));
    Instruction *LD = Builder.CreateAlignedLoad(Ty: CI->getType(), Ptr: ASC, Align: PtrAlign);
    MDNode *MD = MDNode::get(Context&: Builder.getContext(), MDs: {});
    LD->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD);
    return LD;
  } else if (Name == "tanh.approx.f32") {
    // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
    FastMathFlags FMF;
    FMF.setApproxFunc();
    Rep = Builder.CreateUnaryIntrinsic(ID: Intrinsic::tanh, V: CI->getArgOperand(i: 0),
                                       FMFSource: FMF);
  } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
    // barrier0 has no operand; the others pass their first argument through.
    Value *Arg =
        Name.ends_with(Suffix: '0') ? Builder.getInt32(C: 0) : CI->getArgOperand(i: 0);
    Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_aligned_all,
                                  Types: {}, Args: {Arg});
  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        ID: Intrinsic::nvvm_barrier_cta_sync_aligned_count, Types: {},
        Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_all, Types: {},
                                  Args: {CI->getArgOperand(i: 0)});
  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_count, Types: {},
                                  Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
  } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
             Name == "barrier0.or") {
    // The new reduction barriers take an i1 predicate and return i1; wrap
    // with an icmp ne 0 on the way in and a zext on the way out.
    Value *C = CI->getArgOperand(i: 0);
    C = Builder.CreateICmpNE(LHS: C, RHS: Builder.getInt32(C: 0));

    Intrinsic::ID IID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case(S: "barrier0.popc",
                  Value: Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
            .Case(S: "barrier0.and",
                  Value: Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
            .Case(S: "barrier0.or",
                  Value: Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
    Value *Bar = Builder.CreateIntrinsic(ID: IID, Types: {}, Args: {Builder.getInt32(C: 0), C});
    Rep = Builder.CreateZExt(V: Bar, DestTy: CI->getType());
  } else {
    // Finally, bf16 intrinsics whose old form used integer types: call the
    // new declaration, bitcasting integer arguments/results to/from bf16.
    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
    if (IID != Intrinsic::not_intrinsic &&
        !F->getReturnType()->getScalarType()->isBFloatTy()) {
      rename(GV: F);
      Function *NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
      SmallVector<Value *, 2> Args;
      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
        Value *Arg = CI->getArgOperand(i: I);
        Type *OldType = Arg->getType();
        Type *NewType = NewFn->getArg(i: I)->getType();
        Args.push_back(
            Elt: (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
                ? Builder.CreateBitCast(V: Arg, DestTy: NewType)
                : Arg);
      }
      Rep = Builder.CreateCall(Callee: NewFn, Args);
      if (F->getReturnType()->isIntegerTy())
        Rep = Builder.CreateBitCast(V: Rep, DestTy: F->getReturnType());
    }
  }

  return Rep;
}
2835
2836static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2837 IRBuilder<> &Builder) {
2838 LLVMContext &C = F->getContext();
2839 Value *Rep = nullptr;
2840
2841 if (Name.starts_with(Prefix: "sse4a.movnt.")) {
2842 SmallVector<Metadata *, 1> Elts;
2843 Elts.push_back(
2844 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2845 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2846
2847 Value *Arg0 = CI->getArgOperand(i: 0);
2848 Value *Arg1 = CI->getArgOperand(i: 1);
2849
2850 // Nontemporal (unaligned) store of the 0'th element of the float/double
2851 // vector.
2852 Value *Extract =
2853 Builder.CreateExtractElement(Vec: Arg1, Idx: (uint64_t)0, Name: "extractelement");
2854
2855 StoreInst *SI = Builder.CreateAlignedStore(Val: Extract, Ptr: Arg0, Align: Align(1));
2856 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2857 } else if (Name.starts_with(Prefix: "avx.movnt.") ||
2858 Name.starts_with(Prefix: "avx512.storent.")) {
2859 SmallVector<Metadata *, 1> Elts;
2860 Elts.push_back(
2861 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2862 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2863
2864 Value *Arg0 = CI->getArgOperand(i: 0);
2865 Value *Arg1 = CI->getArgOperand(i: 1);
2866
2867 StoreInst *SI = Builder.CreateAlignedStore(
2868 Val: Arg1, Ptr: Arg0,
2869 Align: Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2870 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2871 } else if (Name == "sse2.storel.dq") {
2872 Value *Arg0 = CI->getArgOperand(i: 0);
2873 Value *Arg1 = CI->getArgOperand(i: 1);
2874
2875 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
2876 Value *BC0 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
2877 Value *Elt = Builder.CreateExtractElement(Vec: BC0, Idx: (uint64_t)0);
2878 Builder.CreateAlignedStore(Val: Elt, Ptr: Arg0, Align: Align(1));
2879 } else if (Name.starts_with(Prefix: "sse.storeu.") ||
2880 Name.starts_with(Prefix: "sse2.storeu.") ||
2881 Name.starts_with(Prefix: "avx.storeu.")) {
2882 Value *Arg0 = CI->getArgOperand(i: 0);
2883 Value *Arg1 = CI->getArgOperand(i: 1);
2884 Builder.CreateAlignedStore(Val: Arg1, Ptr: Arg0, Align: Align(1));
2885 } else if (Name == "avx512.mask.store.ss") {
2886 Value *Mask = Builder.CreateAnd(LHS: CI->getArgOperand(i: 2), RHS: Builder.getInt8(C: 1));
2887 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2888 Mask, Aligned: false);
2889 } else if (Name.starts_with(Prefix: "avx512.mask.store")) {
2890 // "avx512.mask.storeu." or "avx512.mask.store."
2891 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2892 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2893 Mask: CI->getArgOperand(i: 2), Aligned);
2894 } else if (Name.starts_with(Prefix: "sse2.pcmp") || Name.starts_with(Prefix: "avx2.pcmp")) {
2895 // Upgrade packed integer vector compare intrinsics to compare instructions.
2896 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2897 bool CmpEq = Name[9] == 'e';
2898 Rep = Builder.CreateICmp(P: CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2899 LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
2900 Rep = Builder.CreateSExt(V: Rep, DestTy: CI->getType(), Name: "");
2901 } else if (Name.starts_with(Prefix: "avx512.broadcastm")) {
2902 Type *ExtTy = Type::getInt32Ty(C);
2903 if (CI->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 8))
2904 ExtTy = Type::getInt64Ty(C);
2905 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2906 ExtTy->getPrimitiveSizeInBits();
2907 Rep = Builder.CreateZExt(V: CI->getArgOperand(i: 0), DestTy: ExtTy);
2908 Rep = Builder.CreateVectorSplat(NumElts, V: Rep);
2909 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2910 Value *Vec = CI->getArgOperand(i: 0);
2911 Value *Elt0 = Builder.CreateExtractElement(Vec, Idx: (uint64_t)0);
2912 Elt0 = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: Elt0->getType(), Args: Elt0);
2913 Rep = Builder.CreateInsertElement(Vec, NewElt: Elt0, Idx: (uint64_t)0);
2914 } else if (Name.starts_with(Prefix: "avx.sqrt.p") ||
2915 Name.starts_with(Prefix: "sse2.sqrt.p") ||
2916 Name.starts_with(Prefix: "sse.sqrt.p")) {
2917 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(),
2918 Args: {CI->getArgOperand(i: 0)});
2919 } else if (Name.starts_with(Prefix: "avx512.mask.sqrt.p")) {
2920 if (CI->arg_size() == 4 &&
2921 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
2922 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
2923 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2924 : Intrinsic::x86_avx512_sqrt_pd_512;
2925
2926 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 3)};
2927 Rep = Builder.CreateIntrinsic(ID: IID, Args);
2928 } else {
2929 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(),
2930 Args: {CI->getArgOperand(i: 0)});
2931 }
2932 Rep =
2933 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
2934 } else if (Name.starts_with(Prefix: "avx512.ptestm") ||
2935 Name.starts_with(Prefix: "avx512.ptestnm")) {
2936 Value *Op0 = CI->getArgOperand(i: 0);
2937 Value *Op1 = CI->getArgOperand(i: 1);
2938 Value *Mask = CI->getArgOperand(i: 2);
2939 Rep = Builder.CreateAnd(LHS: Op0, RHS: Op1);
2940 llvm::Type *Ty = Op0->getType();
2941 Value *Zero = llvm::Constant::getNullValue(Ty);
2942 ICmpInst::Predicate Pred = Name.starts_with(Prefix: "avx512.ptestm")
2943 ? ICmpInst::ICMP_NE
2944 : ICmpInst::ICMP_EQ;
2945 Rep = Builder.CreateICmp(P: Pred, LHS: Rep, RHS: Zero);
2946 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask);
2947 } else if (Name.starts_with(Prefix: "avx512.mask.pbroadcast")) {
2948 unsigned NumElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 1)->getType())
2949 ->getNumElements();
2950 Rep = Builder.CreateVectorSplat(NumElts, V: CI->getArgOperand(i: 0));
2951 Rep =
2952 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
2953 } else if (Name.starts_with(Prefix: "avx512.kunpck")) {
2954 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2955 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts);
2956 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts);
2957 int Indices[64];
2958 for (unsigned i = 0; i != NumElts; ++i)
2959 Indices[i] = i;
2960
2961 // First extract half of each vector. This gives better codegen than
2962 // doing it in a single shuffle.
2963 LHS = Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2));
2964 RHS = Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2));
2965 // Concat the vectors.
2966 // NOTE: Operands have to be swapped to match intrinsic definition.
2967 Rep = Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts));
2968 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2969 } else if (Name == "avx512.kand.w") {
2970 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2971 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2972 Rep = Builder.CreateAnd(LHS, RHS);
2973 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2974 } else if (Name == "avx512.kandn.w") {
2975 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2976 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2977 LHS = Builder.CreateNot(V: LHS);
2978 Rep = Builder.CreateAnd(LHS, RHS);
2979 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2980 } else if (Name == "avx512.kor.w") {
2981 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2982 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2983 Rep = Builder.CreateOr(LHS, RHS);
2984 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2985 } else if (Name == "avx512.kxor.w") {
2986 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2987 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2988 Rep = Builder.CreateXor(LHS, RHS);
2989 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2990 } else if (Name == "avx512.kxnor.w") {
2991 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2992 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2993 LHS = Builder.CreateNot(V: LHS);
2994 Rep = Builder.CreateXor(LHS, RHS);
2995 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2996 } else if (Name == "avx512.knot.w") {
2997 Rep = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2998 Rep = Builder.CreateNot(V: Rep);
2999 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
3000 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3001 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
3002 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
3003 Rep = Builder.CreateOr(LHS, RHS);
3004 Rep = Builder.CreateBitCast(V: Rep, DestTy: Builder.getInt16Ty());
3005 Value *C;
3006 if (Name[14] == 'c')
3007 C = ConstantInt::getAllOnesValue(Ty: Builder.getInt16Ty());
3008 else
3009 C = ConstantInt::getNullValue(Ty: Builder.getInt16Ty());
3010 Rep = Builder.CreateICmpEQ(LHS: Rep, RHS: C);
3011 Rep = Builder.CreateZExt(V: Rep, DestTy: Builder.getInt32Ty());
3012 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3013 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3014 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3015 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3016 Type *I32Ty = Type::getInt32Ty(C);
3017 Value *Elt0 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 0),
3018 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3019 Value *Elt1 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1),
3020 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3021 Value *EltOp;
3022 if (Name.contains(Other: ".add."))
3023 EltOp = Builder.CreateFAdd(L: Elt0, R: Elt1);
3024 else if (Name.contains(Other: ".sub."))
3025 EltOp = Builder.CreateFSub(L: Elt0, R: Elt1);
3026 else if (Name.contains(Other: ".mul."))
3027 EltOp = Builder.CreateFMul(L: Elt0, R: Elt1);
3028 else
3029 EltOp = Builder.CreateFDiv(L: Elt0, R: Elt1);
3030 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: EltOp,
3031 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3032 } else if (Name.starts_with(Prefix: "avx512.mask.pcmp")) {
3033 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3034 bool CmpEq = Name[16] == 'e';
3035 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: CmpEq ? 0 : 6, Signed: true);
3036 } else if (Name.starts_with(Prefix: "avx512.mask.vpshufbitqmb.")) {
3037 Type *OpTy = CI->getArgOperand(i: 0)->getType();
3038 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3039 Intrinsic::ID IID;
3040 switch (VecWidth) {
3041 default:
3042 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
3043 break;
3044 case 128:
3045 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3046 break;
3047 case 256:
3048 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3049 break;
3050 case 512:
3051 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3052 break;
3053 }
3054
3055 Rep =
3056 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
3057 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
3058 } else if (Name.starts_with(Prefix: "avx512.mask.fpclass.p")) {
3059 Type *OpTy = CI->getArgOperand(i: 0)->getType();
3060 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3061 unsigned EltWidth = OpTy->getScalarSizeInBits();
3062 Intrinsic::ID IID;
3063 if (VecWidth == 128 && EltWidth == 32)
3064 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3065 else if (VecWidth == 256 && EltWidth == 32)
3066 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3067 else if (VecWidth == 512 && EltWidth == 32)
3068 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3069 else if (VecWidth == 128 && EltWidth == 64)
3070 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3071 else if (VecWidth == 256 && EltWidth == 64)
3072 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3073 else if (VecWidth == 512 && EltWidth == 64)
3074 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3075 else
3076 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
3077
3078 Rep =
3079 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
3080 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
3081 } else if (Name.starts_with(Prefix: "avx512.cmp.p")) {
3082 SmallVector<Value *, 4> Args(CI->args());
3083 Type *OpTy = Args[0]->getType();
3084 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3085 unsigned EltWidth = OpTy->getScalarSizeInBits();
3086 Intrinsic::ID IID;
3087 if (VecWidth == 128 && EltWidth == 32)
3088 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3089 else if (VecWidth == 256 && EltWidth == 32)
3090 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3091 else if (VecWidth == 512 && EltWidth == 32)
3092 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3093 else if (VecWidth == 128 && EltWidth == 64)
3094 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3095 else if (VecWidth == 256 && EltWidth == 64)
3096 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3097 else if (VecWidth == 512 && EltWidth == 64)
3098 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3099 else
3100 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
3101
3102 Value *Mask = Constant::getAllOnesValue(Ty: CI->getType());
3103 if (VecWidth == 512)
3104 std::swap(a&: Mask, b&: Args.back());
3105 Args.push_back(Elt: Mask);
3106
3107 Rep = Builder.CreateIntrinsic(ID: IID, Args);
3108 } else if (Name.starts_with(Prefix: "avx512.mask.cmp.")) {
3109 // Integer compare intrinsics.
3110 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3111 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: true);
3112 } else if (Name.starts_with(Prefix: "avx512.mask.ucmp.")) {
3113 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3114 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: false);
3115 } else if (Name.starts_with(Prefix: "avx512.cvtb2mask.") ||
3116 Name.starts_with(Prefix: "avx512.cvtw2mask.") ||
3117 Name.starts_with(Prefix: "avx512.cvtd2mask.") ||
3118 Name.starts_with(Prefix: "avx512.cvtq2mask.")) {
3119 Value *Op = CI->getArgOperand(i: 0);
3120 Value *Zero = llvm::Constant::getNullValue(Ty: Op->getType());
3121 Rep = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: Op, RHS: Zero);
3122 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: nullptr);
3123 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3124 Name == "ssse3.pabs.d.128" || Name.starts_with(Prefix: "avx2.pabs") ||
3125 Name.starts_with(Prefix: "avx512.mask.pabs")) {
3126 Rep = upgradeAbs(Builder, CI&: *CI);
3127 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3128 Name == "sse41.pmaxsd" || Name.starts_with(Prefix: "avx2.pmaxs") ||
3129 Name.starts_with(Prefix: "avx512.mask.pmaxs")) {
3130 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smax);
3131 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3132 Name == "sse41.pmaxud" || Name.starts_with(Prefix: "avx2.pmaxu") ||
3133 Name.starts_with(Prefix: "avx512.mask.pmaxu")) {
3134 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umax);
3135 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3136 Name == "sse41.pminsd" || Name.starts_with(Prefix: "avx2.pmins") ||
3137 Name.starts_with(Prefix: "avx512.mask.pmins")) {
3138 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smin);
3139 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3140 Name == "sse41.pminud" || Name.starts_with(Prefix: "avx2.pminu") ||
3141 Name.starts_with(Prefix: "avx512.mask.pminu")) {
3142 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umin);
3143 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3144 Name == "avx512.pmulu.dq.512" ||
3145 Name.starts_with(Prefix: "avx512.mask.pmulu.dq.")) {
3146 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: false);
3147 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3148 Name == "avx512.pmul.dq.512" ||
3149 Name.starts_with(Prefix: "avx512.mask.pmul.dq.")) {
3150 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: true);
3151 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3152 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3153 Rep =
3154 Builder.CreateSIToFP(V: CI->getArgOperand(i: 1),
3155 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3156 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3157 } else if (Name == "avx512.cvtusi2sd") {
3158 Rep =
3159 Builder.CreateUIToFP(V: CI->getArgOperand(i: 1),
3160 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3161 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3162 } else if (Name == "sse2.cvtss2sd") {
3163 Rep = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), Idx: (uint64_t)0);
3164 Rep = Builder.CreateFPExt(
3165 V: Rep, DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3166 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3167 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3168 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3169 Name.starts_with(Prefix: "avx512.mask.cvtdq2pd.") ||
3170 Name.starts_with(Prefix: "avx512.mask.cvtudq2pd.") ||
3171 Name.starts_with(Prefix: "avx512.mask.cvtdq2ps.") ||
3172 Name.starts_with(Prefix: "avx512.mask.cvtudq2ps.") ||
3173 Name.starts_with(Prefix: "avx512.mask.cvtqq2pd.") ||
3174 Name.starts_with(Prefix: "avx512.mask.cvtuqq2pd.") ||
3175 Name == "avx512.mask.cvtqq2ps.256" ||
3176 Name == "avx512.mask.cvtqq2ps.512" ||
3177 Name == "avx512.mask.cvtuqq2ps.256" ||
3178 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3179 Name == "avx.cvt.ps2.pd.256" ||
3180 Name == "avx512.mask.cvtps2pd.128" ||
3181 Name == "avx512.mask.cvtps2pd.256") {
3182 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3183 Rep = CI->getArgOperand(i: 0);
3184 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
3185
3186 unsigned NumDstElts = DstTy->getNumElements();
3187 if (NumDstElts < SrcTy->getNumElements()) {
3188 assert(NumDstElts == 2 && "Unexpected vector size");
3189 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1});
3190 }
3191
3192 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3193 bool IsUnsigned = Name.contains(Other: "cvtu");
3194 if (IsPS2PD)
3195 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtps2pd");
3196 else if (CI->arg_size() == 4 &&
3197 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
3198 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
3199 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3200 : Intrinsic::x86_avx512_sitofp_round;
3201 Rep = Builder.CreateIntrinsic(ID: IID, Types: {DstTy, SrcTy},
3202 Args: {Rep, CI->getArgOperand(i: 3)});
3203 } else {
3204 Rep = IsUnsigned ? Builder.CreateUIToFP(V: Rep, DestTy: DstTy, Name: "cvt")
3205 : Builder.CreateSIToFP(V: Rep, DestTy: DstTy, Name: "cvt");
3206 }
3207
3208 if (CI->arg_size() >= 3)
3209 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3210 Op1: CI->getArgOperand(i: 1));
3211 } else if (Name.starts_with(Prefix: "avx512.mask.vcvtph2ps.") ||
3212 Name.starts_with(Prefix: "vcvtph2ps.")) {
3213 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3214 Rep = CI->getArgOperand(i: 0);
3215 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
3216 unsigned NumDstElts = DstTy->getNumElements();
3217 if (NumDstElts != SrcTy->getNumElements()) {
3218 assert(NumDstElts == 4 && "Unexpected vector size");
3219 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1, 2, 3});
3220 }
3221 Rep = Builder.CreateBitCast(
3222 V: Rep, DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C), NumElts: NumDstElts));
3223 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtph2ps");
3224 if (CI->arg_size() >= 3)
3225 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3226 Op1: CI->getArgOperand(i: 1));
3227 } else if (Name.starts_with(Prefix: "avx512.mask.load")) {
3228 // "avx512.mask.loadu." or "avx512.mask.load."
3229 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3230 Rep = upgradeMaskedLoad(Builder, Ptr: CI->getArgOperand(i: 0), Passthru: CI->getArgOperand(i: 1),
3231 Mask: CI->getArgOperand(i: 2), Aligned);
3232 } else if (Name.starts_with(Prefix: "avx512.mask.expand.load.")) {
3233 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
3234 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3235 NumElts: ResultTy->getNumElements());
3236
3237 Rep = Builder.CreateIntrinsic(
3238 ID: Intrinsic::masked_expandload, Types: ResultTy,
3239 Args: {CI->getOperand(i_nocapture: 0), MaskVec, CI->getOperand(i_nocapture: 1)});
3240 } else if (Name.starts_with(Prefix: "avx512.mask.compress.store.")) {
3241 auto *ResultTy = cast<VectorType>(Val: CI->getArgOperand(i: 1)->getType());
3242 Value *MaskVec =
3243 getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3244 NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements());
3245
3246 Rep = Builder.CreateIntrinsic(
3247 ID: Intrinsic::masked_compressstore, Types: ResultTy,
3248 Args: {CI->getArgOperand(i: 1), CI->getArgOperand(i: 0), MaskVec});
3249 } else if (Name.starts_with(Prefix: "avx512.mask.compress.") ||
3250 Name.starts_with(Prefix: "avx512.mask.expand.")) {
3251 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
3252
3253 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3254 NumElts: ResultTy->getNumElements());
3255
3256 bool IsCompress = Name[12] == 'c';
3257 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3258 : Intrinsic::x86_avx512_mask_expand;
3259 Rep = Builder.CreateIntrinsic(
3260 ID: IID, Types: ResultTy, Args: {CI->getOperand(i_nocapture: 0), CI->getOperand(i_nocapture: 1), MaskVec});
3261 } else if (Name.starts_with(Prefix: "xop.vpcom")) {
3262 bool IsSigned;
3263 if (Name.ends_with(Suffix: "ub") || Name.ends_with(Suffix: "uw") || Name.ends_with(Suffix: "ud") ||
3264 Name.ends_with(Suffix: "uq"))
3265 IsSigned = false;
3266 else if (Name.ends_with(Suffix: "b") || Name.ends_with(Suffix: "w") ||
3267 Name.ends_with(Suffix: "d") || Name.ends_with(Suffix: "q"))
3268 IsSigned = true;
3269 else
3270 reportFatalUsageErrorWithCI(reason: "Intrinsic has unknown suffix", CI);
3271
3272 unsigned Imm;
3273 if (CI->arg_size() == 3) {
3274 Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3275 } else {
3276 Name = Name.substr(Start: 9); // strip off "xop.vpcom"
3277 if (Name.starts_with(Prefix: "lt"))
3278 Imm = 0;
3279 else if (Name.starts_with(Prefix: "le"))
3280 Imm = 1;
3281 else if (Name.starts_with(Prefix: "gt"))
3282 Imm = 2;
3283 else if (Name.starts_with(Prefix: "ge"))
3284 Imm = 3;
3285 else if (Name.starts_with(Prefix: "eq"))
3286 Imm = 4;
3287 else if (Name.starts_with(Prefix: "ne"))
3288 Imm = 5;
3289 else if (Name.starts_with(Prefix: "false"))
3290 Imm = 6;
3291 else if (Name.starts_with(Prefix: "true"))
3292 Imm = 7;
3293 else
3294 llvm_unreachable("Unknown condition");
3295 }
3296
3297 Rep = upgradeX86vpcom(Builder, CI&: *CI, Imm, IsSigned);
3298 } else if (Name.starts_with(Prefix: "xop.vpcmov")) {
3299 Value *Sel = CI->getArgOperand(i: 2);
3300 Value *NotSel = Builder.CreateNot(V: Sel);
3301 Value *Sel0 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 0), RHS: Sel);
3302 Value *Sel1 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 1), RHS: NotSel);
3303 Rep = Builder.CreateOr(LHS: Sel0, RHS: Sel1);
3304 } else if (Name.starts_with(Prefix: "xop.vprot") || Name.starts_with(Prefix: "avx512.prol") ||
3305 Name.starts_with(Prefix: "avx512.mask.prol")) {
3306 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: false);
3307 } else if (Name.starts_with(Prefix: "avx512.pror") ||
3308 Name.starts_with(Prefix: "avx512.mask.pror")) {
3309 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: true);
3310 } else if (Name.starts_with(Prefix: "avx512.vpshld.") ||
3311 Name.starts_with(Prefix: "avx512.mask.vpshld") ||
3312 Name.starts_with(Prefix: "avx512.maskz.vpshld")) {
3313 bool ZeroMask = Name[11] == 'z';
3314 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: false, ZeroMask);
3315 } else if (Name.starts_with(Prefix: "avx512.vpshrd.") ||
3316 Name.starts_with(Prefix: "avx512.mask.vpshrd") ||
3317 Name.starts_with(Prefix: "avx512.maskz.vpshrd")) {
3318 bool ZeroMask = Name[11] == 'z';
3319 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: true, ZeroMask);
3320 } else if (Name == "sse42.crc32.64.8") {
3321 Value *Trunc0 =
3322 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Type::getInt32Ty(C));
3323 Rep = Builder.CreateIntrinsic(ID: Intrinsic::x86_sse42_crc32_32_8,
3324 Args: {Trunc0, CI->getArgOperand(i: 1)});
3325 Rep = Builder.CreateZExt(V: Rep, DestTy: CI->getType(), Name: "");
3326 } else if (Name.starts_with(Prefix: "avx.vbroadcast.s") ||
3327 Name.starts_with(Prefix: "avx512.vbroadcast.s")) {
3328 // Replace broadcasts with a series of insertelements.
3329 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3330 Type *EltTy = VecTy->getElementType();
3331 unsigned EltNum = VecTy->getNumElements();
3332 Value *Load = Builder.CreateLoad(Ty: EltTy, Ptr: CI->getArgOperand(i: 0));
3333 Type *I32Ty = Type::getInt32Ty(C);
3334 Rep = PoisonValue::get(T: VecTy);
3335 for (unsigned I = 0; I < EltNum; ++I)
3336 Rep = Builder.CreateInsertElement(Vec: Rep, NewElt: Load, Idx: ConstantInt::get(Ty: I32Ty, V: I));
3337 } else if (Name.starts_with(Prefix: "sse41.pmovsx") ||
3338 Name.starts_with(Prefix: "sse41.pmovzx") ||
3339 Name.starts_with(Prefix: "avx2.pmovsx") ||
3340 Name.starts_with(Prefix: "avx2.pmovzx") ||
3341 Name.starts_with(Prefix: "avx512.mask.pmovsx") ||
3342 Name.starts_with(Prefix: "avx512.mask.pmovzx")) {
3343 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3344 unsigned NumDstElts = DstTy->getNumElements();
3345
3346 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3347 SmallVector<int, 8> ShuffleMask(NumDstElts);
3348 for (unsigned i = 0; i != NumDstElts; ++i)
3349 ShuffleMask[i] = i;
3350
3351 Value *SV = Builder.CreateShuffleVector(V: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3352
3353 bool DoSext = Name.contains(Other: "pmovsx");
3354 Rep =
3355 DoSext ? Builder.CreateSExt(V: SV, DestTy: DstTy) : Builder.CreateZExt(V: SV, DestTy: DstTy);
3356 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3357 if (CI->arg_size() == 3)
3358 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3359 Op1: CI->getArgOperand(i: 1));
3360 } else if (Name == "avx512.mask.pmov.qd.256" ||
3361 Name == "avx512.mask.pmov.qd.512" ||
3362 Name == "avx512.mask.pmov.wb.256" ||
3363 Name == "avx512.mask.pmov.wb.512") {
3364 Type *Ty = CI->getArgOperand(i: 1)->getType();
3365 Rep = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Ty);
3366 Rep =
3367 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3368 } else if (Name.starts_with(Prefix: "avx.vbroadcastf128") ||
3369 Name == "avx2.vbroadcasti128") {
3370 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3371 Type *EltTy = cast<VectorType>(Val: CI->getType())->getElementType();
3372 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3373 auto *VT = FixedVectorType::get(ElementType: EltTy, NumElts: NumSrcElts);
3374 Value *Load = Builder.CreateAlignedLoad(Ty: VT, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
3375 if (NumSrcElts == 2)
3376 Rep = Builder.CreateShuffleVector(V: Load, Mask: ArrayRef<int>{0, 1, 0, 1});
3377 else
3378 Rep = Builder.CreateShuffleVector(V: Load,
3379 Mask: ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3380 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.i") ||
3381 Name.starts_with(Prefix: "avx512.mask.shuf.f")) {
3382 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3383 Type *VT = CI->getType();
3384 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3385 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3386 unsigned ControlBitsMask = NumLanes - 1;
3387 unsigned NumControlBits = NumLanes / 2;
3388 SmallVector<int, 8> ShuffleMask(0);
3389
3390 for (unsigned l = 0; l != NumLanes; ++l) {
3391 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3392 // We actually need the other source.
3393 if (l >= NumLanes / 2)
3394 LaneMask += NumLanes;
3395 for (unsigned i = 0; i != NumElementsInLane; ++i)
3396 ShuffleMask.push_back(Elt: LaneMask * NumElementsInLane + i);
3397 }
3398 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3399 V2: CI->getArgOperand(i: 1), Mask: ShuffleMask);
3400 Rep =
3401 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3402 } else if (Name.starts_with(Prefix: "avx512.mask.broadcastf") ||
3403 Name.starts_with(Prefix: "avx512.mask.broadcasti")) {
3404 unsigned NumSrcElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 0)->getType())
3405 ->getNumElements();
3406 unsigned NumDstElts =
3407 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3408
3409 SmallVector<int, 8> ShuffleMask(NumDstElts);
3410 for (unsigned i = 0; i != NumDstElts; ++i)
3411 ShuffleMask[i] = i % NumSrcElts;
3412
3413 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3414 V2: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3415 Rep =
3416 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3417 } else if (Name.starts_with(Prefix: "avx2.pbroadcast") ||
3418 Name.starts_with(Prefix: "avx2.vbroadcast") ||
3419 Name.starts_with(Prefix: "avx512.pbroadcast") ||
3420 Name.starts_with(Prefix: "avx512.mask.broadcast.s")) {
3421 // Replace vp?broadcasts with a vector shuffle.
3422 Value *Op = CI->getArgOperand(i: 0);
3423 ElementCount EC = cast<VectorType>(Val: CI->getType())->getElementCount();
3424 Type *MaskTy = VectorType::get(ElementType: Type::getInt32Ty(C), EC);
3425 SmallVector<int, 8> M;
3426 ShuffleVectorInst::getShuffleMask(Mask: Constant::getNullValue(Ty: MaskTy), Result&: M);
3427 Rep = Builder.CreateShuffleVector(V: Op, Mask: M);
3428
3429 if (CI->arg_size() == 3)
3430 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3431 Op1: CI->getArgOperand(i: 1));
3432 } else if (Name.starts_with(Prefix: "sse2.padds.") ||
3433 Name.starts_with(Prefix: "avx2.padds.") ||
3434 Name.starts_with(Prefix: "avx512.padds.") ||
3435 Name.starts_with(Prefix: "avx512.mask.padds.")) {
3436 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::sadd_sat);
3437 } else if (Name.starts_with(Prefix: "sse2.psubs.") ||
3438 Name.starts_with(Prefix: "avx2.psubs.") ||
3439 Name.starts_with(Prefix: "avx512.psubs.") ||
3440 Name.starts_with(Prefix: "avx512.mask.psubs.")) {
3441 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::ssub_sat);
3442 } else if (Name.starts_with(Prefix: "sse2.paddus.") ||
3443 Name.starts_with(Prefix: "avx2.paddus.") ||
3444 Name.starts_with(Prefix: "avx512.mask.paddus.")) {
3445 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::uadd_sat);
3446 } else if (Name.starts_with(Prefix: "sse2.psubus.") ||
3447 Name.starts_with(Prefix: "avx2.psubus.") ||
3448 Name.starts_with(Prefix: "avx512.mask.psubus.")) {
3449 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::usub_sat);
3450 } else if (Name.starts_with(Prefix: "avx512.mask.palignr.")) {
3451 Rep = upgradeX86ALIGNIntrinsics(Builder, Op0: CI->getArgOperand(i: 0),
3452 Op1: CI->getArgOperand(i: 1), Shift: CI->getArgOperand(i: 2),
3453 Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4),
3454 IsVALIGN: false);
3455 } else if (Name.starts_with(Prefix: "avx512.mask.valign.")) {
3456 Rep = upgradeX86ALIGNIntrinsics(
3457 Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1),
3458 Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), IsVALIGN: true);
3459 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3460 // 128/256-bit shift left specified in bits.
3461 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3462 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3463 Shift: Shift / 8); // Shift is in bits.
3464 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3465 // 128/256-bit shift right specified in bits.
3466 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3467 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3468 Shift: Shift / 8); // Shift is in bits.
3469 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3470 Name == "avx512.psll.dq.512") {
3471 // 128/256/512-bit shift left specified in bytes.
3472 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3473 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3474 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3475 Name == "avx512.psrl.dq.512") {
3476 // 128/256/512-bit shift right specified in bytes.
3477 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3478 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3479 } else if (Name == "sse41.pblendw" || Name.starts_with(Prefix: "sse41.blendp") ||
3480 Name.starts_with(Prefix: "avx.blend.p") || Name == "avx2.pblendw" ||
3481 Name.starts_with(Prefix: "avx2.pblendd.")) {
3482 Value *Op0 = CI->getArgOperand(i: 0);
3483 Value *Op1 = CI->getArgOperand(i: 1);
3484 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3485 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3486 unsigned NumElts = VecTy->getNumElements();
3487
3488 SmallVector<int, 16> Idxs(NumElts);
3489 for (unsigned i = 0; i != NumElts; ++i)
3490 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3491
3492 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3493 } else if (Name.starts_with(Prefix: "avx.vinsertf128.") ||
3494 Name == "avx2.vinserti128" ||
3495 Name.starts_with(Prefix: "avx512.mask.insert")) {
3496 Value *Op0 = CI->getArgOperand(i: 0);
3497 Value *Op1 = CI->getArgOperand(i: 1);
3498 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3499 unsigned DstNumElts =
3500 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3501 unsigned SrcNumElts =
3502 cast<FixedVectorType>(Val: Op1->getType())->getNumElements();
3503 unsigned Scale = DstNumElts / SrcNumElts;
3504
3505 // Mask off the high bits of the immediate value; hardware ignores those.
3506 Imm = Imm % Scale;
3507
3508 // Extend the second operand into a vector the size of the destination.
3509 SmallVector<int, 8> Idxs(DstNumElts);
3510 for (unsigned i = 0; i != SrcNumElts; ++i)
3511 Idxs[i] = i;
3512 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3513 Idxs[i] = SrcNumElts;
3514 Rep = Builder.CreateShuffleVector(V: Op1, Mask: Idxs);
3515
3516 // Insert the second operand into the first operand.
3517
3518 // Note that there is no guarantee that instruction lowering will actually
3519 // produce a vinsertf128 instruction for the created shuffles. In
3520 // particular, the 0 immediate case involves no lane changes, so it can
3521 // be handled as a blend.
3522
3523 // Example of shuffle mask for 32-bit elements:
3524 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3525 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3526
3527 // First fill with identify mask.
3528 for (unsigned i = 0; i != DstNumElts; ++i)
3529 Idxs[i] = i;
3530 // Then replace the elements where we need to insert.
3531 for (unsigned i = 0; i != SrcNumElts; ++i)
3532 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3533 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Rep, Mask: Idxs);
3534
3535 // If the intrinsic has a mask operand, handle that.
3536 if (CI->arg_size() == 5)
3537 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep,
3538 Op1: CI->getArgOperand(i: 3));
3539 } else if (Name.starts_with(Prefix: "avx.vextractf128.") ||
3540 Name == "avx2.vextracti128" ||
3541 Name.starts_with(Prefix: "avx512.mask.vextract")) {
3542 Value *Op0 = CI->getArgOperand(i: 0);
3543 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3544 unsigned DstNumElts =
3545 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3546 unsigned SrcNumElts =
3547 cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
3548 unsigned Scale = SrcNumElts / DstNumElts;
3549
3550 // Mask off the high bits of the immediate value; hardware ignores those.
3551 Imm = Imm % Scale;
3552
3553 // Get indexes for the subvector of the input vector.
3554 SmallVector<int, 8> Idxs(DstNumElts);
3555 for (unsigned i = 0; i != DstNumElts; ++i) {
3556 Idxs[i] = i + (Imm * DstNumElts);
3557 }
3558 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3559
3560 // If the intrinsic has a mask operand, handle that.
3561 if (CI->arg_size() == 4)
3562 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3563 Op1: CI->getArgOperand(i: 2));
3564 } else if (Name.starts_with(Prefix: "avx512.mask.perm.df.") ||
3565 Name.starts_with(Prefix: "avx512.mask.perm.di.")) {
3566 Value *Op0 = CI->getArgOperand(i: 0);
3567 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3568 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3569 unsigned NumElts = VecTy->getNumElements();
3570
3571 SmallVector<int, 8> Idxs(NumElts);
3572 for (unsigned i = 0; i != NumElts; ++i)
3573 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3574
3575 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3576
3577 if (CI->arg_size() == 4)
3578 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3579 Op1: CI->getArgOperand(i: 2));
3580 } else if (Name.starts_with(Prefix: "avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3581 // The immediate permute control byte looks like this:
3582 // [1:0] - select 128 bits from sources for low half of destination
3583 // [2] - ignore
3584 // [3] - zero low half of destination
3585 // [5:4] - select 128 bits from sources for high half of destination
3586 // [6] - ignore
3587 // [7] - zero high half of destination
3588
3589 uint8_t Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3590
3591 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3592 unsigned HalfSize = NumElts / 2;
3593 SmallVector<int, 8> ShuffleMask(NumElts);
3594
3595 // Determine which operand(s) are actually in use for this instruction.
3596 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3597 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3598
3599 // If needed, replace operands based on zero mask.
3600 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(Ty: CI->getType()) : V0;
3601 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(Ty: CI->getType()) : V1;
3602
3603 // Permute low half of result.
3604 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3605 for (unsigned i = 0; i < HalfSize; ++i)
3606 ShuffleMask[i] = StartIndex + i;
3607
3608 // Permute high half of result.
3609 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3610 for (unsigned i = 0; i < HalfSize; ++i)
3611 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3612
3613 Rep = Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: ShuffleMask);
3614
3615 } else if (Name.starts_with(Prefix: "avx.vpermil.") || Name == "sse2.pshuf.d" ||
3616 Name.starts_with(Prefix: "avx512.mask.vpermil.p") ||
3617 Name.starts_with(Prefix: "avx512.mask.pshuf.d.")) {
3618 Value *Op0 = CI->getArgOperand(i: 0);
3619 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3620 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3621 unsigned NumElts = VecTy->getNumElements();
3622 // Calculate the size of each index in the immediate.
3623 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3624 unsigned IdxMask = ((1 << IdxSize) - 1);
3625
3626 SmallVector<int, 8> Idxs(NumElts);
3627 // Lookup the bits for this element, wrapping around the immediate every
3628 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3629 // to offset by the first index of each group.
3630 for (unsigned i = 0; i != NumElts; ++i)
3631 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3632
3633 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3634
3635 if (CI->arg_size() == 4)
3636 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3637 Op1: CI->getArgOperand(i: 2));
3638 } else if (Name == "sse2.pshufl.w" ||
3639 Name.starts_with(Prefix: "avx512.mask.pshufl.w.")) {
3640 Value *Op0 = CI->getArgOperand(i: 0);
3641 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3642 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3643
3644 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3645 reportFatalUsageErrorWithCI(reason: "Intrinsic has invalid signature", CI);
3646
3647 SmallVector<int, 16> Idxs(NumElts);
3648 for (unsigned l = 0; l != NumElts; l += 8) {
3649 for (unsigned i = 0; i != 4; ++i)
3650 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3651 for (unsigned i = 4; i != 8; ++i)
3652 Idxs[i + l] = i + l;
3653 }
3654
3655 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3656
3657 if (CI->arg_size() == 4)
3658 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3659 Op1: CI->getArgOperand(i: 2));
3660 } else if (Name == "sse2.pshufh.w" ||
3661 Name.starts_with(Prefix: "avx512.mask.pshufh.w.")) {
3662 Value *Op0 = CI->getArgOperand(i: 0);
3663 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3664 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3665
3666 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3667 reportFatalUsageErrorWithCI(reason: "Intrinsic has invalid signature", CI);
3668
3669 SmallVector<int, 16> Idxs(NumElts);
3670 for (unsigned l = 0; l != NumElts; l += 8) {
3671 for (unsigned i = 0; i != 4; ++i)
3672 Idxs[i + l] = i + l;
3673 for (unsigned i = 0; i != 4; ++i)
3674 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3675 }
3676
3677 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3678
3679 if (CI->arg_size() == 4)
3680 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3681 Op1: CI->getArgOperand(i: 2));
3682 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.p")) {
3683 Value *Op0 = CI->getArgOperand(i: 0);
3684 Value *Op1 = CI->getArgOperand(i: 1);
3685 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3686 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3687
3688 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3689 unsigned HalfLaneElts = NumLaneElts / 2;
3690
3691 SmallVector<int, 16> Idxs(NumElts);
3692 for (unsigned i = 0; i != NumElts; ++i) {
3693 // Base index is the starting element of the lane.
3694 Idxs[i] = i - (i % NumLaneElts);
3695 // If we are half way through the lane switch to the other source.
3696 if ((i % NumLaneElts) >= HalfLaneElts)
3697 Idxs[i] += NumElts;
3698 // Now select the specific element. By adding HalfLaneElts bits from
3699 // the immediate. Wrapping around the immediate every 8-bits.
3700 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3701 }
3702
3703 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3704
3705 Rep =
3706 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3707 } else if (Name.starts_with(Prefix: "avx512.mask.movddup") ||
3708 Name.starts_with(Prefix: "avx512.mask.movshdup") ||
3709 Name.starts_with(Prefix: "avx512.mask.movsldup")) {
3710 Value *Op0 = CI->getArgOperand(i: 0);
3711 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3712 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3713
3714 unsigned Offset = 0;
3715 if (Name.starts_with(Prefix: "avx512.mask.movshdup."))
3716 Offset = 1;
3717
3718 SmallVector<int, 16> Idxs(NumElts);
3719 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3720 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3721 Idxs[i + l + 0] = i + l + Offset;
3722 Idxs[i + l + 1] = i + l + Offset;
3723 }
3724
3725 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3726
3727 Rep =
3728 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3729 } else if (Name.starts_with(Prefix: "avx512.mask.punpckl") ||
3730 Name.starts_with(Prefix: "avx512.mask.unpckl.")) {
3731 Value *Op0 = CI->getArgOperand(i: 0);
3732 Value *Op1 = CI->getArgOperand(i: 1);
3733 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3734 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3735
3736 SmallVector<int, 64> Idxs(NumElts);
3737 for (int l = 0; l != NumElts; l += NumLaneElts)
3738 for (int i = 0; i != NumLaneElts; ++i)
3739 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3740
3741 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3742
3743 Rep =
3744 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3745 } else if (Name.starts_with(Prefix: "avx512.mask.punpckh") ||
3746 Name.starts_with(Prefix: "avx512.mask.unpckh.")) {
3747 Value *Op0 = CI->getArgOperand(i: 0);
3748 Value *Op1 = CI->getArgOperand(i: 1);
3749 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3750 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3751
3752 SmallVector<int, 64> Idxs(NumElts);
3753 for (int l = 0; l != NumElts; l += NumLaneElts)
3754 for (int i = 0; i != NumLaneElts; ++i)
3755 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3756
3757 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3758
3759 Rep =
3760 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3761 } else if (Name.starts_with(Prefix: "avx512.mask.and.") ||
3762 Name.starts_with(Prefix: "avx512.mask.pand.")) {
3763 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3764 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3765 Rep = Builder.CreateAnd(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3766 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3767 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3768 Rep =
3769 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3770 } else if (Name.starts_with(Prefix: "avx512.mask.andn.") ||
3771 Name.starts_with(Prefix: "avx512.mask.pandn.")) {
3772 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3773 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3774 Rep = Builder.CreateNot(V: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy));
3775 Rep = Builder.CreateAnd(LHS: Rep,
3776 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3777 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3778 Rep =
3779 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3780 } else if (Name.starts_with(Prefix: "avx512.mask.or.") ||
3781 Name.starts_with(Prefix: "avx512.mask.por.")) {
3782 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3783 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3784 Rep = Builder.CreateOr(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3785 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3786 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3787 Rep =
3788 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3789 } else if (Name.starts_with(Prefix: "avx512.mask.xor.") ||
3790 Name.starts_with(Prefix: "avx512.mask.pxor.")) {
3791 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3792 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3793 Rep = Builder.CreateXor(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3794 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3795 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3796 Rep =
3797 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3798 } else if (Name.starts_with(Prefix: "avx512.mask.padd.")) {
3799 Rep = Builder.CreateAdd(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3800 Rep =
3801 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3802 } else if (Name.starts_with(Prefix: "avx512.mask.psub.")) {
3803 Rep = Builder.CreateSub(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3804 Rep =
3805 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3806 } else if (Name.starts_with(Prefix: "avx512.mask.pmull.")) {
3807 Rep = Builder.CreateMul(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3808 Rep =
3809 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3810 } else if (Name.starts_with(Prefix: "avx512.mask.add.p")) {
3811 if (Name.ends_with(Suffix: ".512")) {
3812 Intrinsic::ID IID;
3813 if (Name[17] == 's')
3814 IID = Intrinsic::x86_avx512_add_ps_512;
3815 else
3816 IID = Intrinsic::x86_avx512_add_pd_512;
3817
3818 Rep = Builder.CreateIntrinsic(
3819 ID: IID,
3820 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3821 } else {
3822 Rep = Builder.CreateFAdd(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3823 }
3824 Rep =
3825 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3826 } else if (Name.starts_with(Prefix: "avx512.mask.div.p")) {
3827 if (Name.ends_with(Suffix: ".512")) {
3828 Intrinsic::ID IID;
3829 if (Name[17] == 's')
3830 IID = Intrinsic::x86_avx512_div_ps_512;
3831 else
3832 IID = Intrinsic::x86_avx512_div_pd_512;
3833
3834 Rep = Builder.CreateIntrinsic(
3835 ID: IID,
3836 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3837 } else {
3838 Rep = Builder.CreateFDiv(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3839 }
3840 Rep =
3841 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3842 } else if (Name.starts_with(Prefix: "avx512.mask.mul.p")) {
3843 if (Name.ends_with(Suffix: ".512")) {
3844 Intrinsic::ID IID;
3845 if (Name[17] == 's')
3846 IID = Intrinsic::x86_avx512_mul_ps_512;
3847 else
3848 IID = Intrinsic::x86_avx512_mul_pd_512;
3849
3850 Rep = Builder.CreateIntrinsic(
3851 ID: IID,
3852 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3853 } else {
3854 Rep = Builder.CreateFMul(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3855 }
3856 Rep =
3857 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3858 } else if (Name.starts_with(Prefix: "avx512.mask.sub.p")) {
3859 if (Name.ends_with(Suffix: ".512")) {
3860 Intrinsic::ID IID;
3861 if (Name[17] == 's')
3862 IID = Intrinsic::x86_avx512_sub_ps_512;
3863 else
3864 IID = Intrinsic::x86_avx512_sub_pd_512;
3865
3866 Rep = Builder.CreateIntrinsic(
3867 ID: IID,
3868 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3869 } else {
3870 Rep = Builder.CreateFSub(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3871 }
3872 Rep =
3873 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3874 } else if ((Name.starts_with(Prefix: "avx512.mask.max.p") ||
3875 Name.starts_with(Prefix: "avx512.mask.min.p")) &&
3876 Name.drop_front(N: 18) == ".512") {
3877 bool IsDouble = Name[17] == 'd';
3878 bool IsMin = Name[13] == 'i';
3879 static const Intrinsic::ID MinMaxTbl[2][2] = {
3880 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3881 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3882 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3883
3884 Rep = Builder.CreateIntrinsic(
3885 ID: IID,
3886 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3887 Rep =
3888 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3889 } else if (Name.starts_with(Prefix: "avx512.mask.lzcnt.")) {
3890 Rep =
3891 Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: CI->getType(),
3892 Args: {CI->getArgOperand(i: 0), Builder.getInt1(V: false)});
3893 Rep =
3894 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3895 } else if (Name.starts_with(Prefix: "avx512.mask.psll")) {
3896 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3897 bool IsVariable = Name[16] == 'v';
3898 char Size = Name[16] == '.' ? Name[17]
3899 : Name[17] == '.' ? Name[18]
3900 : Name[18] == '.' ? Name[19]
3901 : Name[20];
3902
3903 Intrinsic::ID IID;
3904 if (IsVariable && Name[17] != '.') {
3905 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3906 IID = Intrinsic::x86_avx2_psllv_q;
3907 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3908 IID = Intrinsic::x86_avx2_psllv_q_256;
3909 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3910 IID = Intrinsic::x86_avx2_psllv_d;
3911 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3912 IID = Intrinsic::x86_avx2_psllv_d_256;
3913 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3914 IID = Intrinsic::x86_avx512_psllv_w_128;
3915 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3916 IID = Intrinsic::x86_avx512_psllv_w_256;
3917 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3918 IID = Intrinsic::x86_avx512_psllv_w_512;
3919 else
3920 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
3921 } else if (Name.ends_with(Suffix: ".128")) {
3922 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3923 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3924 : Intrinsic::x86_sse2_psll_d;
3925 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3926 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3927 : Intrinsic::x86_sse2_psll_q;
3928 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3929 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3930 : Intrinsic::x86_sse2_psll_w;
3931 else
3932 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
3933 } else if (Name.ends_with(Suffix: ".256")) {
3934 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3935 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3936 : Intrinsic::x86_avx2_psll_d;
3937 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3938 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3939 : Intrinsic::x86_avx2_psll_q;
3940 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3941 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3942 : Intrinsic::x86_avx2_psll_w;
3943 else
3944 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
3945 } else {
3946 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3947 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3948 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3949 : Intrinsic::x86_avx512_psll_d_512;
3950 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3951 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3952 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3953 : Intrinsic::x86_avx512_psll_q_512;
3954 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3955 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3956 : Intrinsic::x86_avx512_psll_w_512;
3957 else
3958 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
3959 }
3960
3961 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
3962 } else if (Name.starts_with(Prefix: "avx512.mask.psrl")) {
3963 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3964 bool IsVariable = Name[16] == 'v';
3965 char Size = Name[16] == '.' ? Name[17]
3966 : Name[17] == '.' ? Name[18]
3967 : Name[18] == '.' ? Name[19]
3968 : Name[20];
3969
3970 Intrinsic::ID IID;
3971 if (IsVariable && Name[17] != '.') {
3972 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3973 IID = Intrinsic::x86_avx2_psrlv_q;
3974 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3975 IID = Intrinsic::x86_avx2_psrlv_q_256;
3976 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3977 IID = Intrinsic::x86_avx2_psrlv_d;
3978 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3979 IID = Intrinsic::x86_avx2_psrlv_d_256;
3980 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3981 IID = Intrinsic::x86_avx512_psrlv_w_128;
3982 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3983 IID = Intrinsic::x86_avx512_psrlv_w_256;
3984 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3985 IID = Intrinsic::x86_avx512_psrlv_w_512;
3986 else
3987 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
3988 } else if (Name.ends_with(Suffix: ".128")) {
3989 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3990 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3991 : Intrinsic::x86_sse2_psrl_d;
3992 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3993 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3994 : Intrinsic::x86_sse2_psrl_q;
3995 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3996 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3997 : Intrinsic::x86_sse2_psrl_w;
3998 else
3999 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4000 } else if (Name.ends_with(Suffix: ".256")) {
4001 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4002 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4003 : Intrinsic::x86_avx2_psrl_d;
4004 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4005 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4006 : Intrinsic::x86_avx2_psrl_q;
4007 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4008 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4009 : Intrinsic::x86_avx2_psrl_w;
4010 else
4011 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4012 } else {
4013 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4014 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4015 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4016 : Intrinsic::x86_avx512_psrl_d_512;
4017 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4018 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4019 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4020 : Intrinsic::x86_avx512_psrl_q_512;
4021 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4022 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4023 : Intrinsic::x86_avx512_psrl_w_512;
4024 else
4025 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4026 }
4027
4028 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
4029 } else if (Name.starts_with(Prefix: "avx512.mask.psra")) {
4030 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4031 bool IsVariable = Name[16] == 'v';
4032 char Size = Name[16] == '.' ? Name[17]
4033 : Name[17] == '.' ? Name[18]
4034 : Name[18] == '.' ? Name[19]
4035 : Name[20];
4036
4037 Intrinsic::ID IID;
4038 if (IsVariable && Name[17] != '.') {
4039 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4040 IID = Intrinsic::x86_avx2_psrav_d;
4041 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4042 IID = Intrinsic::x86_avx2_psrav_d_256;
4043 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4044 IID = Intrinsic::x86_avx512_psrav_w_128;
4045 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4046 IID = Intrinsic::x86_avx512_psrav_w_256;
4047 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4048 IID = Intrinsic::x86_avx512_psrav_w_512;
4049 else
4050 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4051 } else if (Name.ends_with(Suffix: ".128")) {
4052 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4053 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4054 : Intrinsic::x86_sse2_psra_d;
4055 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4056 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4057 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4058 : Intrinsic::x86_avx512_psra_q_128;
4059 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4060 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4061 : Intrinsic::x86_sse2_psra_w;
4062 else
4063 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4064 } else if (Name.ends_with(Suffix: ".256")) {
4065 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4066 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4067 : Intrinsic::x86_avx2_psra_d;
4068 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4069 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4070 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4071 : Intrinsic::x86_avx512_psra_q_256;
4072 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4073 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4074 : Intrinsic::x86_avx2_psra_w;
4075 else
4076 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4077 } else {
4078 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4079 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4080 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4081 : Intrinsic::x86_avx512_psra_d_512;
4082 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4083 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4084 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4085 : Intrinsic::x86_avx512_psra_q_512;
4086 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4087 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4088 : Intrinsic::x86_avx512_psra_w_512;
4089 else
4090 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected size", CI);
4091 }
4092
4093 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
4094 } else if (Name.starts_with(Prefix: "avx512.mask.move.s")) {
4095 Rep = upgradeMaskedMove(Builder, CI&: *CI);
4096 } else if (Name.starts_with(Prefix: "avx512.cvtmask2")) {
4097 Rep = upgradeMaskToInt(Builder, CI&: *CI);
4098 } else if (Name.ends_with(Suffix: ".movntdqa")) {
4099 MDNode *Node = MDNode::get(
4100 Context&: C, MDs: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
4101
4102 LoadInst *LI = Builder.CreateAlignedLoad(
4103 Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0),
4104 Align: Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
4105 LI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
4106 Rep = LI;
4107 } else if (Name.starts_with(Prefix: "fma.vfmadd.") ||
4108 Name.starts_with(Prefix: "fma.vfmsub.") ||
4109 Name.starts_with(Prefix: "fma.vfnmadd.") ||
4110 Name.starts_with(Prefix: "fma.vfnmsub.")) {
4111 bool NegMul = Name[6] == 'n';
4112 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4113 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4114
4115 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4116 CI->getArgOperand(i: 2)};
4117
4118 if (IsScalar) {
4119 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
4120 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
4121 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
4122 }
4123
4124 if (NegMul && !IsScalar)
4125 Ops[0] = Builder.CreateFNeg(V: Ops[0]);
4126 if (NegMul && IsScalar)
4127 Ops[1] = Builder.CreateFNeg(V: Ops[1]);
4128 if (NegAcc)
4129 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4130
4131 Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops);
4132
4133 if (IsScalar)
4134 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
4135 } else if (Name.starts_with(Prefix: "fma4.vfmadd.s")) {
4136 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4137 CI->getArgOperand(i: 2)};
4138
4139 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
4140 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
4141 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
4142
4143 Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops);
4144
4145 Rep = Builder.CreateInsertElement(Vec: Constant::getNullValue(Ty: CI->getType()),
4146 NewElt: Rep, Idx: (uint64_t)0);
4147 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.s") ||
4148 Name.starts_with(Prefix: "avx512.maskz.vfmadd.s") ||
4149 Name.starts_with(Prefix: "avx512.mask3.vfmadd.s") ||
4150 Name.starts_with(Prefix: "avx512.mask3.vfmsub.s") ||
4151 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.s")) {
4152 bool IsMask3 = Name[11] == '3';
4153 bool IsMaskZ = Name[11] == 'z';
4154 // Drop the "avx512.mask." to make it easier.
4155 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4156 bool NegMul = Name[2] == 'n';
4157 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4158
4159 Value *A = CI->getArgOperand(i: 0);
4160 Value *B = CI->getArgOperand(i: 1);
4161 Value *C = CI->getArgOperand(i: 2);
4162
4163 if (NegMul && (IsMask3 || IsMaskZ))
4164 A = Builder.CreateFNeg(V: A);
4165 if (NegMul && !(IsMask3 || IsMaskZ))
4166 B = Builder.CreateFNeg(V: B);
4167 if (NegAcc)
4168 C = Builder.CreateFNeg(V: C);
4169
4170 A = Builder.CreateExtractElement(Vec: A, Idx: (uint64_t)0);
4171 B = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
4172 C = Builder.CreateExtractElement(Vec: C, Idx: (uint64_t)0);
4173
4174 if (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
4175 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4) {
4176 Value *Ops[] = {A, B, C, CI->getArgOperand(i: 4)};
4177
4178 Intrinsic::ID IID;
4179 if (Name.back() == 'd')
4180 IID = Intrinsic::x86_avx512_vfmadd_f64;
4181 else
4182 IID = Intrinsic::x86_avx512_vfmadd_f32;
4183 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4184 } else {
4185 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
4186 }
4187
4188 Value *PassThru = IsMaskZ ? Constant::getNullValue(Ty: Rep->getType())
4189 : IsMask3 ? C
4190 : A;
4191
4192 // For Mask3 with NegAcc, we need to create a new extractelement that
4193 // avoids the negation above.
4194 if (NegAcc && IsMask3)
4195 PassThru =
4196 Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 2), Idx: (uint64_t)0);
4197
4198 Rep = emitX86ScalarSelect(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4199 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: IsMask3 ? 2 : 0), NewElt: Rep,
4200 Idx: (uint64_t)0);
4201 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.p") ||
4202 Name.starts_with(Prefix: "avx512.mask.vfnmadd.p") ||
4203 Name.starts_with(Prefix: "avx512.mask.vfnmsub.p") ||
4204 Name.starts_with(Prefix: "avx512.mask3.vfmadd.p") ||
4205 Name.starts_with(Prefix: "avx512.mask3.vfmsub.p") ||
4206 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.p") ||
4207 Name.starts_with(Prefix: "avx512.maskz.vfmadd.p")) {
4208 bool IsMask3 = Name[11] == '3';
4209 bool IsMaskZ = Name[11] == 'z';
4210 // Drop the "avx512.mask." to make it easier.
4211 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4212 bool NegMul = Name[2] == 'n';
4213 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4214
4215 Value *A = CI->getArgOperand(i: 0);
4216 Value *B = CI->getArgOperand(i: 1);
4217 Value *C = CI->getArgOperand(i: 2);
4218
4219 if (NegMul && (IsMask3 || IsMaskZ))
4220 A = Builder.CreateFNeg(V: A);
4221 if (NegMul && !(IsMask3 || IsMaskZ))
4222 B = Builder.CreateFNeg(V: B);
4223 if (NegAcc)
4224 C = Builder.CreateFNeg(V: C);
4225
4226 if (CI->arg_size() == 5 &&
4227 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
4228 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4)) {
4229 Intrinsic::ID IID;
4230 // Check the character before ".512" in string.
4231 if (Name[Name.size() - 5] == 's')
4232 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4233 else
4234 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4235
4236 Rep = Builder.CreateIntrinsic(ID: IID, Args: {A, B, C, CI->getArgOperand(i: 4)});
4237 } else {
4238 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
4239 }
4240
4241 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
4242 : IsMask3 ? CI->getArgOperand(i: 2)
4243 : CI->getArgOperand(i: 0);
4244
4245 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4246 } else if (Name.starts_with(Prefix: "fma.vfmsubadd.p")) {
4247 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4248 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4249 Intrinsic::ID IID;
4250 if (VecWidth == 128 && EltWidth == 32)
4251 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4252 else if (VecWidth == 256 && EltWidth == 32)
4253 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4254 else if (VecWidth == 128 && EltWidth == 64)
4255 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4256 else if (VecWidth == 256 && EltWidth == 64)
4257 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4258 else
4259 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4260
4261 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4262 CI->getArgOperand(i: 2)};
4263 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4264 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4265 } else if (Name.starts_with(Prefix: "avx512.mask.vfmaddsub.p") ||
4266 Name.starts_with(Prefix: "avx512.mask3.vfmaddsub.p") ||
4267 Name.starts_with(Prefix: "avx512.maskz.vfmaddsub.p") ||
4268 Name.starts_with(Prefix: "avx512.mask3.vfmsubadd.p")) {
4269 bool IsMask3 = Name[11] == '3';
4270 bool IsMaskZ = Name[11] == 'z';
4271 // Drop the "avx512.mask." to make it easier.
4272 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4273 bool IsSubAdd = Name[3] == 's';
4274 if (CI->arg_size() == 5) {
4275 Intrinsic::ID IID;
4276 // Check the character before ".512" in string.
4277 if (Name[Name.size() - 5] == 's')
4278 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4279 else
4280 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4281
4282 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4283 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
4284 if (IsSubAdd)
4285 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4286
4287 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4288 } else {
4289 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
4290
4291 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4292 CI->getArgOperand(i: 2)};
4293
4294 Function *FMA = Intrinsic::getOrInsertDeclaration(
4295 M: CI->getModule(), id: Intrinsic::fma, Tys: Ops[0]->getType());
4296 Value *Odd = Builder.CreateCall(Callee: FMA, Args: Ops);
4297 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4298 Value *Even = Builder.CreateCall(Callee: FMA, Args: Ops);
4299
4300 if (IsSubAdd)
4301 std::swap(a&: Even, b&: Odd);
4302
4303 SmallVector<int, 32> Idxs(NumElts);
4304 for (int i = 0; i != NumElts; ++i)
4305 Idxs[i] = i + (i % 2) * NumElts;
4306
4307 Rep = Builder.CreateShuffleVector(V1: Even, V2: Odd, Mask: Idxs);
4308 }
4309
4310 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
4311 : IsMask3 ? CI->getArgOperand(i: 2)
4312 : CI->getArgOperand(i: 0);
4313
4314 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4315 } else if (Name.starts_with(Prefix: "avx512.mask.pternlog.") ||
4316 Name.starts_with(Prefix: "avx512.maskz.pternlog.")) {
4317 bool ZeroMask = Name[11] == 'z';
4318 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4319 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4320 Intrinsic::ID IID;
4321 if (VecWidth == 128 && EltWidth == 32)
4322 IID = Intrinsic::x86_avx512_pternlog_d_128;
4323 else if (VecWidth == 256 && EltWidth == 32)
4324 IID = Intrinsic::x86_avx512_pternlog_d_256;
4325 else if (VecWidth == 512 && EltWidth == 32)
4326 IID = Intrinsic::x86_avx512_pternlog_d_512;
4327 else if (VecWidth == 128 && EltWidth == 64)
4328 IID = Intrinsic::x86_avx512_pternlog_q_128;
4329 else if (VecWidth == 256 && EltWidth == 64)
4330 IID = Intrinsic::x86_avx512_pternlog_q_256;
4331 else if (VecWidth == 512 && EltWidth == 64)
4332 IID = Intrinsic::x86_avx512_pternlog_q_512;
4333 else
4334 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4335
4336 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4337 CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)};
4338 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4339 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4340 : CI->getArgOperand(i: 0);
4341 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: PassThru);
4342 } else if (Name.starts_with(Prefix: "avx512.mask.vpmadd52") ||
4343 Name.starts_with(Prefix: "avx512.maskz.vpmadd52")) {
4344 bool ZeroMask = Name[11] == 'z';
4345 bool High = Name[20] == 'h' || Name[21] == 'h';
4346 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4347 Intrinsic::ID IID;
4348 if (VecWidth == 128 && !High)
4349 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4350 else if (VecWidth == 256 && !High)
4351 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4352 else if (VecWidth == 512 && !High)
4353 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4354 else if (VecWidth == 128 && High)
4355 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4356 else if (VecWidth == 256 && High)
4357 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4358 else if (VecWidth == 512 && High)
4359 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4360 else
4361 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4362
4363 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4364 CI->getArgOperand(i: 2)};
4365 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4366 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4367 : CI->getArgOperand(i: 0);
4368 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4369 } else if (Name.starts_with(Prefix: "avx512.mask.vpermi2var.") ||
4370 Name.starts_with(Prefix: "avx512.mask.vpermt2var.") ||
4371 Name.starts_with(Prefix: "avx512.maskz.vpermt2var.")) {
4372 bool ZeroMask = Name[11] == 'z';
4373 bool IndexForm = Name[17] == 'i';
4374 Rep = upgradeX86VPERMT2Intrinsics(Builder, CI&: *CI, ZeroMask, IndexForm);
4375 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpbusd.") ||
4376 Name.starts_with(Prefix: "avx512.maskz.vpdpbusd.") ||
4377 Name.starts_with(Prefix: "avx512.mask.vpdpbusds.") ||
4378 Name.starts_with(Prefix: "avx512.maskz.vpdpbusds.")) {
4379 bool ZeroMask = Name[11] == 'z';
4380 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4381 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4382 Intrinsic::ID IID;
4383 if (VecWidth == 128 && !IsSaturating)
4384 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4385 else if (VecWidth == 256 && !IsSaturating)
4386 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4387 else if (VecWidth == 512 && !IsSaturating)
4388 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4389 else if (VecWidth == 128 && IsSaturating)
4390 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4391 else if (VecWidth == 256 && IsSaturating)
4392 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4393 else if (VecWidth == 512 && IsSaturating)
4394 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4395 else
4396 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4397
4398 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4399 CI->getArgOperand(i: 2)};
4400
4401 // Input arguments types were incorrectly set to vectors of i32 before but
4402 // they should be vectors of i8. Insert bit cast when encountering the old
4403 // types
4404 if (Args[1]->getType()->isVectorTy() &&
4405 cast<VectorType>(Val: Args[1]->getType())
4406 ->getElementType()
4407 ->isIntegerTy(Bitwidth: 32) &&
4408 Args[2]->getType()->isVectorTy() &&
4409 cast<VectorType>(Val: Args[2]->getType())
4410 ->getElementType()
4411 ->isIntegerTy(Bitwidth: 32)) {
4412 Type *NewArgType = nullptr;
4413 if (VecWidth == 128)
4414 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 16, Scalable: false);
4415 else if (VecWidth == 256)
4416 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 32, Scalable: false);
4417 else if (VecWidth == 512)
4418 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 64, Scalable: false);
4419 else
4420 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected vector bit width",
4421 CI);
4422
4423 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
4424 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
4425 }
4426
4427 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4428 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4429 : CI->getArgOperand(i: 0);
4430 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4431 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpwssd.") ||
4432 Name.starts_with(Prefix: "avx512.maskz.vpdpwssd.") ||
4433 Name.starts_with(Prefix: "avx512.mask.vpdpwssds.") ||
4434 Name.starts_with(Prefix: "avx512.maskz.vpdpwssds.")) {
4435 bool ZeroMask = Name[11] == 'z';
4436 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4437 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4438 Intrinsic::ID IID;
4439 if (VecWidth == 128 && !IsSaturating)
4440 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4441 else if (VecWidth == 256 && !IsSaturating)
4442 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4443 else if (VecWidth == 512 && !IsSaturating)
4444 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4445 else if (VecWidth == 128 && IsSaturating)
4446 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4447 else if (VecWidth == 256 && IsSaturating)
4448 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4449 else if (VecWidth == 512 && IsSaturating)
4450 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4451 else
4452 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4453
4454 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4455 CI->getArgOperand(i: 2)};
4456
4457 // Input arguments types were incorrectly set to vectors of i32 before but
4458 // they should be vectors of i16. Insert bit cast when encountering the old
4459 // types
4460 if (Args[1]->getType()->isVectorTy() &&
4461 cast<VectorType>(Val: Args[1]->getType())
4462 ->getElementType()
4463 ->isIntegerTy(Bitwidth: 32) &&
4464 Args[2]->getType()->isVectorTy() &&
4465 cast<VectorType>(Val: Args[2]->getType())
4466 ->getElementType()
4467 ->isIntegerTy(Bitwidth: 32)) {
4468 Type *NewArgType = nullptr;
4469 if (VecWidth == 128)
4470 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 8, Scalable: false);
4471 else if (VecWidth == 256)
4472 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 16, Scalable: false);
4473 else if (VecWidth == 512)
4474 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 32, Scalable: false);
4475 else
4476 reportFatalUsageErrorWithCI(reason: "Intrinsic has unexpected vector bit width",
4477 CI);
4478
4479 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
4480 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
4481 }
4482
4483 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4484 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4485 : CI->getArgOperand(i: 0);
4486 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4487 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4488 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4489 Name == "subborrow.u32" || Name == "subborrow.u64") {
4490 Intrinsic::ID IID;
4491 if (Name[0] == 'a' && Name.back() == '2')
4492 IID = Intrinsic::x86_addcarry_32;
4493 else if (Name[0] == 'a' && Name.back() == '4')
4494 IID = Intrinsic::x86_addcarry_64;
4495 else if (Name[0] == 's' && Name.back() == '2')
4496 IID = Intrinsic::x86_subborrow_32;
4497 else if (Name[0] == 's' && Name.back() == '4')
4498 IID = Intrinsic::x86_subborrow_64;
4499 else
4500 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4501
4502 // Make a call with 3 operands.
4503 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4504 CI->getArgOperand(i: 2)};
4505 Value *NewCall = Builder.CreateIntrinsic(ID: IID, Args);
4506
4507 // Extract the second result and store it.
4508 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
4509 Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 3), Align: Align(1));
4510 // Replace the original call result with the first result of the new call.
4511 Value *CF = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
4512
4513 CI->replaceAllUsesWith(V: CF);
4514 Rep = nullptr;
4515 } else if (Name.starts_with(Prefix: "avx512.mask.") &&
4516 upgradeAVX512MaskToSelect(Name, Builder, CI&: *CI, Rep)) {
4517 // Rep will be updated by the call in the condition.
4518 } else
4519 reportFatalUsageErrorWithCI(reason: "Unexpected intrinsic", CI);
4520
4521 return Rep;
4522}
4523
4524static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4525 Function *F, IRBuilder<> &Builder) {
4526 if (Name.starts_with(Prefix: "neon.bfcvt")) {
4527 if (Name.starts_with(Prefix: "neon.bfcvtn2")) {
4528 SmallVector<int, 32> LoMask(4);
4529 std::iota(first: LoMask.begin(), last: LoMask.end(), value: 0);
4530 SmallVector<int, 32> ConcatMask(8);
4531 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4532 Value *Inactive = Builder.CreateShuffleVector(V: CI->getOperand(i_nocapture: 0), Mask: LoMask);
4533 Value *Trunc =
4534 Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 1), DestTy: Inactive->getType());
4535 return Builder.CreateShuffleVector(V1: Inactive, V2: Trunc, Mask: ConcatMask);
4536 } else if (Name.starts_with(Prefix: "neon.bfcvtn")) {
4537 SmallVector<int, 32> ConcatMask(8);
4538 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4539 Type *V4BF16 =
4540 FixedVectorType::get(ElementType: Type::getBFloatTy(C&: F->getContext()), NumElts: 4);
4541 Value *Trunc = Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0), DestTy: V4BF16);
4542 dbgs() << "Trunc: " << *Trunc << "\n";
4543 return Builder.CreateShuffleVector(
4544 V1: Trunc, V2: ConstantAggregateZero::get(Ty: V4BF16), Mask: ConcatMask);
4545 } else {
4546 return Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0),
4547 DestTy: Type::getBFloatTy(C&: F->getContext()));
4548 }
4549 } else if (Name.starts_with(Prefix: "sve.fcvt")) {
4550 Intrinsic::ID NewID =
4551 StringSwitch<Intrinsic::ID>(Name)
4552 .Case(S: "sve.fcvt.bf16f32", Value: Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4553 .Case(S: "sve.fcvtnt.bf16f32",
4554 Value: Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4555 .Default(Value: Intrinsic::not_intrinsic);
4556 if (NewID == Intrinsic::not_intrinsic)
4557 llvm_unreachable("Unhandled Intrinsic!");
4558
4559 SmallVector<Value *, 3> Args(CI->args());
4560
4561 // The original intrinsics incorrectly used a predicate based on the
4562 // smallest element type rather than the largest.
4563 Type *BadPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
4564 Type *GoodPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
4565
4566 if (Args[1]->getType() != BadPredTy)
4567 llvm_unreachable("Unexpected predicate type!");
4568
4569 Args[1] = Builder.CreateIntrinsic(ID: Intrinsic::aarch64_sve_convert_to_svbool,
4570 Types: BadPredTy, Args: Args[1]);
4571 Args[1] = Builder.CreateIntrinsic(
4572 ID: Intrinsic::aarch64_sve_convert_from_svbool, Types: GoodPredTy, Args: Args[1]);
4573
4574 return Builder.CreateIntrinsic(ID: NewID, Args, /*FMFSource=*/nullptr,
4575 Name: CI->getName());
4576 }
4577
4578 llvm_unreachable("Unhandled Intrinsic!");
4579}
4580
// Upgrade legacy ARM MVE/CDE intrinsic calls whose predicate type changed:
// the old forms used v4i1 masks for 64-bit element operations, where the
// current intrinsics take v2i1. Returns the replacement value for the call.
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_vctp64, Types: {},
                                          Args: CI->getArgOperand(i: 0),
                                          /*FMFSource=*/nullptr, Name: CI->getName());
    Value *C1 = Builder.CreateIntrinsic(
        ID: Intrinsic::arm_mve_pred_v2i,
        Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 2, Scalable: false)}, Args: VCTP);
    return Builder.CreateIntrinsic(
        ID: Intrinsic::arm_mve_pred_i2v,
        Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // Build the overload type list each new intrinsic expects, substituting
    // v2i1 for the old v4i1 predicate position.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts: 2);
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(),
             CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 1)->getType(),
             CI->getOperand(i_nocapture: 2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Convert each i1-vector (predicate) argument from v4i1 to v2i1 by
    // round-tripping through the i32 predicate representation; all other
    // arguments pass through unchanged.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            ID: Intrinsic::arm_mve_pred_v2i,
            Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: Op);
        Op = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_pred_i2v, Types: {V2I1Ty}, Args: C1);
      }
      Ops.push_back(x: Op);
    }

    return Builder.CreateIntrinsic(ID, Types: Tys, Args: Ops, /*FMFSource=*/nullptr,
                                   Name: CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
4665
// These are expected to have the arguments:
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
//
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
//
// Legacy AMDGCN atomic intrinsics are lowered to plain atomicrmw
// instructions; legacy WMMA iu intrinsics get a trailing clamp operand
// appended. Returns the replacement value, or nullptr for malformed input
// (which is left for the verifier to reject).
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
                                         Function *F, IRBuilder<> &Builder) {
  // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
  // for compatibility.
  auto UpgradeLegacyWMMAIUIntrinsicCall =
      [](Function *F, CallBase *CI, IRBuilder<> &Builder,
         ArrayRef<Type *> OverloadTys) -> Value * {
    // Prepare arguments, append clamp=0 for compatibility
    SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
    Args.push_back(Elt: Builder.getFalse());

    // Insert the declaration for the right overload types
    Function *NewDecl = Intrinsic::getOrInsertDeclaration(
        M: F->getParent(), id: F->getIntrinsicID(), Tys: OverloadTys);

    // Copy operand bundles if any
    SmallVector<OperandBundleDef, 1> Bundles;
    CI->getOperandBundlesAsDefs(Defs&: Bundles);

    // Create the new call and copy calling properties
    auto *NewCall = cast<CallInst>(Val: Builder.CreateCall(Callee: NewDecl, Args, OpBundles: Bundles));
    NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
    NewCall->setCallingConv(CI->getCallingConv());
    NewCall->setAttributes(CI->getAttributes());
    NewCall->setDebugLoc(CI->getDebugLoc());
    NewCall->copyMetadata(SrcInst: *CI);
    return NewCall;
  };

  if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
    assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
                                  "intrinsic should have 7 arguments");
    // Overload on the accumulator (arg 4) and input (arg 1) types.
    Type *T1 = CI->getArgOperand(i: 4)->getType();
    Type *T2 = CI->getArgOperand(i: 1)->getType();
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
  }
  if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
    assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
                                  "intrinsic should have 8 arguments");
    Type *T1 = CI->getArgOperand(i: 4)->getType();
    Type *T2 = CI->getArgOperand(i: 1)->getType();
    Type *T3 = CI->getArgOperand(i: 3)->getType();
    Type *T4 = CI->getArgOperand(i: 5)->getType();
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
  }

  // Map the legacy intrinsic name onto the equivalent atomicrmw operation.
  // NOTE(review): this StringSwitch has no .Default — assumes Name always
  // matches one of the prefixes below; confirm callers guarantee that.
  AtomicRMWInst::BinOp RMWOp =
      StringSwitch<AtomicRMWInst::BinOp>(Name)
          .StartsWith(S: "ds.fadd", Value: AtomicRMWInst::FAdd)
          .StartsWith(S: "ds.fmin", Value: AtomicRMWInst::FMin)
          .StartsWith(S: "ds.fmax", Value: AtomicRMWInst::FMax)
          .StartsWith(S: "atomic.inc.", Value: AtomicRMWInst::UIncWrap)
          .StartsWith(S: "atomic.dec.", Value: AtomicRMWInst::UDecWrap)
          .StartsWith(S: "global.atomic.fadd", Value: AtomicRMWInst::FAdd)
          .StartsWith(S: "flat.atomic.fadd", Value: AtomicRMWInst::FAdd)
          .StartsWith(S: "global.atomic.fmin", Value: AtomicRMWInst::FMin)
          .StartsWith(S: "flat.atomic.fmin", Value: AtomicRMWInst::FMin)
          .StartsWith(S: "global.atomic.fmax", Value: AtomicRMWInst::FMax)
          .StartsWith(S: "flat.atomic.fmax", Value: AtomicRMWInst::FMax)
          .StartsWith(S: "atomic.cond.sub", Value: AtomicRMWInst::USubCond)
          .StartsWith(S: "atomic.csub", Value: AtomicRMWInst::USubSat);

  // getNumOperands() includes the callee operand, so a well-formed call with
  // (ptr, value) has at least 3 operands.
  unsigned NumOperands = CI->getNumOperands();
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(i: 0);
  PointerType *PtrTy = dyn_cast<PointerType>(Val: Ptr->getType());
  if (!PtrTy) // Malformed.
    return nullptr;

  Value *Val = CI->getArgOperand(i: 1);
  if (Val->getType() != CI->getType()) // Malformed.
    return nullptr;

  ConstantInt *OrderArg = nullptr;
  bool IsVolatile = false;

  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16 which was missing arguments.
  if (NumOperands > 3)
    OrderArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));

  // Ignore scope argument at 3

  if (NumOperands > 5) {
    // Treat a non-constant volatile flag conservatively as volatile.
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 4));
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  // Default to seq_cst; also promote orderings that are invalid for an
  // atomicrmw (not-atomic / unordered) to seq_cst.
  AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
  if (OrderArg && isValidAtomicOrdering(I: OrderArg->getZExtValue()))
    Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
  if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::SequentiallyConsistent;

  LLVMContext &Ctx = F->getContext();

  // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
  Type *RetTy = CI->getType();
  if (VectorType *VT = dyn_cast<VectorType>(Val: RetTy)) {
    if (VT->getElementType()->isIntegerTy(Bitwidth: 16)) {
      VectorType *AsBF16 =
          VectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), EC: VT->getElementCount());
      Val = Builder.CreateBitCast(V: Val, DestTy: AsBF16);
    }
  }

  // The scope argument never really worked correctly. Use agent as the most
  // conservative option which should still always produce the instruction.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID(SSN: "agent");
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(Op: RMWOp, Ptr, Val, Align: std::nullopt, Ordering: Order, SSID);

  // Outside LAS, tag the rmw so the backend can expand it without worrying
  // about fine-grained memory.
  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
    MDNode *EmptyMD = MDNode::get(Context&: F->getContext(), MDs: {});
    RMW->setMetadata(Kind: "amdgpu.no.fine.grained.memory", Node: EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: EmptyMD);
  }

  // Flat pointers are annotated as never aliasing the private address space.
  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
    MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
        MDB.createRange(Lo: APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
                        Hi: APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
    RMW->setMetadata(KindID: LLVMContext::MD_noalias_addrspace, Node: RangeNotPrivate);
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  // Cast back in case the value was bitcast to bf16 above.
  return Builder.CreateBitCast(V: RMW, DestTy: RetTy);
}
4806
4807/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4808/// plain MDNode, as it's the verifier's job to check these are the correct
4809/// types later.
4810static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4811 if (Op < CI->arg_size()) {
4812 if (MetadataAsValue *MAV =
4813 dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op))) {
4814 Metadata *MD = MAV->getMetadata();
4815 return dyn_cast_if_present<MDNode>(Val: MD);
4816 }
4817 }
4818 return nullptr;
4819}
4820
4821/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4822static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4823 if (Op < CI->arg_size())
4824 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op)))
4825 return MAV->getMetadata();
4826 return nullptr;
4827}
4828
4829static MDNode *getDebugLocSafe(const Instruction *I) {
4830 // The MDNode attached to this instruction might not be the correct type,
4831 // as the verifier has not yet be run. Fetch it as a bare MDNode.
4832 return I->getDebugLoc().getAsMDNode();
4833}
4834
/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
/// The created record is inserted immediately before \p CI; nonzero-offset
/// legacy dbg.values are dropped without a replacement.
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
  DbgRecord *DR = nullptr;
  if (Name == "label") {
    // llvm.dbg.label: operand 0 is the label metadata.
    DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(Label: unwrapMAVOp(CI, Op: 0),
                                                        DL: CI->getDebugLoc());
  } else if (Name == "assign") {
    // llvm.dbg.assign: (value, variable, expression, assign-id, address,
    // address-expression).
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        Type: DbgVariableRecord::LocationType::Assign, Val: unwrapMAVMetadataOp(CI, Op: 0),
        Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: unwrapMAVOp(CI, Op: 3),
        Address: unwrapMAVMetadataOp(CI, Op: 4),
        /*The address is a Value ref, it will be stored as a Metadata */
        AddressExpression: unwrapMAVOp(CI, Op: 5), DI: getDebugLocSafe(I: CI));
  } else if (Name == "declare") {
    // llvm.dbg.declare: (address, variable, expression).
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        Type: DbgVariableRecord::LocationType::Declare, Val: unwrapMAVMetadataOp(CI, Op: 0),
        Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: nullptr, Address: nullptr, AddressExpression: nullptr,
        DI: getDebugLocSafe(I: CI));
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    MDNode *ExprNode = unwrapMAVOp(CI, Op: 2);
    // Don't try to add something to the expression if it's not an expression.
    // Instead, allow the verifier to fail later.
    // NOTE(review): unwrapMAVOp can return null; dyn_cast below requires a
    // non-null pointer — confirm a malformed dbg.addr cannot reach here.
    if (DIExpression *Expr = dyn_cast<DIExpression>(Val: ExprNode)) {
      ExprNode = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
    }
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
        Variable: unwrapMAVOp(CI, Op: 1), Expression: ExprNode, AssignID: nullptr, Address: nullptr, AddressExpression: nullptr,
        DI: getDebugLocSafe(I: CI));
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1));
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isNullValue())
        return;
      // Zero offset: variable and expression shift one operand to the right.
      VarOp = 2;
      ExprOp = 3;
    }
    DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
        Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
        Variable: unwrapMAVOp(CI, Op: VarOp), Expression: unwrapMAVOp(CI, Op: ExprOp), AssignID: nullptr, Address: nullptr,
        AddressExpression: nullptr, DI: getDebugLocSafe(I: CI));
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  CI->getParent()->insertDbgRecordBefore(DR, Here: CI->getIterator());
}
4887
4888static Value *upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder) {
4889 auto *Offset = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
4890 if (!Offset)
4891 reportFatalUsageError(reason: "Invalid llvm.vector.splice offset argument");
4892 int64_t OffsetVal = Offset->getSExtValue();
4893 return Builder.CreateIntrinsic(ID: OffsetVal >= 0
4894 ? Intrinsic::vector_splice_left
4895 : Intrinsic::vector_splice_right,
4896 Types: CI->getType(),
4897 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4898 Builder.getInt32(C: std::abs(i: OffsetVal))});
4899}
4900
4901static Value *upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI,
4902 Function *F, IRBuilder<> &Builder) {
4903 if (Name.starts_with(Prefix: "to.fp16")) {
4904 Value *Cast =
4905 Builder.CreateFPTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
4906 return Builder.CreateBitCast(V: Cast, DestTy: CI->getType());
4907 }
4908
4909 if (Name.starts_with(Prefix: "from.fp16")) {
4910 Value *Cast =
4911 Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
4912 return Builder.CreateFPExt(V: Cast, DestTy: CI->getType());
4913 }
4914
4915 return nullptr;
4916}
4917
4918/// Upgrade a call to an old intrinsic. All argument and return casting must be
4919/// provided to seamlessly integrate with existing context.
4920void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4921 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4922 // checks the callee's function type matches. It's likely we need to handle
4923 // type changes here.
4924 Function *F = dyn_cast<Function>(Val: CI->getCalledOperand());
4925 if (!F)
4926 return;
4927
4928 LLVMContext &C = CI->getContext();
4929 IRBuilder<> Builder(C);
4930 if (isa<FPMathOperator>(Val: CI))
4931 Builder.setFastMathFlags(CI->getFastMathFlags());
4932 Builder.SetInsertPoint(TheBB: CI->getParent(), IP: CI->getIterator());
4933
4934 if (!NewFn) {
4935 // Get the Function's name.
4936 StringRef Name = F->getName();
4937 if (!Name.consume_front(Prefix: "llvm."))
4938 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
4939
4940 bool IsX86 = Name.consume_front(Prefix: "x86.");
4941 bool IsNVVM = Name.consume_front(Prefix: "nvvm.");
4942 bool IsAArch64 = Name.consume_front(Prefix: "aarch64.");
4943 bool IsARM = Name.consume_front(Prefix: "arm.");
4944 bool IsAMDGCN = Name.consume_front(Prefix: "amdgcn.");
4945 bool IsDbg = Name.consume_front(Prefix: "dbg.");
4946 bool IsOldSplice =
4947 (Name.consume_front(Prefix: "experimental.vector.splice") ||
4948 Name.consume_front(Prefix: "vector.splice")) &&
4949 !(Name.starts_with(Prefix: ".left") || Name.starts_with(Prefix: ".right"));
4950 Value *Rep = nullptr;
4951
4952 if (!IsX86 && Name == "stackprotectorcheck") {
4953 Rep = nullptr;
4954 } else if (IsNVVM) {
4955 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4956 } else if (IsX86) {
4957 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4958 } else if (IsAArch64) {
4959 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4960 } else if (IsARM) {
4961 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4962 } else if (IsAMDGCN) {
4963 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4964 } else if (IsDbg) {
4965 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4966 } else if (IsOldSplice) {
4967 Rep = upgradeVectorSplice(CI, Builder);
4968 } else if (Name.consume_front(Prefix: "convert.")) {
4969 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
4970 } else {
4971 llvm_unreachable("Unknown function for CallBase upgrade.");
4972 }
4973
4974 if (Rep)
4975 CI->replaceAllUsesWith(V: Rep);
4976 CI->eraseFromParent();
4977 return;
4978 }
4979
4980 const auto &DefaultCase = [&]() -> void {
4981 if (F == NewFn)
4982 return;
4983
4984 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4985 // Handle generic mangling change.
4986 assert(
4987 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4988 "Unknown function for CallBase upgrade and isn't just a name change");
4989 CI->setCalledFunction(NewFn);
4990 return;
4991 }
4992
4993 // This must be an upgrade from a named to a literal struct.
4994 if (auto *OldST = dyn_cast<StructType>(Val: CI->getType())) {
4995 assert(OldST != NewFn->getReturnType() &&
4996 "Return type must have changed");
4997 assert(OldST->getNumElements() ==
4998 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4999 "Must have same number of elements");
5000
5001 SmallVector<Value *> Args(CI->args());
5002 CallInst *NewCI = Builder.CreateCall(Callee: NewFn, Args);
5003 NewCI->setAttributes(CI->getAttributes());
5004 Value *Res = PoisonValue::get(T: OldST);
5005 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5006 Value *Elem = Builder.CreateExtractValue(Agg: NewCI, Idxs: Idx);
5007 Res = Builder.CreateInsertValue(Agg: Res, Val: Elem, Idxs: Idx);
5008 }
5009 CI->replaceAllUsesWith(V: Res);
5010 CI->eraseFromParent();
5011 return;
5012 }
5013
5014 // We're probably about to produce something invalid. Let the verifier catch
5015 // it instead of dying here.
5016 CI->setCalledOperand(
5017 ConstantExpr::getPointerCast(C: NewFn, Ty: CI->getCalledOperand()->getType()));
5018 return;
5019 };
5020 CallInst *NewCall = nullptr;
5021 switch (NewFn->getIntrinsicID()) {
5022 default: {
5023 DefaultCase();
5024 return;
5025 }
5026 case Intrinsic::arm_neon_vst1:
5027 case Intrinsic::arm_neon_vst2:
5028 case Intrinsic::arm_neon_vst3:
5029 case Intrinsic::arm_neon_vst4:
5030 case Intrinsic::arm_neon_vst2lane:
5031 case Intrinsic::arm_neon_vst3lane:
5032 case Intrinsic::arm_neon_vst4lane: {
5033 SmallVector<Value *, 4> Args(CI->args());
5034 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5035 break;
5036 }
5037 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5038 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5039 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5040 LLVMContext &Ctx = F->getParent()->getContext();
5041 SmallVector<Value *, 4> Args(CI->args());
5042 Args[3] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx),
5043 V: cast<ConstantInt>(Val: Args[3])->getZExtValue());
5044 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5045 break;
5046 }
5047 case Intrinsic::aarch64_sve_ld3_sret:
5048 case Intrinsic::aarch64_sve_ld4_sret:
5049 case Intrinsic::aarch64_sve_ld2_sret: {
5050 StringRef Name = F->getName();
5051 Name = Name.substr(Start: 5);
5052 unsigned N = StringSwitch<unsigned>(Name)
5053 .StartsWith(S: "aarch64.sve.ld2", Value: 2)
5054 .StartsWith(S: "aarch64.sve.ld3", Value: 3)
5055 .StartsWith(S: "aarch64.sve.ld4", Value: 4)
5056 .Default(Value: 0);
5057 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
5058 unsigned MinElts = RetTy->getMinNumElements() / N;
5059 SmallVector<Value *, 2> Args(CI->args());
5060 Value *NewLdCall = Builder.CreateCall(Callee: NewFn, Args);
5061 Value *Ret = llvm::PoisonValue::get(T: RetTy);
5062 for (unsigned I = 0; I < N; I++) {
5063 Value *SRet = Builder.CreateExtractValue(Agg: NewLdCall, Idxs: I);
5064 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: SRet, Idx: I * MinElts);
5065 }
5066 NewCall = dyn_cast<CallInst>(Val: Ret);
5067 break;
5068 }
5069
5070 case Intrinsic::coro_end: {
5071 SmallVector<Value *, 3> Args(CI->args());
5072 Args.push_back(Elt: ConstantTokenNone::get(Context&: CI->getContext()));
5073 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5074 break;
5075 }
5076
5077 case Intrinsic::vector_extract: {
5078 StringRef Name = F->getName();
5079 Name = Name.substr(Start: 5); // Strip llvm
5080 if (!Name.starts_with(Prefix: "aarch64.sve.tuple.get")) {
5081 DefaultCase();
5082 return;
5083 }
5084 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
5085 unsigned MinElts = RetTy->getMinNumElements();
5086 unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
5087 Value *NewIdx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts);
5088 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), NewIdx});
5089 break;
5090 }
5091
5092 case Intrinsic::vector_insert: {
5093 StringRef Name = F->getName();
5094 Name = Name.substr(Start: 5);
5095 if (!Name.starts_with(Prefix: "aarch64.sve.tuple")) {
5096 DefaultCase();
5097 return;
5098 }
5099 if (Name.starts_with(Prefix: "aarch64.sve.tuple.set")) {
5100 unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
5101 auto *Ty = cast<ScalableVectorType>(Val: CI->getArgOperand(i: 2)->getType());
5102 Value *NewIdx =
5103 ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * Ty->getMinNumElements());
5104 NewCall = Builder.CreateCall(
5105 Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), NewIdx});
5106 break;
5107 }
5108 if (Name.starts_with(Prefix: "aarch64.sve.tuple.create")) {
5109 unsigned N = StringSwitch<unsigned>(Name)
5110 .StartsWith(S: "aarch64.sve.tuple.create2", Value: 2)
5111 .StartsWith(S: "aarch64.sve.tuple.create3", Value: 3)
5112 .StartsWith(S: "aarch64.sve.tuple.create4", Value: 4)
5113 .Default(Value: 0);
5114 assert(N > 1 && "Create is expected to be between 2-4");
5115 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
5116 Value *Ret = llvm::PoisonValue::get(T: RetTy);
5117 unsigned MinElts = RetTy->getMinNumElements() / N;
5118 for (unsigned I = 0; I < N; I++) {
5119 Value *V = CI->getArgOperand(i: I);
5120 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: V, Idx: I * MinElts);
5121 }
5122 NewCall = dyn_cast<CallInst>(Val: Ret);
5123 }
5124 break;
5125 }
5126
5127 case Intrinsic::arm_neon_bfdot:
5128 case Intrinsic::arm_neon_bfmmla:
5129 case Intrinsic::arm_neon_bfmlalb:
5130 case Intrinsic::arm_neon_bfmlalt:
5131 case Intrinsic::aarch64_neon_bfdot:
5132 case Intrinsic::aarch64_neon_bfmmla:
5133 case Intrinsic::aarch64_neon_bfmlalb:
5134 case Intrinsic::aarch64_neon_bfmlalt: {
5135 SmallVector<Value *, 3> Args;
5136 assert(CI->arg_size() == 3 &&
5137 "Mismatch between function args and call args");
5138 size_t OperandWidth =
5139 CI->getArgOperand(i: 1)->getType()->getPrimitiveSizeInBits();
5140 assert((OperandWidth == 64 || OperandWidth == 128) &&
5141 "Unexpected operand width");
5142 Type *NewTy = FixedVectorType::get(ElementType: Type::getBFloatTy(C), NumElts: OperandWidth / 16);
5143 auto Iter = CI->args().begin();
5144 Args.push_back(Elt: *Iter++);
5145 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
5146 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
5147 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5148 break;
5149 }
5150
5151 case Intrinsic::bitreverse:
5152 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
5153 break;
5154
5155 case Intrinsic::ctlz:
5156 case Intrinsic::cttz: {
5157 if (CI->arg_size() != 1) {
5158 DefaultCase();
5159 return;
5160 }
5161
5162 NewCall =
5163 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), Builder.getFalse()});
5164 break;
5165 }
5166
5167 case Intrinsic::objectsize: {
5168 Value *NullIsUnknownSize =
5169 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(i: 2);
5170 Value *Dynamic =
5171 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(i: 3);
5172 NewCall = Builder.CreateCall(
5173 Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), NullIsUnknownSize, Dynamic});
5174 break;
5175 }
5176
5177 case Intrinsic::ctpop:
5178 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
5179 break;
5180 case Intrinsic::dbg_value: {
5181 StringRef Name = F->getName();
5182 Name = Name.substr(Start: 5); // Strip llvm.
5183 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5184 if (Name.starts_with(Prefix: "dbg.addr")) {
5185 DIExpression *Expr = cast<DIExpression>(
5186 Val: cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2))->getMetadata());
5187 Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
5188 NewCall =
5189 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5190 MetadataAsValue::get(Context&: C, MD: Expr)});
5191 break;
5192 }
5193
5194 // Upgrade from the old version that had an extra offset argument.
5195 assert(CI->arg_size() == 4);
5196 // Drop nonzero offsets instead of attempting to upgrade them.
5197 if (auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1)))
5198 if (Offset->isNullValue()) {
5199 NewCall = Builder.CreateCall(
5200 Callee: NewFn,
5201 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)});
5202 break;
5203 }
5204 CI->eraseFromParent();
5205 return;
5206 }
5207
5208 case Intrinsic::ptr_annotation:
5209 // Upgrade from versions that lacked the annotation attribute argument.
5210 if (CI->arg_size() != 4) {
5211 DefaultCase();
5212 return;
5213 }
5214
5215 // Create a new call with an added null annotation attribute argument.
5216 NewCall = Builder.CreateCall(
5217 Callee: NewFn,
5218 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
5219 CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
5220 NewCall->takeName(V: CI);
5221 CI->replaceAllUsesWith(V: NewCall);
5222 CI->eraseFromParent();
5223 return;
5224
5225 case Intrinsic::var_annotation:
5226 // Upgrade from versions that lacked the annotation attribute argument.
5227 if (CI->arg_size() != 4) {
5228 DefaultCase();
5229 return;
5230 }
5231 // Create a new call with an added null annotation attribute argument.
5232 NewCall = Builder.CreateCall(
5233 Callee: NewFn,
5234 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
5235 CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
5236 NewCall->takeName(V: CI);
5237 CI->replaceAllUsesWith(V: NewCall);
5238 CI->eraseFromParent();
5239 return;
5240
5241 case Intrinsic::riscv_aes32dsi:
5242 case Intrinsic::riscv_aes32dsmi:
5243 case Intrinsic::riscv_aes32esi:
5244 case Intrinsic::riscv_aes32esmi:
5245 case Intrinsic::riscv_sm4ks:
5246 case Intrinsic::riscv_sm4ed: {
5247 // The last argument to these intrinsics used to be i8 and changed to i32.
5248 // The type overload for sm4ks and sm4ed was removed.
5249 Value *Arg2 = CI->getArgOperand(i: 2);
5250 if (Arg2->getType()->isIntegerTy(Bitwidth: 32) && !CI->getType()->isIntegerTy(Bitwidth: 64))
5251 return;
5252
5253 Value *Arg0 = CI->getArgOperand(i: 0);
5254 Value *Arg1 = CI->getArgOperand(i: 1);
5255 if (CI->getType()->isIntegerTy(Bitwidth: 64)) {
5256 Arg0 = Builder.CreateTrunc(V: Arg0, DestTy: Builder.getInt32Ty());
5257 Arg1 = Builder.CreateTrunc(V: Arg1, DestTy: Builder.getInt32Ty());
5258 }
5259
5260 Arg2 = ConstantInt::get(Ty: Type::getInt32Ty(C),
5261 V: cast<ConstantInt>(Val: Arg2)->getZExtValue());
5262
5263 NewCall = Builder.CreateCall(Callee: NewFn, Args: {Arg0, Arg1, Arg2});
5264 Value *Res = NewCall;
5265 if (Res->getType() != CI->getType())
5266 Res = Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
5267 NewCall->takeName(V: CI);
5268 CI->replaceAllUsesWith(V: Res);
5269 CI->eraseFromParent();
5270 return;
5271 }
5272 case Intrinsic::nvvm_mapa_shared_cluster: {
5273 // Create a new call with the correct address space.
5274 NewCall =
5275 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
5276 Value *Res = NewCall;
5277 Res = Builder.CreateAddrSpaceCast(
5278 V: Res, DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED));
5279 NewCall->takeName(V: CI);
5280 CI->replaceAllUsesWith(V: Res);
5281 CI->eraseFromParent();
5282 return;
5283 }
5284 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5285 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5286 // Create a new call with the correct address space.
5287 SmallVector<Value *, 4> Args(CI->args());
5288 Args[0] = Builder.CreateAddrSpaceCast(
5289 V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5290
5291 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5292 NewCall->takeName(V: CI);
5293 CI->replaceAllUsesWith(V: NewCall);
5294 CI->eraseFromParent();
5295 return;
5296 }
5297 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5298 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5299 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5300 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5301 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5302 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5303 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5304 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5305 SmallVector<Value *, 16> Args(CI->args());
5306
5307 // Create AddrSpaceCast to shared_cluster if needed.
5308 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5309 unsigned AS = CI->getArgOperand(i: 0)->getType()->getPointerAddressSpace();
5310 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5311 Args[0] = Builder.CreateAddrSpaceCast(
5312 V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5313
5314 // Attach the flag argument for cta_group, with a
5315 // default value of 0. This handles case (2) in
5316 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5317 size_t NumArgs = CI->arg_size();
5318 Value *FlagArg = CI->getArgOperand(i: NumArgs - 3);
5319 if (!FlagArg->getType()->isIntegerTy(Bitwidth: 1))
5320 Args.push_back(Elt: ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0));
5321
5322 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5323 NewCall->takeName(V: CI);
5324 CI->replaceAllUsesWith(V: NewCall);
5325 CI->eraseFromParent();
5326 return;
5327 }
5328 case Intrinsic::riscv_sha256sig0:
5329 case Intrinsic::riscv_sha256sig1:
5330 case Intrinsic::riscv_sha256sum0:
5331 case Intrinsic::riscv_sha256sum1:
5332 case Intrinsic::riscv_sm3p0:
5333 case Intrinsic::riscv_sm3p1: {
5334 // The last argument to these intrinsics used to be i8 and changed to i32.
5335 // The type overload for sm4ks and sm4ed was removed.
5336 if (!CI->getType()->isIntegerTy(Bitwidth: 64))
5337 return;
5338
5339 Value *Arg =
5340 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getInt32Ty());
5341
5342 NewCall = Builder.CreateCall(Callee: NewFn, Args: Arg);
5343 Value *Res =
5344 Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
5345 NewCall->takeName(V: CI);
5346 CI->replaceAllUsesWith(V: Res);
5347 CI->eraseFromParent();
5348 return;
5349 }
5350
5351 case Intrinsic::x86_xop_vfrcz_ss:
5352 case Intrinsic::x86_xop_vfrcz_sd:
5353 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 1)});
5354 break;
5355
5356 case Intrinsic::x86_xop_vpermil2pd:
5357 case Intrinsic::x86_xop_vpermil2ps:
5358 case Intrinsic::x86_xop_vpermil2pd_256:
5359 case Intrinsic::x86_xop_vpermil2ps_256: {
5360 SmallVector<Value *, 4> Args(CI->args());
5361 VectorType *FltIdxTy = cast<VectorType>(Val: Args[2]->getType());
5362 VectorType *IntIdxTy = VectorType::getInteger(VTy: FltIdxTy);
5363 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: IntIdxTy);
5364 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5365 break;
5366 }
5367
5368 case Intrinsic::x86_sse41_ptestc:
5369 case Intrinsic::x86_sse41_ptestz:
5370 case Intrinsic::x86_sse41_ptestnzc: {
5371 // The arguments for these intrinsics used to be v4f32, and changed
5372 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5373 // So, the only thing required is a bitcast for both arguments.
5374 // First, check the arguments have the old type.
5375 Value *Arg0 = CI->getArgOperand(i: 0);
5376 if (Arg0->getType() != FixedVectorType::get(ElementType: Type::getFloatTy(C), NumElts: 4))
5377 return;
5378
5379 // Old intrinsic, add bitcasts
5380 Value *Arg1 = CI->getArgOperand(i: 1);
5381
5382 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
5383
5384 Value *BC0 = Builder.CreateBitCast(V: Arg0, DestTy: NewVecTy, Name: "cast");
5385 Value *BC1 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
5386
5387 NewCall = Builder.CreateCall(Callee: NewFn, Args: {BC0, BC1});
5388 break;
5389 }
5390
5391 case Intrinsic::x86_rdtscp: {
5392 // This used to take 1 arguments. If we have no arguments, it is already
5393 // upgraded.
5394 if (CI->getNumOperands() == 0)
5395 return;
5396
5397 NewCall = Builder.CreateCall(Callee: NewFn);
5398 // Extract the second result and store it.
5399 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
5400 Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
5401 // Replace the original call result with the first result of the new call.
5402 Value *TSC = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
5403
5404 NewCall->takeName(V: CI);
5405 CI->replaceAllUsesWith(V: TSC);
5406 CI->eraseFromParent();
5407 return;
5408 }
5409
5410 case Intrinsic::x86_sse41_insertps:
5411 case Intrinsic::x86_sse41_dppd:
5412 case Intrinsic::x86_sse41_dpps:
5413 case Intrinsic::x86_sse41_mpsadbw:
5414 case Intrinsic::x86_avx_dp_ps_256:
5415 case Intrinsic::x86_avx2_mpsadbw: {
5416 // Need to truncate the last argument from i32 to i8 -- this argument models
5417 // an inherently 8-bit immediate operand to these x86 instructions.
5418 SmallVector<Value *, 4> Args(CI->args());
5419
5420 // Replace the last argument with a trunc.
5421 Args.back() = Builder.CreateTrunc(V: Args.back(), DestTy: Type::getInt8Ty(C), Name: "trunc");
5422 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5423 break;
5424 }
5425
5426 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5427 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5428 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5429 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5430 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5431 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5432 SmallVector<Value *, 4> Args(CI->args());
5433 unsigned NumElts =
5434 cast<FixedVectorType>(Val: Args[0]->getType())->getNumElements();
5435 Args[3] = getX86MaskVec(Builder, Mask: Args[3], NumElts);
5436
5437 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5438 Value *Res = applyX86MaskOn1BitsVec(Builder, Vec: NewCall, Mask: nullptr);
5439
5440 NewCall->takeName(V: CI);
5441 CI->replaceAllUsesWith(V: Res);
5442 CI->eraseFromParent();
5443 return;
5444 }
5445
5446 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5447 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5448 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5449 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5450 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5451 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5452 SmallVector<Value *, 4> Args(CI->args());
5453 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
5454 if (NewFn->getIntrinsicID() ==
5455 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5456 Args[1] = Builder.CreateBitCast(
5457 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5458
5459 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5460 Value *Res = Builder.CreateBitCast(
5461 V: NewCall, DestTy: FixedVectorType::get(ElementType: Builder.getInt16Ty(), NumElts));
5462
5463 NewCall->takeName(V: CI);
5464 CI->replaceAllUsesWith(V: Res);
5465 CI->eraseFromParent();
5466 return;
5467 }
5468 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5469 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5470 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5471 SmallVector<Value *, 4> Args(CI->args());
5472 unsigned NumElts =
5473 cast<FixedVectorType>(Val: CI->getType())->getNumElements() * 2;
5474 Args[1] = Builder.CreateBitCast(
5475 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5476 Args[2] = Builder.CreateBitCast(
5477 V: Args[2], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5478
5479 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5480 break;
5481 }
5482
5483 case Intrinsic::thread_pointer: {
5484 NewCall = Builder.CreateCall(Callee: NewFn, Args: {});
5485 break;
5486 }
5487
5488 case Intrinsic::memcpy:
5489 case Intrinsic::memmove:
5490 case Intrinsic::memset: {
5491 // We have to make sure that the call signature is what we're expecting.
5492 // We only want to change the old signatures by removing the alignment arg:
5493 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5494 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5495 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5496 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5497 // Note: i8*'s in the above can be any pointer type
5498 if (CI->arg_size() != 5) {
5499 DefaultCase();
5500 return;
5501 }
5502 // Remove alignment argument (3), and add alignment attributes to the
5503 // dest/src pointers.
5504 Value *Args[4] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5505 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
5506 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5507 AttributeList OldAttrs = CI->getAttributes();
5508 AttributeList NewAttrs = AttributeList::get(
5509 C, FnAttrs: OldAttrs.getFnAttrs(), RetAttrs: OldAttrs.getRetAttrs(),
5510 ArgAttrs: {OldAttrs.getParamAttrs(ArgNo: 0), OldAttrs.getParamAttrs(ArgNo: 1),
5511 OldAttrs.getParamAttrs(ArgNo: 2), OldAttrs.getParamAttrs(ArgNo: 4)});
5512 NewCall->setAttributes(NewAttrs);
5513 auto *MemCI = cast<MemIntrinsic>(Val: NewCall);
5514 // All mem intrinsics support dest alignment.
5515 const ConstantInt *Align = cast<ConstantInt>(Val: CI->getArgOperand(i: 3));
5516 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5517 // Memcpy/Memmove also support source alignment.
5518 if (auto *MTI = dyn_cast<MemTransferInst>(Val: MemCI))
5519 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5520 break;
5521 }
5522
5523 case Intrinsic::masked_load:
5524 case Intrinsic::masked_gather:
5525 case Intrinsic::masked_store:
5526 case Intrinsic::masked_scatter: {
5527 if (CI->arg_size() != 4) {
5528 DefaultCase();
5529 return;
5530 }
5531
5532 auto GetMaybeAlign = [](Value *Op) {
5533 if (auto *CI = dyn_cast<ConstantInt>(Val: Op)) {
5534 uint64_t Val = CI->getZExtValue();
5535 if (Val == 0)
5536 return MaybeAlign();
5537 if (isPowerOf2_64(Value: Val))
5538 return MaybeAlign(Val);
5539 }
5540 reportFatalUsageError(reason: "Invalid alignment argument");
5541 };
5542 auto GetAlign = [&](Value *Op) {
5543 MaybeAlign Align = GetMaybeAlign(Op);
5544 if (Align)
5545 return *Align;
5546 reportFatalUsageError(reason: "Invalid zero alignment argument");
5547 };
5548
5549 const DataLayout &DL = CI->getDataLayout();
5550 switch (NewFn->getIntrinsicID()) {
5551 case Intrinsic::masked_load:
5552 NewCall = Builder.CreateMaskedLoad(
5553 Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0), Alignment: GetAlign(CI->getArgOperand(i: 1)),
5554 Mask: CI->getArgOperand(i: 2), PassThru: CI->getArgOperand(i: 3));
5555 break;
5556 case Intrinsic::masked_gather:
5557 NewCall = Builder.CreateMaskedGather(
5558 Ty: CI->getType(), Ptrs: CI->getArgOperand(i: 0),
5559 Alignment: DL.getValueOrABITypeAlignment(Alignment: GetMaybeAlign(CI->getArgOperand(i: 1)),
5560 Ty: CI->getType()->getScalarType()),
5561 Mask: CI->getArgOperand(i: 2), PassThru: CI->getArgOperand(i: 3));
5562 break;
5563 case Intrinsic::masked_store:
5564 NewCall = Builder.CreateMaskedStore(
5565 Val: CI->getArgOperand(i: 0), Ptr: CI->getArgOperand(i: 1),
5566 Alignment: GetAlign(CI->getArgOperand(i: 2)), Mask: CI->getArgOperand(i: 3));
5567 break;
5568 case Intrinsic::masked_scatter:
5569 NewCall = Builder.CreateMaskedScatter(
5570 Val: CI->getArgOperand(i: 0), Ptrs: CI->getArgOperand(i: 1),
5571 Alignment: DL.getValueOrABITypeAlignment(
5572 Alignment: GetMaybeAlign(CI->getArgOperand(i: 2)),
5573 Ty: CI->getArgOperand(i: 0)->getType()->getScalarType()),
5574 Mask: CI->getArgOperand(i: 3));
5575 break;
5576 default:
5577 llvm_unreachable("Unexpected intrinsic ID");
5578 }
5579 // Previous metadata is still valid.
5580 NewCall->copyMetadata(SrcInst: *CI);
5581 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
5582 break;
5583 }
5584
5585 case Intrinsic::lifetime_start:
5586 case Intrinsic::lifetime_end: {
5587 if (CI->arg_size() != 2) {
5588 DefaultCase();
5589 return;
5590 }
5591
5592 Value *Ptr = CI->getArgOperand(i: 1);
5593 // Try to strip pointer casts, such that the lifetime works on an alloca.
5594 Ptr = Ptr->stripPointerCasts();
5595 if (isa<AllocaInst>(Val: Ptr)) {
5596 // Don't use NewFn, as we might have looked through an addrspacecast.
5597 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5598 NewCall = Builder.CreateLifetimeStart(Ptr);
5599 else
5600 NewCall = Builder.CreateLifetimeEnd(Ptr);
5601 break;
5602 }
5603
5604 // Otherwise remove the lifetime marker.
5605 CI->eraseFromParent();
5606 return;
5607 }
5608
5609 case Intrinsic::x86_avx512_vpdpbusd_128:
5610 case Intrinsic::x86_avx512_vpdpbusd_256:
5611 case Intrinsic::x86_avx512_vpdpbusd_512:
5612 case Intrinsic::x86_avx512_vpdpbusds_128:
5613 case Intrinsic::x86_avx512_vpdpbusds_256:
5614 case Intrinsic::x86_avx512_vpdpbusds_512:
5615 case Intrinsic::x86_avx2_vpdpbssd_128:
5616 case Intrinsic::x86_avx2_vpdpbssd_256:
5617 case Intrinsic::x86_avx10_vpdpbssd_512:
5618 case Intrinsic::x86_avx2_vpdpbssds_128:
5619 case Intrinsic::x86_avx2_vpdpbssds_256:
5620 case Intrinsic::x86_avx10_vpdpbssds_512:
5621 case Intrinsic::x86_avx2_vpdpbsud_128:
5622 case Intrinsic::x86_avx2_vpdpbsud_256:
5623 case Intrinsic::x86_avx10_vpdpbsud_512:
5624 case Intrinsic::x86_avx2_vpdpbsuds_128:
5625 case Intrinsic::x86_avx2_vpdpbsuds_256:
5626 case Intrinsic::x86_avx10_vpdpbsuds_512:
5627 case Intrinsic::x86_avx2_vpdpbuud_128:
5628 case Intrinsic::x86_avx2_vpdpbuud_256:
5629 case Intrinsic::x86_avx10_vpdpbuud_512:
5630 case Intrinsic::x86_avx2_vpdpbuuds_128:
5631 case Intrinsic::x86_avx2_vpdpbuuds_256:
5632 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5633 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5634 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5635 CI->getArgOperand(i: 2)};
5636 Type *NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: NumElts, Scalable: false);
5637 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
5638 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
5639
5640 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5641 break;
5642 }
5643 case Intrinsic::x86_avx512_vpdpwssd_128:
5644 case Intrinsic::x86_avx512_vpdpwssd_256:
5645 case Intrinsic::x86_avx512_vpdpwssd_512:
5646 case Intrinsic::x86_avx512_vpdpwssds_128:
5647 case Intrinsic::x86_avx512_vpdpwssds_256:
5648 case Intrinsic::x86_avx512_vpdpwssds_512:
5649 case Intrinsic::x86_avx2_vpdpwsud_128:
5650 case Intrinsic::x86_avx2_vpdpwsud_256:
5651 case Intrinsic::x86_avx10_vpdpwsud_512:
5652 case Intrinsic::x86_avx2_vpdpwsuds_128:
5653 case Intrinsic::x86_avx2_vpdpwsuds_256:
5654 case Intrinsic::x86_avx10_vpdpwsuds_512:
5655 case Intrinsic::x86_avx2_vpdpwusd_128:
5656 case Intrinsic::x86_avx2_vpdpwusd_256:
5657 case Intrinsic::x86_avx10_vpdpwusd_512:
5658 case Intrinsic::x86_avx2_vpdpwusds_128:
5659 case Intrinsic::x86_avx2_vpdpwusds_256:
5660 case Intrinsic::x86_avx10_vpdpwusds_512:
5661 case Intrinsic::x86_avx2_vpdpwuud_128:
5662 case Intrinsic::x86_avx2_vpdpwuud_256:
5663 case Intrinsic::x86_avx10_vpdpwuud_512:
5664 case Intrinsic::x86_avx2_vpdpwuuds_128:
5665 case Intrinsic::x86_avx2_vpdpwuuds_256:
5666 case Intrinsic::x86_avx10_vpdpwuuds_512:
5667 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5668 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5669 CI->getArgOperand(i: 2)};
5670 Type *NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: NumElts, Scalable: false);
5671 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
5672 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
5673
5674 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5675 break;
5676 }
5677 assert(NewCall && "Should have either set this variable or returned through "
5678 "the default case");
5679 NewCall->takeName(V: CI);
5680 CI->replaceAllUsesWith(V: NewCall);
5681 CI->eraseFromParent();
5682}
5683
5684void llvm::UpgradeCallsToIntrinsic(Function *F) {
5685 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5686
5687 // Check if this function should be upgraded and get the replacement function
5688 // if there is one.
5689 Function *NewFn;
5690 if (UpgradeIntrinsicFunction(F, NewFn)) {
5691 // Replace all users of the old function with the new function or new
5692 // instructions. This is not a range loop because the call is deleted.
5693 for (User *U : make_early_inc_range(Range: F->users()))
5694 if (CallBase *CB = dyn_cast<CallBase>(Val: U))
5695 UpgradeIntrinsicCall(CI: CB, NewFn);
5696
5697 // Remove old function, no longer used, from the module.
5698 if (F != NewFn)
5699 F->eraseFromParent();
5700 }
5701}
5702
5703MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5704 const unsigned NumOperands = MD.getNumOperands();
5705 if (NumOperands == 0)
5706 return &MD; // Invalid, punt to a verifier error.
5707
5708 // Check if the tag uses struct-path aware TBAA format.
5709 if (isa<MDNode>(Val: MD.getOperand(I: 0)) && NumOperands >= 3)
5710 return &MD;
5711
5712 auto &Context = MD.getContext();
5713 if (NumOperands == 3) {
5714 Metadata *Elts[] = {MD.getOperand(I: 0), MD.getOperand(I: 1)};
5715 MDNode *ScalarType = MDNode::get(Context, MDs: Elts);
5716 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5717 Metadata *Elts2[] = {ScalarType, ScalarType,
5718 ConstantAsMetadata::get(
5719 C: Constant::getNullValue(Ty: Type::getInt64Ty(C&: Context))),
5720 MD.getOperand(I: 2)};
5721 return MDNode::get(Context, MDs: Elts2);
5722 }
5723 // Create a MDNode <MD, MD, offset 0>
5724 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(C: Constant::getNullValue(
5725 Ty: Type::getInt64Ty(C&: Context)))};
5726 return MDNode::get(Context, MDs: Elts);
5727}
5728
5729Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5730 Instruction *&Temp) {
5731 if (Opc != Instruction::BitCast)
5732 return nullptr;
5733
5734 Temp = nullptr;
5735 Type *SrcTy = V->getType();
5736 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5737 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5738 LLVMContext &Context = V->getContext();
5739
5740 // We have no information about target data layout, so we assume that
5741 // the maximum pointer size is 64bit.
5742 Type *MidTy = Type::getInt64Ty(C&: Context);
5743 Temp = CastInst::Create(Instruction::PtrToInt, S: V, Ty: MidTy);
5744
5745 return CastInst::Create(Instruction::IntToPtr, S: Temp, Ty: DestTy);
5746 }
5747
5748 return nullptr;
5749}
5750
5751Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5752 if (Opc != Instruction::BitCast)
5753 return nullptr;
5754
5755 Type *SrcTy = C->getType();
5756 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5757 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5758 LLVMContext &Context = C->getContext();
5759
5760 // We have no information about target data layout, so we assume that
5761 // the maximum pointer size is 64bit.
5762 Type *MidTy = Type::getInt64Ty(C&: Context);
5763
5764 return ConstantExpr::getIntToPtr(C: ConstantExpr::getPtrToInt(C, Ty: MidTy),
5765 Ty: DestTy);
5766 }
5767
5768 return nullptr;
5769}
5770
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  if (DisableAutoUpgradeDebugInfo)
    return false;

  llvm::TimeTraceScope timeScope("Upgrade debug info");
  // We need to get metadata before the module is verified (i.e., getModuleFlag
  // makes assumptions that we haven't verified yet). Carefully extract the flag
  // from the metadata.
  unsigned Version = 0;
  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    // Locate the "Debug Info Version" flag by hand. A well-formed module flag
    // is a triple (behavior, key, value); skip anything shorter.
    auto OpIt = find_if(Range: ModFlags->operands(), P: [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Val: Flag->getOperand(I: 1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      // The flag value may itself be malformed; only accept a ConstantInt.
      const MDOperand &ValOp = (*OpIt)->getOperand(I: 2);
      if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD: ValOp))
        Version = CI->getZExtValue();
    }
  }

  if (Version == DEBUG_METADATA_VERSION) {
    // Current version: run the verifier. A broken module is fatal; broken
    // debug info is diagnosed here and then stripped below.
    bool BrokenDebugInfo = false;
    if (verifyModule(M, OS: &llvm::errs(), BrokenDebugInfo: &BrokenDebugInfo))
      report_fatal_error(reason: "Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(DI: Diag);
    }
  }
  // Reached on version mismatch or malformed debug info: drop it all.
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DI: DiagVersion);
  }
  return Modified;
}
5818
// Fold one dimension's value from a legacy nvvm.annotations entry (e.g.
// "maxntidx") into the comma-separated 3-vector string attribute Attr (e.g.
// "nvvm.maxntid") on the function, preserving any dimensions the attribute
// already carries. Unspecified dimensions default to "1".
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
                                    GlobalValue *GV, const Metadata *V) {
  Function *F = cast<Function>(Val: GV);

  constexpr StringLiteral DefaultValue = "1";
  StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
  unsigned Length = 0;

  if (F->hasFnAttribute(Kind: Attr)) {
    // We expect the existing attribute to have the form "x[,y[,z]]". Here we
    // parse these elements placing them into Vect3
    StringRef S = F->getFnAttribute(Kind: Attr).getValueAsString();
    for (; Length < 3 && !S.empty(); Length++) {
      auto [Part, Rest] = S.split(Separator: ',');
      Vect3[Length] = Part.trim();
      S = Rest;
    }
  }

  // DimC is one of 'x'/'y'/'z' (checked by the callers via isXYZ).
  const unsigned Dim = DimC - 'x';
  assert(Dim < 3 && "Unexpected dim char");

  const uint64_t VInt = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();

  // local variable required for StringRef in Vect3 to point to.
  const std::string VStr = llvm::utostr(X: VInt);
  Vect3[Dim] = VStr;
  // Emit only as many components as needed to cover the updated dimension.
  Length = std::max(a: Length, b: Dim + 1);

  const std::string NewAttr = llvm::join(R: ArrayRef(Vect3, Length), Separator: ",");
  F->addFnAttr(Kind: Attr, Val: NewAttr);
}
5851
5852static inline bool isXYZ(StringRef S) {
5853 return S == "x" || S == "y" || S == "z";
5854}
5855
// Translate a single legacy nvvm.annotations key/value pair (K, V) attached to
// GV into the modern representation (calling convention, function attributes,
// or parameter attributes). Returns true when the pair was consumed and must
// not be re-emitted; false leaves it for the caller to keep verbatim.
bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
                                        const Metadata *V) {
  if (K == "kernel") {
    // A non-zero "kernel" annotation now maps to the PTX_Kernel CC.
    if (!mdconst::extract<ConstantInt>(MD&: V)->isZero())
      cast<Function>(Val: GV)->setCallingConv(CallingConv::PTX_Kernel);
    return true;
  }
  if (K == "align") {
    // V is a bitfield specifying two 16-bit values. The alignment value is
    // specified in low 16-bits, The index is specified in the high bits. For the
    // index, 0 indicates the return value while higher values correspond to
    // each parameter (idx = param + 1).
    const uint64_t AlignIdxValuePair =
        mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
    const unsigned Idx = (AlignIdxValuePair >> 16);
    const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
    cast<Function>(Val: GV)->addAttributeAtIndex(
        i: Idx, Attr: Attribute::getWithStackAlignment(Context&: GV->getContext(), Alignment: StackAlign));
    return true;
  }
  // The scalar launch-bound annotations become string fn attributes.
  if (K == "maxclusterrank" || K == "cluster_max_blocks") {
    const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
    cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.maxclusterrank", Val: llvm::utostr(X: CV));
    return true;
  }
  if (K == "minctasm") {
    const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
    cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.minctasm", Val: llvm::utostr(X: CV));
    return true;
  }
  if (K == "maxnreg") {
    const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
    cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.maxnreg", Val: llvm::utostr(X: CV));
    return true;
  }
  // Per-dimension annotations like "maxntidx" fold into a 3-vector attribute.
  // Note: consume_front strips the prefix from K in place even when the
  // remaining suffix fails isXYZ, so later comparisons see the stripped K;
  // legacy keys do not collide this way in practice.
  if (K.consume_front(Prefix: "maxntid") && isXYZ(S: K)) {
    upgradeNVVMFnVectorAttr(Attr: "nvvm.maxntid", DimC: K[0], GV, V);
    return true;
  }
  if (K.consume_front(Prefix: "reqntid") && isXYZ(S: K)) {
    upgradeNVVMFnVectorAttr(Attr: "nvvm.reqntid", DimC: K[0], GV, V);
    return true;
  }
  if (K.consume_front(Prefix: "cluster_dim_") && isXYZ(S: K)) {
    upgradeNVVMFnVectorAttr(Attr: "nvvm.cluster_dim", DimC: K[0], GV, V);
    return true;
  }
  if (K == "grid_constant") {
    // V is an MDNode listing the affected parameter indices.
    const auto Attr = Attribute::get(Context&: GV->getContext(), Kind: "nvvm.grid_constant");
    for (const auto &Op : cast<MDNode>(Val: V)->operands()) {
      // For some reason, the index is 1-based in the metadata. Good thing we're
      // able to auto-upgrade it!
      const auto Index = mdconst::extract<ConstantInt>(MD: Op)->getZExtValue() - 1;
      cast<Function>(Val: GV)->addParamAttr(ArgNo: Index, Attr);
    }
    return true;
  }

  return false;
}
5916
// Rewrite the module-level !nvvm.annotations named metadata: upgrade every
// key/value pair that has a modern attribute equivalent, keep the rest, and
// drop duplicate and now-empty annotation nodes.
void llvm::UpgradeNVVMAnnotations(Module &M) {
  NamedMDNode *NamedMD = M.getNamedMetadata(Name: "nvvm.annotations");
  if (!NamedMD)
    return;

  SmallVector<MDNode *, 8> NewNodes;
  // Duplicate nodes are processed once; repeats are silently dropped.
  SmallPtrSet<const MDNode *, 8> SeenNodes;
  for (MDNode *MD : NamedMD->operands()) {
    if (!SeenNodes.insert(Ptr: MD).second)
      continue;

    // Only annotations attached to a global value can be upgraded.
    auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD: MD->getOperand(I: 0));
    if (!GV)
      continue;

    assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");

    SmallVector<Metadata *, 8> NewOperands{MD->getOperand(I: 0)};
    // Each nvvm.annotations metadata entry will be of the following form:
    //   !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
    // start index = 1, to skip the global variable key
    // increment = 2, to skip the value for each property-value pairs
    for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
      MDString *K = cast<MDString>(Val: MD->getOperand(I: j));
      const MDOperand &V = MD->getOperand(I: j + 1);
      bool Upgraded = upgradeSingleNVVMAnnotation(GV, K: K->getString(), V);
      // Pairs without a modern equivalent survive in the rebuilt node.
      if (!Upgraded)
        NewOperands.append(IL: {K, V});
    }

    // Keep the node only if at least one pair remains beyond the GV operand.
    if (NewOperands.size() > 1)
      NewNodes.push_back(Elt: MDNode::get(Context&: M.getContext(), MDs: NewOperands));
  }

  NamedMD->clearOperands();
  for (MDNode *N : NewNodes)
    NamedMD->addOperand(M: N);
}
5955
5956/// This checks for objc retain release marker which should be upgraded. It
5957/// returns true if module is modified.
5958static bool upgradeRetainReleaseMarker(Module &M) {
5959 bool Changed = false;
5960 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5961 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(Name: MarkerKey);
5962 if (ModRetainReleaseMarker) {
5963 MDNode *Op = ModRetainReleaseMarker->getOperand(i: 0);
5964 if (Op) {
5965 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 0));
5966 if (ID) {
5967 SmallVector<StringRef, 4> ValueComp;
5968 ID->getString().split(A&: ValueComp, Separator: "#");
5969 if (ValueComp.size() == 2) {
5970 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5971 ID = MDString::get(Context&: M.getContext(), Str: NewValue);
5972 }
5973 M.addModuleFlag(Behavior: Module::Error, Key: MarkerKey, Val: ID);
5974 M.eraseNamedMetadata(NMD: ModRetainReleaseMarker);
5975 Changed = true;
5976 }
5977 }
5978 }
5979 return Changed;
5980}
5981
// Upgrade pre-intrinsic ObjC ARC modules: rewrite direct calls to the named
// runtime entry points (objc_retain, objc_release, ...) into calls to the
// corresponding llvm.objc.* intrinsics.
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(Name: OldFunc);

    // Nothing to do if the module never references this runtime function.
    if (!Fn)
      return;

    Function *NewFn =
        llvm::Intrinsic::getOrInsertDeclaration(M: &M, id: IntrinsicFunc);

    // Rewrite direct calls only; other uses (e.g. taking the function's
    // address) are left untouched.
    for (User *U : make_early_inc_range(Range: Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(Val: U);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(op: Instruction::BitCast, S: CI,
                                 DstTy: NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(i: I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(op: Instruction::BitCast, S: Arg,
                                     DstTy: NewFuncTy->getParamType(i: I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(V: Arg, DestTy: NewFuncTy->getParamType(i: I));
        }
        Args.push_back(Elt: Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(FTy: NewFuncTy, Callee: NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
      NewCall->takeName(V: CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(V: NewCall, DestTy: CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(V: NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the old declaration once no uses remain.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!upgradeRetainReleaseMarker(M))
    return;

  // Table mapping each legacy ObjC runtime entry point to its intrinsic.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
6103
// Upgrade legacy module flags in place: fix flag behaviors that changed
// meaning (PIC/PIE level, branch protection), canonicalize the ObjC image-info
// section string, split the packed "Objective-C Garbage Collection" value into
// separate Swift version flags, and rename AMDGPU's code-object-version key.
// Returns true if the module was modified.
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  // Only meaningful when HasSwiftVersionFlag is set below.
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(C&: M.getContext());
  auto Int32Ty = Type::getInt32Ty(C&: M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(i: I);
    // Skip malformed flags; a module flag is a (behavior, key, value) triple.
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
    if (!ID)
      continue;
    // Replaces flag I's behavior with B, keeping key and value unchanged.
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      Metadata *Ops[3] = {ConstantAsMetadata::get(C: ConstantInt::get(
                              Ty: Type::getInt32Ty(C&: M.getContext()), V: B)),
                          MDString::get(Context&: M.getContext(), Str: ID->getString()),
                          Op->getOperand(I: 2)};
      ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
      Changed = true;
    };

    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC from Error/Max to Min.
    if (ID->getString() == "PIC Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error || V == Module::Max)
          SetBehavior(Module::Min);
      }
    }
    // Upgrade "PIE Level" from Error to Max.
    if (ID->getString() == "PIE Level")
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0)))
        if (Behavior->getLimitedValue() == Module::Error)
          SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields were Error and now they are Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().starts_with(Prefix: "sign-return-address")) {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Min)),
              Op->getOperand(I: 1), Op->getOperand(I: 2)};
          ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
          Changed = true;
        }
      }
    }

    // Upgrade Objective-C Image Info Section. Removed the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that is functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(A&: ValueComp, Separator: " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(I: 0), Op->getOperand(I: 1),
                              MDString::get(Context&: M.getContext(), Str: NewValue)};
          ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
          Changed = true;
        }
      }
    }

    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
    // If the higher bits are set, it adds new module flag for swift info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        // Already in the new i8 form: nothing to do for this flag.
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        // Bits above the low byte carry the packed Swift version info.
        if ((Val & 0xff) != Val) {
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        // Keep only the low byte as the (now i8-typed) GC flag value.
        Metadata *Ops[3] = {
            ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty,V: Module::Error)),
            Op->getOperand(I: 1),
            ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int8Ty,V: Val & 0xff))};
        ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
        Changed = true;
      }
    }

    // Rename the AMDGPU flag key to its current "amdhsa" spelling.
    if (ID->getString() == "amdgpu_code_object_version") {
      Metadata *Ops[3] = {
          Op->getOperand(I: 0),
          MDString::get(Context&: M.getContext(), Str: "amdhsa_code_object_version"),
          Op->getOperand(I: 2)};
      ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
      Changed = true;
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(Behavior: llvm::Module::Override, Key: "Objective-C Class Properties",
                    Val: (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Behavior: Module::Error, Key: "Swift ABI Version",
                    Val: SwiftABIVersion);
    M.addModuleFlag(Behavior: Module::Error, Key: "Swift Major Version",
                    Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMajorVersion));
    M.addModuleFlag(Behavior: Module::Error, Key: "Swift Minor Version",
                    Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
6246
6247void llvm::UpgradeSectionAttributes(Module &M) {
6248 auto TrimSpaces = [](StringRef Section) -> std::string {
6249 SmallVector<StringRef, 5> Components;
6250 Section.split(A&: Components, Separator: ',');
6251
6252 SmallString<32> Buffer;
6253 raw_svector_ostream OS(Buffer);
6254
6255 for (auto Component : Components)
6256 OS << ',' << Component.trim();
6257
6258 return std::string(OS.str().substr(Start: 1));
6259 };
6260
6261 for (auto &GV : M.globals()) {
6262 if (!GV.hasSection())
6263 continue;
6264
6265 StringRef Section = GV.getSection();
6266
6267 if (!Section.starts_with(Prefix: "__DATA, __objc_catlist"))
6268 continue;
6269
6270 // __DATA, __objc_catlist, regular, no_dead_strip
6271 // __DATA,__objc_catlist,regular,no_dead_strip
6272 GV.setSection(TrimSpaces(Section));
6273 }
6274}
6275
6276namespace {
6277// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6278// callsites within a function that did not also have the strictfp attribute.
6279// Since 10.0, if strict FP semantics are needed within a function, the
6280// function must have the strictfp attribute and all calls within the function
6281// must also have the strictfp attribute. This latter restriction is
6282// necessary to prevent unwanted libcall simplification when a function is
6283// being cloned (such as for inlining).
6284//
6285// The "dangling" strictfp attribute usage was only used to prevent constant
6286// folding and other libcall simplification. The nobuiltin attribute on the
6287// callsite has the same effect.
6288struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6289 StrictFPUpgradeVisitor() = default;
6290
6291 void visitCallBase(CallBase &Call) {
6292 if (!Call.isStrictFP())
6293 return;
6294 if (isa<ConstrainedFPIntrinsic>(Val: &Call))
6295 return;
6296 // If we get here, the caller doesn't have the strictfp attribute
6297 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6298 Call.removeFnAttr(Kind: Attribute::StrictFP);
6299 Call.addFnAttr(Kind: Attribute::NoBuiltin);
6300 }
6301};
6302
6303/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6304struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6305 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6306 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6307
6308 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6309 if (!RMW.isFloatingPointOperation())
6310 return;
6311
6312 MDNode *Empty = MDNode::get(Context&: RMW.getContext(), MDs: {});
6313 RMW.setMetadata(Kind: "amdgpu.no.fine.grained.host.memory", Node: Empty);
6314 RMW.setMetadata(Kind: "amdgpu.no.remote.memory.access", Node: Empty);
6315 RMW.setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: Empty);
6316 }
6317};
6318} // namespace
6319
// Upgrade legacy attributes on a single function: convert dangling strictfp
// callsite attributes, drop type-incompatible attributes, migrate string
// attributes that became real attributes or metadata, and fold the string
// denormal-mode attributes into the structured denormal FP environment.
void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Kind: Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  // Remove all incompatible attributes from function.
  F.removeRetAttrs(Attrs: AttributeFuncs::typeIncompatible(
      Ty: F.getReturnType(), AS: F.getAttributes().getRetAttrs()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(
        AM: AttributeFuncs::typeIncompatible(Ty: Arg.getType(), AS: Arg.getAttributes()));

  // Batch up additions and removals; they are applied once at the end.
  bool AddingAttrs = false, RemovingAttrs = false;
  AttrBuilder AttrsToAdd(F.getContext());
  AttributeMask AttrsToRemove;

  // Older versions of LLVM treated an "implicit-section-name" attribute
  // similarly to directly setting the section on a Function.
  if (Attribute A = F.getFnAttribute(Kind: "implicit-section-name");
      A.isValid() && A.isStringAttribute()) {
    F.setSection(A.getValueAsString());
    AttrsToRemove.addAttribute(A: "implicit-section-name");
    RemovingAttrs = true;
  }

  // The string "nooutline" attribute became the enum NoOutline attribute.
  if (Attribute A = F.getFnAttribute(Kind: "nooutline");
      A.isValid() && A.isStringAttribute()) {
    AttrsToRemove.addAttribute(A: "nooutline");
    AttrsToAdd.addAttribute(Val: Attribute::NoOutline);
    AddingAttrs = RemovingAttrs = true;
  }

  if (!F.empty()) {
    // For some reason this is called twice, and the first time is before any
    // instructions are loaded into the body.

    if (Attribute A = F.getFnAttribute(Kind: "amdgpu-unsafe-fp-atomics");
        A.isValid()) {

      // A true value expands to per-instruction atomicrmw metadata.
      if (A.getValueAsBool()) {
        AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
        Visitor.visit(F);
      }

      // We will leave behind dead attribute uses on external declarations, but
      // clang never added these to declarations anyway.
      AttrsToRemove.addAttribute(A: "amdgpu-unsafe-fp-atomics");
      RemovingAttrs = true;
    }
  }

  DenormalMode DenormalFPMath = DenormalMode::getIEEE();
  DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();

  bool HandleDenormalMode = false;

  // Parse the legacy string denormal attributes; invalid values are kept
  // as-is rather than dropped.
  if (Attribute Attr = F.getFnAttribute(Kind: "denormal-fp-math"); Attr.isValid()) {
    DenormalMode ParsedMode = parseDenormalFPAttribute(Str: Attr.getValueAsString());
    if (ParsedMode.isValid()) {
      DenormalFPMath = ParsedMode;
      AttrsToRemove.addAttribute(A: "denormal-fp-math");
      AddingAttrs = RemovingAttrs = true;
      HandleDenormalMode = true;
    }
  }

  if (Attribute Attr = F.getFnAttribute(Kind: "denormal-fp-math-f32");
      Attr.isValid()) {
    DenormalMode ParsedMode = parseDenormalFPAttribute(Str: Attr.getValueAsString());
    if (ParsedMode.isValid()) {
      DenormalFPMathF32 = ParsedMode;
      AttrsToRemove.addAttribute(A: "denormal-fp-math-f32");
      AddingAttrs = RemovingAttrs = true;
      HandleDenormalMode = true;
    }
  }

  // Combine both modes into the single structured denormal-environment
  // attribute.
  if (HandleDenormalMode)
    AttrsToAdd.addDenormalFPEnvAttr(
        Mode: DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));

  if (RemovingAttrs)
    F.removeFnAttrs(Attrs: AttrsToRemove);

  if (AddingAttrs)
    F.addFnAttrs(Attrs: AttrsToAdd);
}
6410
6411// Check if the function attribute is not present and set it.
6412static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6413 StringRef Value) {
6414 if (!F.hasFnAttribute(Kind: FnAttrName))
6415 F.addFnAttr(Kind: FnAttrName, Val: Value);
6416}
6417
6418// Check if the function attribute is not present and set it if needed.
6419// If the attribute is "false" then removes it.
6420// If the attribute is "true" resets it to a valueless attribute.
6421static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6422 if (!F.hasFnAttribute(Kind: FnAttrName)) {
6423 if (Set)
6424 F.addFnAttr(Kind: FnAttrName);
6425 } else {
6426 auto A = F.getFnAttribute(Kind: FnAttrName);
6427 if ("false" == A.getValueAsString())
6428 F.removeFnAttr(Kind: FnAttrName);
6429 else if ("true" == A.getValueAsString()) {
6430 F.removeFnAttr(Kind: FnAttrName);
6431 F.addFnAttr(Kind: FnAttrName);
6432 }
6433 }
6434}
6435
// On ARM/Thumb/AArch64, push the legacy module-level branch-protection flags
// (BTI, PAuth-LR, GCS, return-address signing) down onto each function
// definition, then mark the module flags with value 2 to record that the
// per-function form is authoritative.
void llvm::copyModuleAttrToFunctions(Module &M) {
  Triple T(M.getTargetTriple());
  if (!T.isThumb() && !T.isARM() && !T.isAArch64())
    return;

  // Raw module-flag values; 0 = absent, 1 = enabled, 2 = already upgraded.
  uint64_t BTEValue = 0;
  uint64_t BPPLRValue = 0;
  uint64_t GCSValue = 0;
  uint64_t SRAValue = 0;
  uint64_t SRAALLValue = 0;
  uint64_t SRABKeyValue = 0;

  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (ModFlags) {
    for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
      MDNode *Op = ModFlags->getOperand(i: I);
      // Skip malformed flags; a module flag is a (behavior, key, value) triple.
      if (Op->getNumOperands() != 3)
        continue;

      MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
      auto *CI = mdconst::dyn_extract<ConstantInt>(MD: Op->getOperand(I: 2));
      if (!ID || !CI)
        continue;

      // Route the value to the variable tracking the matching flag, if any.
      StringRef IDStr = ID->getString();
      uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
                         : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
                         : IDStr == "guarded-control-stack" ? &GCSValue
                         : IDStr == "sign-return-address" ? &SRAValue
                         : IDStr == "sign-return-address-all" ? &SRAALLValue
                         : IDStr == "sign-return-address-with-bkey"
                             ? &SRABKeyValue
                             : nullptr;
      if (!ValPtr)
        continue;

      *ValPtr = CI->getZExtValue();
      // Value 2 means this module was already upgraded; nothing more to do.
      if (*ValPtr == 2)
        return;
    }
  }

  bool BTE = BTEValue == 1;
  bool BPPLR = BPPLRValue == 1;
  bool GCS = GCSValue == 1;
  bool SRA = SRAValue == 1;

  // Signing scope/key defaults, refined by the -all / -with-bkey flags.
  StringRef SignTypeValue = "non-leaf";
  if (SRA && SRAALLValue == 1)
    SignTypeValue = "all";

  StringRef SignKeyValue = "a_key";
  if (SRA && SRABKeyValue == 1)
    SignKeyValue = "b_key";

  for (Function &F : M.getFunctionList()) {
    if (F.isDeclaration())
      continue;

    if (SRA) {
      setFunctionAttrIfNotSet(F, FnAttrName: "sign-return-address", Value: SignTypeValue);
      setFunctionAttrIfNotSet(F, FnAttrName: "sign-return-address-key", Value: SignKeyValue);
    } else {
      // If the module does not request signing, drop an explicit per-function
      // "none" so the attribute pair disappears entirely.
      if (auto A = F.getFnAttribute(Kind: "sign-return-address");
          A.isValid() && "none" == A.getValueAsString()) {
        F.removeFnAttr(Kind: "sign-return-address");
        F.removeFnAttr(Kind: "sign-return-address-key");
      }
    }
    ConvertFunctionAttr(F, Set: BTE, FnAttrName: "branch-target-enforcement");
    ConvertFunctionAttr(F, Set: BPPLR, FnAttrName: "branch-protection-pauth-lr");
    ConvertFunctionAttr(F, Set: GCS, FnAttrName: "guarded-control-stack");
  }

  // Record the upgrade by bumping each enabled flag to 2 (Min behavior).
  if (BTE)
    M.setModuleFlag(Behavior: llvm::Module::Min, Key: "branch-target-enforcement", Val: 2);
  if (BPPLR)
    M.setModuleFlag(Behavior: llvm::Module::Min, Key: "branch-protection-pauth-lr", Val: 2);
  if (GCS)
    M.setModuleFlag(Behavior: llvm::Module::Min, Key: "guarded-control-stack", Val: 2);
  if (SRA) {
    M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address", Val: 2);
    if (SRAALLValue == 1)
      M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address-all", Val: 2);
    if (SRABKeyValue == 1)
      M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address-with-bkey", Val: 2);
  }
}
6524
6525static bool isOldLoopArgument(Metadata *MD) {
6526 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
6527 if (!T)
6528 return false;
6529 if (T->getNumOperands() < 1)
6530 return false;
6531 auto *S = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
6532 if (!S)
6533 return false;
6534 return S->getString().starts_with(Prefix: "llvm.vectorizer.");
6535}
6536
6537static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6538 StringRef OldPrefix = "llvm.vectorizer.";
6539 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6540
6541 if (OldTag == "llvm.vectorizer.unroll")
6542 return MDString::get(Context&: C, Str: "llvm.loop.interleave.count");
6543
6544 return MDString::get(
6545 Context&: C, Str: (Twine("llvm.loop.vectorize.") + OldTag.drop_front(N: OldPrefix.size()))
6546 .str());
6547}
6548
6549static Metadata *upgradeLoopArgument(Metadata *MD) {
6550 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
6551 if (!T)
6552 return MD;
6553 if (T->getNumOperands() < 1)
6554 return MD;
6555 auto *OldTag = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
6556 if (!OldTag)
6557 return MD;
6558 if (!OldTag->getString().starts_with(Prefix: "llvm.vectorizer."))
6559 return MD;
6560
6561 // This has an old tag. Upgrade it.
6562 SmallVector<Metadata *, 8> Ops;
6563 Ops.reserve(N: T->getNumOperands());
6564 Ops.push_back(Elt: upgradeLoopTag(C&: T->getContext(), OldTag: OldTag->getString()));
6565 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6566 Ops.push_back(Elt: T->getOperand(I));
6567
6568 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
6569}
6570
6571MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6572 auto *T = dyn_cast<MDTuple>(Val: &N);
6573 if (!T)
6574 return &N;
6575
6576 if (none_of(Range: T->operands(), P: isOldLoopArgument))
6577 return &N;
6578
6579 SmallVector<Metadata *, 8> Ops;
6580 Ops.reserve(N: T->getNumOperands());
6581 for (Metadata *MD : T->operands())
6582 Ops.push_back(Elt: upgradeLoopArgument(MD));
6583
6584 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
6585}
6586
/// Upgrade the datalayout string \p DL of a module targeting triple \p TT to
/// current expectations, returning the possibly-rewritten string. Each
/// target-specific branch below splices in components that newer LLVM
/// requires but that older bitcode may lack. Note the pattern throughout:
/// presence checks are performed against the ORIGINAL string \p DL while the
/// mutations accumulate in \c Res, so the ordering of the edits matters.
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
  // the address space of globals to 1. This does not apply to SPIRV Logical.
  if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
      !DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isLoongArch64() || T.isRISCV64()) {
    // Make i32 a native type for 64-bit LoongArch and RISC-V.
    auto I = DL.find(Str: "-n64-");
    if (I != StringRef::npos)
      // Replace the 5-character "-n64-" component with "-n32:64-".
      return (DL.take_front(N: I) + "-n32:64-" + DL.drop_front(N: I + 5)).str();
    return DL.str();
  }

  // AMDGPU data layout upgrades.
  std::string Res = DL.str();
  if (T.isAMDGPU()) {
    // Define address spaces for constants.
    if (!DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G"))
      Res.append(s: Res.empty() ? "G1" : "-G1");

    // AMDGCN data layout upgrades.
    if (T.isAMDGCN()) {

      // Add missing non-integral declarations.
      // This goes before adding new address spaces to prevent incoherent string
      // values.
      if (!DL.contains(Other: "-ni") && !DL.starts_with(Prefix: "ni"))
        Res.append(s: "-ni:7:8:9");
      // Update ni:7 to ni:7:8:9. The two suffix checks below are mutually
      // exclusive for a given input, so at most one append fires.
      if (DL.ends_with(Suffix: "ni:7"))
        Res.append(s: ":8:9");
      if (DL.ends_with(Suffix: "ni:7:8"))
        Res.append(s: ":9");

      // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
      // resources) An empty data layout has already been upgraded to G1 by now.
      if (!DL.contains(Other: "-p7") && !DL.starts_with(Prefix: "p7"))
        Res.append(s: "-p7:160:256:256:32");
      if (!DL.contains(Other: "-p8") && !DL.starts_with(Prefix: "p8"))
        Res.append(s: "-p8:128:128:128:48");
      // A pre-existing p8 component that lacks the index-size fields is
      // widened in place rather than appended.
      constexpr StringRef OldP8("-p8:128:128-");
      if (DL.contains(Other: OldP8))
        Res.replace(pos: Res.find(svt: OldP8), n1: OldP8.size(), s: "-p8:128:128:128:48-");
      if (!DL.contains(Other: "-p9") && !DL.starts_with(Prefix: "p9"))
        Res.append(s: "-p9:192:256:256:32");
    }

    // Upgrade the ELF mangling mode. If "m:e" is absent anywhere in the
    // string, the mangling component is prepended to the layout.
    if (!DL.contains(Other: "m:e"))
      Res = Res.empty() ? "m:e" : "m:e-" + Res;

    return Res;
  }

  if (T.isSystemZ() && !DL.empty()) {
    // Make sure the stack alignment is present. The drop_front(1) strips the
    // leading "E" endianness marker, which "E-S64" re-supplies.
    if (!DL.contains(Other: "-S64"))
      return "E-S64" + DL.drop_front(N: 1).str();
    return DL.str();
  }

  // Shared by the AArch64 and X86 paths below; mutates Res via capture.
  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
    // If the datalayout matches the expected format, add pointer size address
    // spaces to the datalayout.
    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
    if (!DL.contains(Other: AddrSpaces)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
      if (R.match(String: Res, Matches: &Groups))
        Res = (Groups[1] + AddrSpaces + Groups[3]).str();
    }
  };

  // AArch64 data layout upgrades.
  if (T.isAArch64()) {
    // Add "-Fn32"
    if (!DL.empty() && !DL.contains(Other: "-Fn32"))
      Res.append(s: "-Fn32");
    AddPtr32Ptr64AddrSpaces();
    return Res;
  }

  if (T.isSPARC() || (T.isMIPS64() && !DL.contains(Other: "m:m")) || T.isPPC64() ||
      T.isWasm()) {
    // Mips64 with o32 ABI did not add "-i128:128".
    // Add "-i128:128" immediately after the existing "-i64:64" component.
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    if (!StringRef(Res).contains(Other: I128)) {
      size_t Pos = Res.find(str: I64);
      if (Pos != size_t(-1))
        Res.insert(pos1: Pos + I64.size(), str: I128);
    }
  }

  // AIX on PowerPC: insert "-f64:32:64" before "-S128" (or append at the end
  // if no stack-alignment component is present).
  if (T.isPPC() && T.isOSAIX() && !DL.contains(Other: "f64:32:64") && !DL.empty()) {
    size_t Pos = Res.find(s: "-S128");
    if (Pos == StringRef::npos)
      Pos = Res.size();
    Res.insert(pos: Pos, s: "-f64:32:64");
  }

  // Everything past this point is X86-only.
  if (!T.isX86())
    return Res;

  AddPtr32Ptr64AddrSpaces();

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(Other: I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(String: Res, Matches: &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find(Str: "-f80:32-");
    if (I != StringRef::npos)
      // Replace the 8-character "-f80:32-" component with "-f80:128-".
      Res = (Ref.take_front(N: I) + "-f80:128-" + Ref.drop_front(N: I + 8)).str();
  }

  return Res;
}
6726
6727void llvm::UpgradeAttributes(AttrBuilder &B) {
6728 StringRef FramePointer;
6729 Attribute A = B.getAttribute(Kind: "no-frame-pointer-elim");
6730 if (A.isValid()) {
6731 // The value can be "true" or "false".
6732 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6733 B.removeAttribute(A: "no-frame-pointer-elim");
6734 }
6735 if (B.contains(A: "no-frame-pointer-elim-non-leaf")) {
6736 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6737 if (FramePointer != "all")
6738 FramePointer = "non-leaf";
6739 B.removeAttribute(A: "no-frame-pointer-elim-non-leaf");
6740 }
6741 if (!FramePointer.empty())
6742 B.addAttribute(A: "frame-pointer", V: FramePointer);
6743
6744 A = B.getAttribute(Kind: "null-pointer-is-valid");
6745 if (A.isValid()) {
6746 // The value can be "true" or "false".
6747 bool NullPointerIsValid = A.getValueAsString() == "true";
6748 B.removeAttribute(A: "null-pointer-is-valid");
6749 if (NullPointerIsValid)
6750 B.addAttribute(Val: Attribute::NullPointerIsValid);
6751 }
6752}
6753
6754void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6755 // clang.arc.attachedcall bundles are now required to have an operand.
6756 // If they don't, it's okay to drop them entirely: when there is an operand,
6757 // the "attachedcall" is meaningful and required, but without an operand,
6758 // it's just a marker NOP. Dropping it merely prevents an optimization.
6759 erase_if(C&: Bundles, P: [&](OperandBundleDef &OBD) {
6760 return OBD.getTag() == "clang.arc.attachedcall" &&
6761 OBD.inputs().empty();
6762 });
6763}
6764