//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
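// For example, the old declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is re-declared so that both operands are <2 x i64>.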
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
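// For example, the trailing immediate in
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is now declared as i8.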
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
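// For example, llvm.x86.avx512.mask.cmp.ps.512 previously returned a scalar
// i16 mask; the current declaration returns <16 x i1>.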
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply-and-add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
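// For example, the multiplicand operands of llvm.x86.avx512.vpdpbusd.128 were
// declared as <4 x i32> and are now <16 x i8>.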
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are already vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply-and-add words intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i16.
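// For example, the multiplicand operands of llvm.x86.avx512.vpdpwssd.128 were
// declared as <4 x i32> and are now <8 x i16>.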
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are already vectors of i16.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
173 if (Name.consume_front(Prefix: "avx."))
174 return (Name.starts_with(Prefix: "blend.p") || // Added in 3.7
175 Name == "cvt.ps2.pd.256" || // Added in 3.9
176 Name == "cvtdq2.pd.256" || // Added in 3.9
177 Name == "cvtdq2.ps.256" || // Added in 7.0
178 Name.starts_with(Prefix: "movnt.") || // Added in 3.2
179 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
180 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
181 Name.starts_with(Prefix: "vbroadcast.s") || // Added in 3.5
182 Name.starts_with(Prefix: "vbroadcastf128") || // Added in 4.0
183 Name.starts_with(Prefix: "vextractf128.") || // Added in 3.7
184 Name.starts_with(Prefix: "vinsertf128.") || // Added in 3.7
185 Name.starts_with(Prefix: "vperm2f128.") || // Added in 6.0
186 Name.starts_with(Prefix: "vpermil.")); // Added in 3.1
187
188 if (Name.consume_front(Prefix: "avx2."))
189 return (Name == "movntdqa" || // Added in 5.0
190 Name.starts_with(Prefix: "pabs.") || // Added in 6.0
191 Name.starts_with(Prefix: "padds.") || // Added in 8.0
192 Name.starts_with(Prefix: "paddus.") || // Added in 8.0
193 Name.starts_with(Prefix: "pblendd.") || // Added in 3.7
194 Name == "pblendw" || // Added in 3.7
195 Name.starts_with(Prefix: "pbroadcast") || // Added in 3.8
196 Name.starts_with(Prefix: "pcmpeq.") || // Added in 3.1
197 Name.starts_with(Prefix: "pcmpgt.") || // Added in 3.1
198 Name.starts_with(Prefix: "pmax") || // Added in 3.9
199 Name.starts_with(Prefix: "pmin") || // Added in 3.9
200 Name.starts_with(Prefix: "pmovsx") || // Added in 3.9
201 Name.starts_with(Prefix: "pmovzx") || // Added in 3.9
202 Name == "pmul.dq" || // Added in 7.0
203 Name == "pmulu.dq" || // Added in 7.0
204 Name.starts_with(Prefix: "psll.dq") || // Added in 3.7
205 Name.starts_with(Prefix: "psrl.dq") || // Added in 3.7
206 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
207 Name.starts_with(Prefix: "psubus.") || // Added in 8.0
208 Name.starts_with(Prefix: "vbroadcast") || // Added in 3.8
209 Name == "vbroadcasti128" || // Added in 3.7
210 Name == "vextracti128" || // Added in 3.7
211 Name == "vinserti128" || // Added in 3.7
212 Name == "vperm2i128"); // Added in 6.0
213
214 if (Name.consume_front(Prefix: "avx512.")) {
215 if (Name.consume_front(Prefix: "mask."))
216 // 'avx512.mask.*'
217 return (Name.starts_with(Prefix: "add.p") || // Added in 7.0. 128/256 in 4.0
218 Name.starts_with(Prefix: "and.") || // Added in 3.9
219 Name.starts_with(Prefix: "andn.") || // Added in 3.9
220 Name.starts_with(Prefix: "broadcast.s") || // Added in 3.9
221 Name.starts_with(Prefix: "broadcastf32x4.") || // Added in 6.0
222 Name.starts_with(Prefix: "broadcastf32x8.") || // Added in 6.0
223 Name.starts_with(Prefix: "broadcastf64x2.") || // Added in 6.0
224 Name.starts_with(Prefix: "broadcastf64x4.") || // Added in 6.0
225 Name.starts_with(Prefix: "broadcasti32x4.") || // Added in 6.0
226 Name.starts_with(Prefix: "broadcasti32x8.") || // Added in 6.0
227 Name.starts_with(Prefix: "broadcasti64x2.") || // Added in 6.0
228 Name.starts_with(Prefix: "broadcasti64x4.") || // Added in 6.0
229 Name.starts_with(Prefix: "cmp.b") || // Added in 5.0
230 Name.starts_with(Prefix: "cmp.d") || // Added in 5.0
231 Name.starts_with(Prefix: "cmp.q") || // Added in 5.0
232 Name.starts_with(Prefix: "cmp.w") || // Added in 5.0
233 Name.starts_with(Prefix: "compress.b") || // Added in 9.0
234 Name.starts_with(Prefix: "compress.d") || // Added in 9.0
235 Name.starts_with(Prefix: "compress.p") || // Added in 9.0
236 Name.starts_with(Prefix: "compress.q") || // Added in 9.0
237 Name.starts_with(Prefix: "compress.store.") || // Added in 7.0
238 Name.starts_with(Prefix: "compress.w") || // Added in 9.0
239 Name.starts_with(Prefix: "conflict.") || // Added in 9.0
240 Name.starts_with(Prefix: "cvtdq2pd.") || // Added in 4.0
241 Name.starts_with(Prefix: "cvtdq2ps.") || // Added in 7.0 updated 9.0
242 Name == "cvtpd2dq.256" || // Added in 7.0
243 Name == "cvtpd2ps.256" || // Added in 7.0
244 Name == "cvtps2pd.128" || // Added in 7.0
245 Name == "cvtps2pd.256" || // Added in 7.0
246 Name.starts_with(Prefix: "cvtqq2pd.") || // Added in 7.0 updated 9.0
247 Name == "cvtqq2ps.256" || // Added in 9.0
248 Name == "cvtqq2ps.512" || // Added in 9.0
249 Name == "cvttpd2dq.256" || // Added in 7.0
250 Name == "cvttps2dq.128" || // Added in 7.0
251 Name == "cvttps2dq.256" || // Added in 7.0
252 Name.starts_with(Prefix: "cvtudq2pd.") || // Added in 4.0
253 Name.starts_with(Prefix: "cvtudq2ps.") || // Added in 7.0 updated 9.0
254 Name.starts_with(Prefix: "cvtuqq2pd.") || // Added in 7.0 updated 9.0
255 Name == "cvtuqq2ps.256" || // Added in 9.0
256 Name == "cvtuqq2ps.512" || // Added in 9.0
257 Name.starts_with(Prefix: "dbpsadbw.") || // Added in 7.0
258 Name.starts_with(Prefix: "div.p") || // Added in 7.0. 128/256 in 4.0
259 Name.starts_with(Prefix: "expand.b") || // Added in 9.0
260 Name.starts_with(Prefix: "expand.d") || // Added in 9.0
261 Name.starts_with(Prefix: "expand.load.") || // Added in 7.0
262 Name.starts_with(Prefix: "expand.p") || // Added in 9.0
263 Name.starts_with(Prefix: "expand.q") || // Added in 9.0
264 Name.starts_with(Prefix: "expand.w") || // Added in 9.0
265 Name.starts_with(Prefix: "fpclass.p") || // Added in 7.0
266 Name.starts_with(Prefix: "insert") || // Added in 4.0
267 Name.starts_with(Prefix: "load.") || // Added in 3.9
268 Name.starts_with(Prefix: "loadu.") || // Added in 3.9
269 Name.starts_with(Prefix: "lzcnt.") || // Added in 5.0
270 Name.starts_with(Prefix: "max.p") || // Added in 7.0. 128/256 in 5.0
271 Name.starts_with(Prefix: "min.p") || // Added in 7.0. 128/256 in 5.0
272 Name.starts_with(Prefix: "movddup") || // Added in 3.9
273 Name.starts_with(Prefix: "move.s") || // Added in 4.0
274 Name.starts_with(Prefix: "movshdup") || // Added in 3.9
275 Name.starts_with(Prefix: "movsldup") || // Added in 3.9
276 Name.starts_with(Prefix: "mul.p") || // Added in 7.0. 128/256 in 4.0
277 Name.starts_with(Prefix: "or.") || // Added in 3.9
278 Name.starts_with(Prefix: "pabs.") || // Added in 6.0
279 Name.starts_with(Prefix: "packssdw.") || // Added in 5.0
280 Name.starts_with(Prefix: "packsswb.") || // Added in 5.0
281 Name.starts_with(Prefix: "packusdw.") || // Added in 5.0
282 Name.starts_with(Prefix: "packuswb.") || // Added in 5.0
283 Name.starts_with(Prefix: "padd.") || // Added in 4.0
284 Name.starts_with(Prefix: "padds.") || // Added in 8.0
285 Name.starts_with(Prefix: "paddus.") || // Added in 8.0
286 Name.starts_with(Prefix: "palignr.") || // Added in 3.9
287 Name.starts_with(Prefix: "pand.") || // Added in 3.9
288 Name.starts_with(Prefix: "pandn.") || // Added in 3.9
289 Name.starts_with(Prefix: "pavg") || // Added in 6.0
290 Name.starts_with(Prefix: "pbroadcast") || // Added in 6.0
291 Name.starts_with(Prefix: "pcmpeq.") || // Added in 3.9
292 Name.starts_with(Prefix: "pcmpgt.") || // Added in 3.9
293 Name.starts_with(Prefix: "perm.df.") || // Added in 3.9
294 Name.starts_with(Prefix: "perm.di.") || // Added in 3.9
295 Name.starts_with(Prefix: "permvar.") || // Added in 7.0
296 Name.starts_with(Prefix: "pmaddubs.w.") || // Added in 7.0
297 Name.starts_with(Prefix: "pmaddw.d.") || // Added in 7.0
298 Name.starts_with(Prefix: "pmax") || // Added in 4.0
299 Name.starts_with(Prefix: "pmin") || // Added in 4.0
300 Name == "pmov.qd.256" || // Added in 9.0
301 Name == "pmov.qd.512" || // Added in 9.0
302 Name == "pmov.wb.256" || // Added in 9.0
303 Name == "pmov.wb.512" || // Added in 9.0
304 Name.starts_with(Prefix: "pmovsx") || // Added in 4.0
305 Name.starts_with(Prefix: "pmovzx") || // Added in 4.0
306 Name.starts_with(Prefix: "pmul.dq.") || // Added in 4.0
307 Name.starts_with(Prefix: "pmul.hr.sw.") || // Added in 7.0
308 Name.starts_with(Prefix: "pmulh.w.") || // Added in 7.0
309 Name.starts_with(Prefix: "pmulhu.w.") || // Added in 7.0
310 Name.starts_with(Prefix: "pmull.") || // Added in 4.0
311 Name.starts_with(Prefix: "pmultishift.qb.") || // Added in 8.0
312 Name.starts_with(Prefix: "pmulu.dq.") || // Added in 4.0
313 Name.starts_with(Prefix: "por.") || // Added in 3.9
314 Name.starts_with(Prefix: "prol.") || // Added in 8.0
315 Name.starts_with(Prefix: "prolv.") || // Added in 8.0
316 Name.starts_with(Prefix: "pror.") || // Added in 8.0
317 Name.starts_with(Prefix: "prorv.") || // Added in 8.0
318 Name.starts_with(Prefix: "pshuf.b.") || // Added in 4.0
319 Name.starts_with(Prefix: "pshuf.d.") || // Added in 3.9
320 Name.starts_with(Prefix: "pshufh.w.") || // Added in 3.9
321 Name.starts_with(Prefix: "pshufl.w.") || // Added in 3.9
322 Name.starts_with(Prefix: "psll.d") || // Added in 4.0
323 Name.starts_with(Prefix: "psll.q") || // Added in 4.0
324 Name.starts_with(Prefix: "psll.w") || // Added in 4.0
325 Name.starts_with(Prefix: "pslli") || // Added in 4.0
326 Name.starts_with(Prefix: "psllv") || // Added in 4.0
327 Name.starts_with(Prefix: "psra.d") || // Added in 4.0
328 Name.starts_with(Prefix: "psra.q") || // Added in 4.0
329 Name.starts_with(Prefix: "psra.w") || // Added in 4.0
330 Name.starts_with(Prefix: "psrai") || // Added in 4.0
331 Name.starts_with(Prefix: "psrav") || // Added in 4.0
332 Name.starts_with(Prefix: "psrl.d") || // Added in 4.0
333 Name.starts_with(Prefix: "psrl.q") || // Added in 4.0
334 Name.starts_with(Prefix: "psrl.w") || // Added in 4.0
335 Name.starts_with(Prefix: "psrli") || // Added in 4.0
336 Name.starts_with(Prefix: "psrlv") || // Added in 4.0
337 Name.starts_with(Prefix: "psub.") || // Added in 4.0
338 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
339 Name.starts_with(Prefix: "psubus.") || // Added in 8.0
340 Name.starts_with(Prefix: "pternlog.") || // Added in 7.0
341 Name.starts_with(Prefix: "punpckh") || // Added in 3.9
342 Name.starts_with(Prefix: "punpckl") || // Added in 3.9
343 Name.starts_with(Prefix: "pxor.") || // Added in 3.9
344 Name.starts_with(Prefix: "shuf.f") || // Added in 6.0
345 Name.starts_with(Prefix: "shuf.i") || // Added in 6.0
346 Name.starts_with(Prefix: "shuf.p") || // Added in 4.0
347 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
348 Name.starts_with(Prefix: "store.b.") || // Added in 3.9
349 Name.starts_with(Prefix: "store.d.") || // Added in 3.9
350 Name.starts_with(Prefix: "store.p") || // Added in 3.9
351 Name.starts_with(Prefix: "store.q.") || // Added in 3.9
352 Name.starts_with(Prefix: "store.w.") || // Added in 3.9
353 Name == "store.ss" || // Added in 7.0
354 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
355 Name.starts_with(Prefix: "sub.p") || // Added in 7.0. 128/256 in 4.0
356 Name.starts_with(Prefix: "ucmp.") || // Added in 5.0
357 Name.starts_with(Prefix: "unpckh.") || // Added in 3.9
358 Name.starts_with(Prefix: "unpckl.") || // Added in 3.9
359 Name.starts_with(Prefix: "valign.") || // Added in 4.0
360 Name == "vcvtph2ps.128" || // Added in 11.0
361 Name == "vcvtph2ps.256" || // Added in 11.0
362 Name.starts_with(Prefix: "vextract") || // Added in 4.0
363 Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
364 Name.starts_with(Prefix: "vfmaddsub.") || // Added in 7.0
365 Name.starts_with(Prefix: "vfnmadd.") || // Added in 7.0
366 Name.starts_with(Prefix: "vfnmsub.") || // Added in 7.0
367 Name.starts_with(Prefix: "vpdpbusd.") || // Added in 7.0
368 Name.starts_with(Prefix: "vpdpbusds.") || // Added in 7.0
369 Name.starts_with(Prefix: "vpdpwssd.") || // Added in 7.0
370 Name.starts_with(Prefix: "vpdpwssds.") || // Added in 7.0
371 Name.starts_with(Prefix: "vpermi2var.") || // Added in 7.0
372 Name.starts_with(Prefix: "vpermil.p") || // Added in 3.9
373 Name.starts_with(Prefix: "vpermilvar.") || // Added in 4.0
374 Name.starts_with(Prefix: "vpermt2var.") || // Added in 7.0
375 Name.starts_with(Prefix: "vpmadd52") || // Added in 7.0
376 Name.starts_with(Prefix: "vpshld.") || // Added in 7.0
377 Name.starts_with(Prefix: "vpshldv.") || // Added in 8.0
378 Name.starts_with(Prefix: "vpshrd.") || // Added in 7.0
379 Name.starts_with(Prefix: "vpshrdv.") || // Added in 8.0
380 Name.starts_with(Prefix: "vpshufbitqmb.") || // Added in 8.0
381 Name.starts_with(Prefix: "xor.")); // Added in 3.9
382
383 if (Name.consume_front(Prefix: "mask3."))
384 // 'avx512.mask3.*'
385 return (Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
386 Name.starts_with(Prefix: "vfmaddsub.") || // Added in 7.0
387 Name.starts_with(Prefix: "vfmsub.") || // Added in 7.0
388 Name.starts_with(Prefix: "vfmsubadd.") || // Added in 7.0
389 Name.starts_with(Prefix: "vfnmsub.")); // Added in 7.0
390
391 if (Name.consume_front(Prefix: "maskz."))
392 // 'avx512.maskz.*'
393 return (Name.starts_with(Prefix: "pternlog.") || // Added in 7.0
394 Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
395 Name.starts_with(Prefix: "vfmaddsub.") || // Added in 7.0
396 Name.starts_with(Prefix: "vpdpbusd.") || // Added in 7.0
397 Name.starts_with(Prefix: "vpdpbusds.") || // Added in 7.0
398 Name.starts_with(Prefix: "vpdpwssd.") || // Added in 7.0
399 Name.starts_with(Prefix: "vpdpwssds.") || // Added in 7.0
400 Name.starts_with(Prefix: "vpermt2var.") || // Added in 7.0
401 Name.starts_with(Prefix: "vpmadd52") || // Added in 7.0
402 Name.starts_with(Prefix: "vpshldv.") || // Added in 8.0
403 Name.starts_with(Prefix: "vpshrdv.")); // Added in 8.0
404
405 // 'avx512.*'
406 return (Name == "movntdqa" || // Added in 5.0
407 Name == "pmul.dq.512" || // Added in 7.0
408 Name == "pmulu.dq.512" || // Added in 7.0
409 Name.starts_with(Prefix: "broadcastm") || // Added in 6.0
410 Name.starts_with(Prefix: "cmp.p") || // Added in 12.0
411 Name.starts_with(Prefix: "cvtb2mask.") || // Added in 7.0
412 Name.starts_with(Prefix: "cvtd2mask.") || // Added in 7.0
413 Name.starts_with(Prefix: "cvtmask2") || // Added in 5.0
414 Name.starts_with(Prefix: "cvtq2mask.") || // Added in 7.0
415 Name == "cvtusi2sd" || // Added in 7.0
416 Name.starts_with(Prefix: "cvtw2mask.") || // Added in 7.0
417 Name == "kand.w" || // Added in 7.0
418 Name == "kandn.w" || // Added in 7.0
419 Name == "knot.w" || // Added in 7.0
420 Name == "kor.w" || // Added in 7.0
421 Name == "kortestc.w" || // Added in 7.0
422 Name == "kortestz.w" || // Added in 7.0
423 Name.starts_with(Prefix: "kunpck") || // added in 6.0
424 Name == "kxnor.w" || // Added in 7.0
425 Name == "kxor.w" || // Added in 7.0
426 Name.starts_with(Prefix: "padds.") || // Added in 8.0
427 Name.starts_with(Prefix: "pbroadcast") || // Added in 3.9
428 Name.starts_with(Prefix: "prol") || // Added in 8.0
429 Name.starts_with(Prefix: "pror") || // Added in 8.0
430 Name.starts_with(Prefix: "psll.dq") || // Added in 3.9
431 Name.starts_with(Prefix: "psrl.dq") || // Added in 3.9
432 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
433 Name.starts_with(Prefix: "ptestm") || // Added in 6.0
434 Name.starts_with(Prefix: "ptestnm") || // Added in 6.0
435 Name.starts_with(Prefix: "storent.") || // Added in 3.9
436 Name.starts_with(Prefix: "vbroadcast.s") || // Added in 7.0
437 Name.starts_with(Prefix: "vpshld.") || // Added in 8.0
438 Name.starts_with(Prefix: "vpshrd.")); // Added in 8.0
439 }
440
441 if (Name.consume_front(Prefix: "fma."))
442 return (Name.starts_with(Prefix: "vfmadd.") || // Added in 7.0
443 Name.starts_with(Prefix: "vfmsub.") || // Added in 7.0
444 Name.starts_with(Prefix: "vfmsubadd.") || // Added in 7.0
445 Name.starts_with(Prefix: "vfnmadd.") || // Added in 7.0
446 Name.starts_with(Prefix: "vfnmsub.")); // Added in 7.0
447
448 if (Name.consume_front(Prefix: "fma4."))
449 return Name.starts_with(Prefix: "vfmadd.s"); // Added in 7.0
450
451 if (Name.consume_front(Prefix: "sse."))
452 return (Name == "add.ss" || // Added in 4.0
453 Name == "cvtsi2ss" || // Added in 7.0
454 Name == "cvtsi642ss" || // Added in 7.0
455 Name == "div.ss" || // Added in 4.0
456 Name == "mul.ss" || // Added in 4.0
457 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
458 Name == "sqrt.ss" || // Added in 7.0
459 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
460 Name == "sub.ss"); // Added in 4.0
461
462 if (Name.consume_front(Prefix: "sse2."))
463 return (Name == "add.sd" || // Added in 4.0
464 Name == "cvtdq2pd" || // Added in 3.9
465 Name == "cvtdq2ps" || // Added in 7.0
466 Name == "cvtps2pd" || // Added in 3.9
467 Name == "cvtsi2sd" || // Added in 7.0
468 Name == "cvtsi642sd" || // Added in 7.0
469 Name == "cvtss2sd" || // Added in 7.0
470 Name == "div.sd" || // Added in 4.0
471 Name == "mul.sd" || // Added in 4.0
472 Name.starts_with(Prefix: "padds.") || // Added in 8.0
473 Name.starts_with(Prefix: "paddus.") || // Added in 8.0
474 Name.starts_with(Prefix: "pcmpeq.") || // Added in 3.1
475 Name.starts_with(Prefix: "pcmpgt.") || // Added in 3.1
476 Name == "pmaxs.w" || // Added in 3.9
477 Name == "pmaxu.b" || // Added in 3.9
478 Name == "pmins.w" || // Added in 3.9
479 Name == "pminu.b" || // Added in 3.9
480 Name == "pmulu.dq" || // Added in 7.0
481 Name.starts_with(Prefix: "pshuf") || // Added in 3.9
482 Name.starts_with(Prefix: "psll.dq") || // Added in 3.7
483 Name.starts_with(Prefix: "psrl.dq") || // Added in 3.7
484 Name.starts_with(Prefix: "psubs.") || // Added in 8.0
485 Name.starts_with(Prefix: "psubus.") || // Added in 8.0
486 Name.starts_with(Prefix: "sqrt.p") || // Added in 7.0
487 Name == "sqrt.sd" || // Added in 7.0
488 Name == "storel.dq" || // Added in 3.9
489 Name.starts_with(Prefix: "storeu.") || // Added in 3.9
490 Name == "sub.sd"); // Added in 4.0
491
492 if (Name.consume_front(Prefix: "sse41."))
493 return (Name.starts_with(Prefix: "blendp") || // Added in 3.7
494 Name == "movntdqa" || // Added in 5.0
495 Name == "pblendw" || // Added in 3.7
496 Name == "pmaxsb" || // Added in 3.9
497 Name == "pmaxsd" || // Added in 3.9
498 Name == "pmaxud" || // Added in 3.9
499 Name == "pmaxuw" || // Added in 3.9
500 Name == "pminsb" || // Added in 3.9
501 Name == "pminsd" || // Added in 3.9
502 Name == "pminud" || // Added in 3.9
503 Name == "pminuw" || // Added in 3.9
504 Name.starts_with(Prefix: "pmovsx") || // Added in 3.8
505 Name.starts_with(Prefix: "pmovzx") || // Added in 3.9
506 Name == "pmuldq"); // Added in 7.0
507
508 if (Name.consume_front(Prefix: "sse42."))
509 return Name == "crc32.64.8"; // Added in 3.4
510
511 if (Name.consume_front(Prefix: "sse4a."))
512 return Name.starts_with(Prefix: "movnt."); // Added in 3.9
513
514 if (Name.consume_front(Prefix: "ssse3."))
515 return (Name == "pabs.b.128" || // Added in 6.0
516 Name == "pabs.d.128" || // Added in 6.0
517 Name == "pabs.w.128"); // Added in 6.0
518
519 if (Name.consume_front(Prefix: "xop."))
520 return (Name == "vpcmov" || // Added in 3.8
521 Name == "vpcmov.256" || // Added in 5.0
522 Name.starts_with(Prefix: "vpcom") || // Added in 3.2, Updated in 9.0
523 Name.starts_with(Prefix: "vprot")); // Added in 8.0
524
525 return (Name == "addcarry.u32" || // Added in 8.0
526 Name == "addcarry.u64" || // Added in 8.0
527 Name == "addcarryx.u32" || // Added in 8.0
528 Name == "addcarryx.u64" || // Added in 8.0
529 Name == "subborrow.u32" || // Added in 8.0
530 Name == "subborrow.u64" || // Added in 8.0
531 Name.starts_with(Prefix: "vcvtph2ps.")); // Added in 11.0
532}
533
534static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
535 Function *&NewFn) {
536 // Only handle intrinsics that start with "x86.".
537 if (!Name.consume_front(Prefix: "x86."))
538 return false;
539
540 if (shouldUpgradeX86Intrinsic(F, Name)) {
541 NewFn = nullptr;
542 return true;
543 }
544
545 if (Name == "rdtscp") { // Added in 8.0
546 // If this intrinsic has 0 operands, it's the new version.
547 if (F->getFunctionType()->getNumParams() == 0)
548 return false;
549
550 rename(GV: F);
551 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
552 id: Intrinsic::x86_rdtscp);
553 return true;
554 }
555
556 Intrinsic::ID ID;
557
558 // SSE4.1 ptest functions may have an old signature.
559 if (Name.consume_front(Prefix: "sse41.ptest")) { // Added in 3.2
560 ID = StringSwitch<Intrinsic::ID>(Name)
561 .Case(S: "c", Value: Intrinsic::x86_sse41_ptestc)
562 .Case(S: "z", Value: Intrinsic::x86_sse41_ptestz)
563 .Case(S: "nzc", Value: Intrinsic::x86_sse41_ptestnzc)
564 .Default(Value: Intrinsic::not_intrinsic);
565 if (ID != Intrinsic::not_intrinsic)
566 return upgradePTESTIntrinsic(F, IID: ID, NewFn);
567
568 return false;
569 }
570
571 // Several blend and other instructions with masks used the wrong number of
572 // bits.
573
574 // Added in 3.6
575 ID = StringSwitch<Intrinsic::ID>(Name)
576 .Case(S: "sse41.insertps", Value: Intrinsic::x86_sse41_insertps)
577 .Case(S: "sse41.dppd", Value: Intrinsic::x86_sse41_dppd)
578 .Case(S: "sse41.dpps", Value: Intrinsic::x86_sse41_dpps)
579 .Case(S: "sse41.mpsadbw", Value: Intrinsic::x86_sse41_mpsadbw)
580 .Case(S: "avx.dp.ps.256", Value: Intrinsic::x86_avx_dp_ps_256)
581 .Case(S: "avx2.mpsadbw", Value: Intrinsic::x86_avx2_mpsadbw)
582 .Default(Value: Intrinsic::not_intrinsic);
583 if (ID != Intrinsic::not_intrinsic)
584 return upgradeX86IntrinsicsWith8BitMask(F, IID: ID, NewFn);
585
586 if (Name.consume_front(Prefix: "avx512.")) {
587 if (Name.consume_front(Prefix: "mask.cmp.")) {
588 // Added in 7.0
589 ID = StringSwitch<Intrinsic::ID>(Name)
590 .Case(S: "pd.128", Value: Intrinsic::x86_avx512_mask_cmp_pd_128)
591 .Case(S: "pd.256", Value: Intrinsic::x86_avx512_mask_cmp_pd_256)
592 .Case(S: "pd.512", Value: Intrinsic::x86_avx512_mask_cmp_pd_512)
593 .Case(S: "ps.128", Value: Intrinsic::x86_avx512_mask_cmp_ps_128)
594 .Case(S: "ps.256", Value: Intrinsic::x86_avx512_mask_cmp_ps_256)
595 .Case(S: "ps.512", Value: Intrinsic::x86_avx512_mask_cmp_ps_512)
596 .Default(Value: Intrinsic::not_intrinsic);
597 if (ID != Intrinsic::not_intrinsic)
598 return upgradeX86MaskedFPCompare(F, IID: ID, NewFn);
599 } else if (Name.starts_with(Prefix: "vpdpbusd.") ||
600 Name.starts_with(Prefix: "vpdpbusds.")) {
601 // Added in 21.1
602 ID = StringSwitch<Intrinsic::ID>(Name)
603 .Case(S: "vpdpbusd.128", Value: Intrinsic::x86_avx512_vpdpbusd_128)
604 .Case(S: "vpdpbusd.256", Value: Intrinsic::x86_avx512_vpdpbusd_256)
605 .Case(S: "vpdpbusd.512", Value: Intrinsic::x86_avx512_vpdpbusd_512)
606 .Case(S: "vpdpbusds.128", Value: Intrinsic::x86_avx512_vpdpbusds_128)
607 .Case(S: "vpdpbusds.256", Value: Intrinsic::x86_avx512_vpdpbusds_256)
608 .Case(S: "vpdpbusds.512", Value: Intrinsic::x86_avx512_vpdpbusds_512)
609 .Default(Value: Intrinsic::not_intrinsic);
610 if (ID != Intrinsic::not_intrinsic)
611 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
612 } else if (Name.starts_with(Prefix: "vpdpwssd.") ||
613 Name.starts_with(Prefix: "vpdpwssds.")) {
614 // Added in 21.1
615 ID = StringSwitch<Intrinsic::ID>(Name)
616 .Case(S: "vpdpwssd.128", Value: Intrinsic::x86_avx512_vpdpwssd_128)
617 .Case(S: "vpdpwssd.256", Value: Intrinsic::x86_avx512_vpdpwssd_256)
618 .Case(S: "vpdpwssd.512", Value: Intrinsic::x86_avx512_vpdpwssd_512)
619 .Case(S: "vpdpwssds.128", Value: Intrinsic::x86_avx512_vpdpwssds_128)
620 .Case(S: "vpdpwssds.256", Value: Intrinsic::x86_avx512_vpdpwssds_256)
621 .Case(S: "vpdpwssds.512", Value: Intrinsic::x86_avx512_vpdpwssds_512)
622 .Default(Value: Intrinsic::not_intrinsic);
623 if (ID != Intrinsic::not_intrinsic)
624 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
625 }
626 return false; // No other 'x86.avx512.*'.
627 }
628
629 if (Name.consume_front(Prefix: "avx2.")) {
630 if (Name.consume_front(Prefix: "vpdpb")) {
631 // Added in 21.1
632 ID = StringSwitch<Intrinsic::ID>(Name)
633 .Case(S: "ssd.128", Value: Intrinsic::x86_avx2_vpdpbssd_128)
634 .Case(S: "ssd.256", Value: Intrinsic::x86_avx2_vpdpbssd_256)
635 .Case(S: "ssds.128", Value: Intrinsic::x86_avx2_vpdpbssds_128)
636 .Case(S: "ssds.256", Value: Intrinsic::x86_avx2_vpdpbssds_256)
637 .Case(S: "sud.128", Value: Intrinsic::x86_avx2_vpdpbsud_128)
638 .Case(S: "sud.256", Value: Intrinsic::x86_avx2_vpdpbsud_256)
639 .Case(S: "suds.128", Value: Intrinsic::x86_avx2_vpdpbsuds_128)
640 .Case(S: "suds.256", Value: Intrinsic::x86_avx2_vpdpbsuds_256)
641 .Case(S: "uud.128", Value: Intrinsic::x86_avx2_vpdpbuud_128)
642 .Case(S: "uud.256", Value: Intrinsic::x86_avx2_vpdpbuud_256)
643 .Case(S: "uuds.128", Value: Intrinsic::x86_avx2_vpdpbuuds_128)
644 .Case(S: "uuds.256", Value: Intrinsic::x86_avx2_vpdpbuuds_256)
645 .Default(Value: Intrinsic::not_intrinsic);
646 if (ID != Intrinsic::not_intrinsic)
647 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
648 } else if (Name.consume_front(Prefix: "vpdpw")) {
649 // Added in 21.1
650 ID = StringSwitch<Intrinsic::ID>(Name)
651 .Case(S: "sud.128", Value: Intrinsic::x86_avx2_vpdpwsud_128)
652 .Case(S: "sud.256", Value: Intrinsic::x86_avx2_vpdpwsud_256)
653 .Case(S: "suds.128", Value: Intrinsic::x86_avx2_vpdpwsuds_128)
654 .Case(S: "suds.256", Value: Intrinsic::x86_avx2_vpdpwsuds_256)
655 .Case(S: "usd.128", Value: Intrinsic::x86_avx2_vpdpwusd_128)
656 .Case(S: "usd.256", Value: Intrinsic::x86_avx2_vpdpwusd_256)
657 .Case(S: "usds.128", Value: Intrinsic::x86_avx2_vpdpwusds_128)
658 .Case(S: "usds.256", Value: Intrinsic::x86_avx2_vpdpwusds_256)
659 .Case(S: "uud.128", Value: Intrinsic::x86_avx2_vpdpwuud_128)
660 .Case(S: "uud.256", Value: Intrinsic::x86_avx2_vpdpwuud_256)
661 .Case(S: "uuds.128", Value: Intrinsic::x86_avx2_vpdpwuuds_128)
662 .Case(S: "uuds.256", Value: Intrinsic::x86_avx2_vpdpwuuds_256)
663 .Default(Value: Intrinsic::not_intrinsic);
664 if (ID != Intrinsic::not_intrinsic)
665 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
666 }
667 return false; // No other 'x86.avx2.*'
668 }
669
670 if (Name.consume_front(Prefix: "avx10.")) {
671 if (Name.consume_front(Prefix: "vpdpb")) {
672 // Added in 21.1
673 ID = StringSwitch<Intrinsic::ID>(Name)
674 .Case(S: "ssd.512", Value: Intrinsic::x86_avx10_vpdpbssd_512)
675 .Case(S: "ssds.512", Value: Intrinsic::x86_avx10_vpdpbssds_512)
676 .Case(S: "sud.512", Value: Intrinsic::x86_avx10_vpdpbsud_512)
677 .Case(S: "suds.512", Value: Intrinsic::x86_avx10_vpdpbsuds_512)
678 .Case(S: "uud.512", Value: Intrinsic::x86_avx10_vpdpbuud_512)
679 .Case(S: "uuds.512", Value: Intrinsic::x86_avx10_vpdpbuuds_512)
680 .Default(Value: Intrinsic::not_intrinsic);
681 if (ID != Intrinsic::not_intrinsic)
682 return upgradeX86MultiplyAddBytes(F, IID: ID, NewFn);
683 } else if (Name.consume_front(Prefix: "vpdpw")) {
684 ID = StringSwitch<Intrinsic::ID>(Name)
685 .Case(S: "sud.512", Value: Intrinsic::x86_avx10_vpdpwsud_512)
686 .Case(S: "suds.512", Value: Intrinsic::x86_avx10_vpdpwsuds_512)
687 .Case(S: "usd.512", Value: Intrinsic::x86_avx10_vpdpwusd_512)
688 .Case(S: "usds.512", Value: Intrinsic::x86_avx10_vpdpwusds_512)
689 .Case(S: "uud.512", Value: Intrinsic::x86_avx10_vpdpwuud_512)
690 .Case(S: "uuds.512", Value: Intrinsic::x86_avx10_vpdpwuuds_512)
691 .Default(Value: Intrinsic::not_intrinsic);
692 if (ID != Intrinsic::not_intrinsic)
693 return upgradeX86MultiplyAddWords(F, IID: ID, NewFn);
694 }
695 return false; // No other 'x86.avx10.*'
696 }
697
698 if (Name.consume_front(Prefix: "avx512bf16.")) {
699 // Added in 9.0
700 ID = StringSwitch<Intrinsic::ID>(Name)
701 .Case(S: "cvtne2ps2bf16.128",
702 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
703 .Case(S: "cvtne2ps2bf16.256",
704 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
705 .Case(S: "cvtne2ps2bf16.512",
706 Value: Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
707 .Case(S: "mask.cvtneps2bf16.128",
708 Value: Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
709 .Case(S: "cvtneps2bf16.256",
710 Value: Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
711 .Case(S: "cvtneps2bf16.512",
712 Value: Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
713 .Default(Value: Intrinsic::not_intrinsic);
714 if (ID != Intrinsic::not_intrinsic)
715 return upgradeX86BF16Intrinsic(F, IID: ID, NewFn);
716
717 // Added in 9.0
718 ID = StringSwitch<Intrinsic::ID>(Name)
719 .Case(S: "dpbf16ps.128", Value: Intrinsic::x86_avx512bf16_dpbf16ps_128)
720 .Case(S: "dpbf16ps.256", Value: Intrinsic::x86_avx512bf16_dpbf16ps_256)
721 .Case(S: "dpbf16ps.512", Value: Intrinsic::x86_avx512bf16_dpbf16ps_512)
722 .Default(Value: Intrinsic::not_intrinsic);
723 if (ID != Intrinsic::not_intrinsic)
724 return upgradeX86BF16DPIntrinsic(F, IID: ID, NewFn);
725 return false; // No other 'x86.avx512bf16.*'.
726 }
727
728 if (Name.consume_front(Prefix: "xop.")) {
729 Intrinsic::ID ID = Intrinsic::not_intrinsic;
730 if (Name.starts_with(Prefix: "vpermil2")) { // Added in 3.9
731 // Upgrade any XOP PERMIL2 index operand still using a float/double
732 // vector.
733 auto Idx = F->getFunctionType()->getParamType(i: 2);
734 if (Idx->isFPOrFPVectorTy()) {
735 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
736 unsigned EltSize = Idx->getScalarSizeInBits();
737 if (EltSize == 64 && IdxSize == 128)
738 ID = Intrinsic::x86_xop_vpermil2pd;
739 else if (EltSize == 32 && IdxSize == 128)
740 ID = Intrinsic::x86_xop_vpermil2ps;
741 else if (EltSize == 64 && IdxSize == 256)
742 ID = Intrinsic::x86_xop_vpermil2pd_256;
743 else
744 ID = Intrinsic::x86_xop_vpermil2ps_256;
745 }
746 } else if (F->arg_size() == 2)
747 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
748 ID = StringSwitch<Intrinsic::ID>(Name)
749 .Case(S: "vfrcz.ss", Value: Intrinsic::x86_xop_vfrcz_ss)
750 .Case(S: "vfrcz.sd", Value: Intrinsic::x86_xop_vfrcz_sd)
751 .Default(Value: Intrinsic::not_intrinsic);
752
753 if (ID != Intrinsic::not_intrinsic) {
754 rename(GV: F);
755 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
756 return true;
757 }
758 return false; // No other 'x86.xop.*'
759 }
760
761 if (Name == "seh.recoverfp") {
762 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
763 id: Intrinsic::eh_recoverfp);
764 return true;
765 }
766
767 return false;
768}
769
// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions, returning true
// if an upgrade was identified. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
772static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
773 StringRef Name,
774 Function *&NewFn) {
775 if (Name.starts_with(Prefix: "rbit")) {
776 // '(arm|aarch64).rbit'.
777 NewFn = Intrinsic::getOrInsertDeclaration(
778 M: F->getParent(), id: Intrinsic::bitreverse, Tys: F->arg_begin()->getType());
779 return true;
780 }
781
782 if (Name == "thread.pointer") {
783 // '(arm|aarch64).thread.pointer'.
784 NewFn = Intrinsic::getOrInsertDeclaration(
785 M: F->getParent(), id: Intrinsic::thread_pointer, Tys: F->getReturnType());
786 return true;
787 }
788
789 bool Neon = Name.consume_front(Prefix: "neon.");
790 if (Neon) {
791 // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8, respectively.
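    // For example, @llvm.aarch64.neon.bfdot.v2f32.v8i8 now takes <4 x bfloat>
    // inputs in place of <8 x i8>.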
794 if (Name.consume_front(Prefix: "bfdot.")) {
795 // (arm|aarch64).neon.bfdot.*'.
796 Intrinsic::ID ID =
797 StringSwitch<Intrinsic::ID>(Name)
798 .Cases(CaseStrings: {"v2f32.v8i8", "v4f32.v16i8"},
799 Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
800 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
801 .Default(Value: Intrinsic::not_intrinsic);
802 if (ID != Intrinsic::not_intrinsic) {
803 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
804 assert((OperandWidth == 64 || OperandWidth == 128) &&
805 "Unexpected operand width");
806 LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
810 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys);
811 return true;
812 }
813 return false; // No other '(arm|aarch64).neon.bfdot.*'.
814 }
815
    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are no longer polymorphic
    // and accept v8bf16 instead of v16i8.
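    // For example, @llvm.aarch64.neon.bfmmla.v4f32.v16i8 is upgraded to the
    // non-overloaded @llvm.aarch64.neon.bfmmla taking <8 x bfloat> operands.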
818 if (Name.consume_front(Prefix: "bfm")) {
819 // (arm|aarch64).neon.bfm*'.
820 if (Name.consume_back(Suffix: ".v4f32.v16i8")) {
821 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
822 Intrinsic::ID ID =
823 StringSwitch<Intrinsic::ID>(Name)
824 .Case(S: "mla",
825 Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
826 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
827 .Case(S: "lalb",
828 Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
829 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
830 .Case(S: "lalt",
831 Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
832 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
833 .Default(Value: Intrinsic::not_intrinsic);
834 if (ID != Intrinsic::not_intrinsic) {
835 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
836 return true;
837 }
838 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
839 }
840 return false; // No other '(arm|aarch64).neon.bfm*.
841 }
842 // Continue on to Aarch64 Neon or Arm Neon.
843 }
844 // Continue on to Arm or Aarch64.
845
846 if (IsArm) {
847 // 'arm.*'.
848 if (Neon) {
849 // 'arm.neon.*'.
850 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
851 .StartsWith(S: "vclz.", Value: Intrinsic::ctlz)
852 .StartsWith(S: "vcnt.", Value: Intrinsic::ctpop)
853 .StartsWith(S: "vqadds.", Value: Intrinsic::sadd_sat)
854 .StartsWith(S: "vqaddu.", Value: Intrinsic::uadd_sat)
855 .StartsWith(S: "vqsubs.", Value: Intrinsic::ssub_sat)
856 .StartsWith(S: "vqsubu.", Value: Intrinsic::usub_sat)
857 .StartsWith(S: "vrinta.", Value: Intrinsic::round)
858 .StartsWith(S: "vrintn.", Value: Intrinsic::roundeven)
859 .StartsWith(S: "vrintm.", Value: Intrinsic::floor)
860 .StartsWith(S: "vrintp.", Value: Intrinsic::ceil)
861 .StartsWith(S: "vrintx.", Value: Intrinsic::rint)
862 .StartsWith(S: "vrintz.", Value: Intrinsic::trunc)
863 .Default(Value: Intrinsic::not_intrinsic);
864 if (ID != Intrinsic::not_intrinsic) {
865 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
866 Tys: F->arg_begin()->getType());
867 return true;
868 }
869
870 if (Name.consume_front(Prefix: "vst")) {
871 // 'arm.neon.vst*'.
872 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
873 SmallVector<StringRef, 2> Groups;
874 if (vstRegex.match(String: Name, Matches: &Groups)) {
875 static const Intrinsic::ID StoreInts[] = {
876 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
877 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
878
879 static const Intrinsic::ID StoreLaneInts[] = {
880 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
881 Intrinsic::arm_neon_vst4lane};
882
883 auto fArgs = F->getFunctionType()->params();
884 Type *Tys[] = {fArgs[0], fArgs[1]};
885 if (Groups[1].size() == 1)
886 NewFn = Intrinsic::getOrInsertDeclaration(
887 M: F->getParent(), id: StoreInts[fArgs.size() - 3], Tys);
888 else
889 NewFn = Intrinsic::getOrInsertDeclaration(
890 M: F->getParent(), id: StoreLaneInts[fArgs.size() - 5], Tys);
891 return true;
892 }
893 return false; // No other 'arm.neon.vst*'.
894 }
895
896 return false; // No other 'arm.neon.*'.
897 }
898
899 if (Name.consume_front(Prefix: "mve.")) {
900 // 'arm.mve.*'.
901 if (Name == "vctp64") {
902 if (cast<FixedVectorType>(Val: F->getReturnType())->getNumElements() == 4) {
903 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
904 // the function and deal with it below in UpgradeIntrinsicCall.
905 rename(GV: F);
906 return true;
907 }
908 return false; // Not 'arm.mve.vctp64'.
909 }
910
911 if (Name.starts_with(Prefix: "vrintn.v")) {
912 NewFn = Intrinsic::getOrInsertDeclaration(
913 M: F->getParent(), id: Intrinsic::roundeven, Tys: F->arg_begin()->getType());
914 return true;
915 }
916
917 // These too are changed to accept a v2i1 instead of the old v4i1.
918 if (Name.consume_back(Suffix: ".v4i1")) {
919 // 'arm.mve.*.v4i1'.
920 if (Name.consume_back(Suffix: ".predicated.v2i64.v4i32"))
921 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
922 return Name == "mull.int" || Name == "vqdmull";
923
924 if (Name.consume_back(Suffix: ".v2i64")) {
925 // 'arm.mve.*.v2i64.v4i1'
926 bool IsGather = Name.consume_front(Prefix: "vldr.gather.");
927 if (IsGather || Name.consume_front(Prefix: "vstr.scatter.")) {
928 if (Name.consume_front(Prefix: "base.")) {
929 // Optional 'wb.' prefix.
930 Name.consume_front(Prefix: "wb.");
931 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
932 // predicated.v2i64.v2i64.v4i1'.
933 return Name == "predicated.v2i64";
934 }
935
936 if (Name.consume_front(Prefix: "offset.predicated."))
937 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
938 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
939
940 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
941 return false;
942 }
943
944 return false; // No other 'arm.mve.*.v2i64.v4i1'.
945 }
946 return false; // No other 'arm.mve.*.v4i1'.
947 }
948 return false; // No other 'arm.mve.*'.
949 }
950
951 if (Name.consume_front(Prefix: "cde.vcx")) {
952 // 'arm.cde.vcx*'.
953 if (Name.consume_back(Suffix: ".predicated.v2i64.v4i1"))
954 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
955 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
956 Name == "3q" || Name == "3qa";
957
958 return false; // No other 'arm.cde.vcx*'.
959 }
960 } else {
961 // 'aarch64.*'.
962 if (Neon) {
963 // 'aarch64.neon.*'.
964 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
965 .StartsWith(S: "frintn", Value: Intrinsic::roundeven)
966 .StartsWith(S: "rbit", Value: Intrinsic::bitreverse)
967 .Default(Value: Intrinsic::not_intrinsic);
968 if (ID != Intrinsic::not_intrinsic) {
969 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
970 Tys: F->arg_begin()->getType());
971 return true;
972 }
973
974 if (Name.starts_with(Prefix: "addp")) {
975 // 'aarch64.neon.addp*'.
976 if (F->arg_size() != 2)
977 return false; // Invalid IR.
978 VectorType *Ty = dyn_cast<VectorType>(Val: F->getReturnType());
979 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
980 NewFn = Intrinsic::getOrInsertDeclaration(
981 M: F->getParent(), id: Intrinsic::aarch64_neon_faddp, Tys: Ty);
982 return true;
983 }
984 }
985
      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
987 if (Name.starts_with(Prefix: "bfcvt")) {
988 NewFn = nullptr;
989 return true;
990 }
991
992 return false; // No other 'aarch64.neon.*'.
993 }
994 if (Name.consume_front(Prefix: "sve.")) {
995 // 'aarch64.sve.*'.
996 if (Name.consume_front(Prefix: "bf")) {
997 if (Name.consume_back(Suffix: ".lane")) {
998 // 'aarch64.sve.bf*.lane'.
999 Intrinsic::ID ID =
1000 StringSwitch<Intrinsic::ID>(Name)
1001 .Case(S: "dot", Value: Intrinsic::aarch64_sve_bfdot_lane_v2)
1002 .Case(S: "mlalb", Value: Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1003 .Case(S: "mlalt", Value: Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1004 .Default(Value: Intrinsic::not_intrinsic);
1005 if (ID != Intrinsic::not_intrinsic) {
1006 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1007 return true;
1008 }
1009 return false; // No other 'aarch64.sve.bf*.lane'.
1010 }
1011 return false; // No other 'aarch64.sve.bf*'.
1012 }
1013
1014 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1015 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1016 NewFn = nullptr;
1017 return true;
1018 }
1019
1020 if (Name.consume_front(Prefix: "addqv")) {
1021 // 'aarch64.sve.addqv'.
1022 if (!F->getReturnType()->isFPOrFPVectorTy())
1023 return false;
1024
1025 auto Args = F->getFunctionType()->params();
1026 Type *Tys[] = {F->getReturnType(), Args[1]};
1027 NewFn = Intrinsic::getOrInsertDeclaration(
1028 M: F->getParent(), id: Intrinsic::aarch64_sve_faddqv, Tys);
1029 return true;
1030 }
1031
1032 if (Name.consume_front(Prefix: "ld")) {
1033 // 'aarch64.sve.ld*'.
1034 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1035 if (LdRegex.match(String: Name)) {
1036 Type *ScalarTy =
1037 cast<VectorType>(Val: F->getReturnType())->getElementType();
1038 ElementCount EC =
1039 cast<VectorType>(Val: F->arg_begin()->getType())->getElementCount();
1040 Type *Ty = VectorType::get(ElementType: ScalarTy, EC);
1041 static const Intrinsic::ID LoadIDs[] = {
1042 Intrinsic::aarch64_sve_ld2_sret,
1043 Intrinsic::aarch64_sve_ld3_sret,
1044 Intrinsic::aarch64_sve_ld4_sret,
1045 };
1046 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1047 id: LoadIDs[Name[0] - '2'], Tys: Ty);
1048 return true;
1049 }
1050 return false; // No other 'aarch64.sve.ld*'.
1051 }
1052
1053 if (Name.consume_front(Prefix: "tuple.")) {
1054 // 'aarch64.sve.tuple.*'.
1055 if (Name.starts_with(Prefix: "get")) {
1056 // 'aarch64.sve.tuple.get*'.
1057 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1058 NewFn = Intrinsic::getOrInsertDeclaration(
1059 M: F->getParent(), id: Intrinsic::vector_extract, Tys);
1060 return true;
1061 }
1062
1063 if (Name.starts_with(Prefix: "set")) {
1064 // 'aarch64.sve.tuple.set*'.
1065 auto Args = F->getFunctionType()->params();
1066 Type *Tys[] = {Args[0], Args[2], Args[1]};
1067 NewFn = Intrinsic::getOrInsertDeclaration(
1068 M: F->getParent(), id: Intrinsic::vector_insert, Tys);
1069 return true;
1070 }
1071
1072 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1073 if (CreateTupleRegex.match(String: Name)) {
1074 // 'aarch64.sve.tuple.create*'.
1075 auto Args = F->getFunctionType()->params();
1076 Type *Tys[] = {F->getReturnType(), Args[1]};
1077 NewFn = Intrinsic::getOrInsertDeclaration(
1078 M: F->getParent(), id: Intrinsic::vector_insert, Tys);
1079 return true;
1080 }
1081 return false; // No other 'aarch64.sve.tuple.*'.
1082 }
1083
1084 if (Name.starts_with(Prefix: "rev.nxv")) {
1085 // 'aarch64.sve.rev.<Ty>'
1086 NewFn = Intrinsic::getOrInsertDeclaration(
1087 M: F->getParent(), id: Intrinsic::vector_reverse, Tys: F->getReturnType());
1088 return true;
1089 }
1090
1091 return false; // No other 'aarch64.sve.*'.
1092 }
1093 }
1094 return false; // No other 'arm.*', 'aarch64.*'.
1095}
1096
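// Return the new intrinsic ID if the given TMA global-to-shared tensor-copy
// declaration needs upgrading, or not_intrinsic otherwise. See the comments
// below for the two forms that require an upgrade.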
1097static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
1098 StringRef Name) {
1099 if (Name.consume_front(Prefix: "cp.async.bulk.tensor.g2s.")) {
1100 Intrinsic::ID ID =
1101 StringSwitch<Intrinsic::ID>(Name)
1102 .Case(S: "im2col.3d",
1103 Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1104 .Case(S: "im2col.4d",
1105 Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1106 .Case(S: "im2col.5d",
1107 Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1108 .Case(S: "tile.1d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1109 .Case(S: "tile.2d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1110 .Case(S: "tile.3d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1111 .Case(S: "tile.4d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1112 .Case(S: "tile.5d", Value: Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1113 .Default(Value: Intrinsic::not_intrinsic);
1114
1115 if (ID == Intrinsic::not_intrinsic)
1116 return ID;
1117
1118 // These intrinsics may need upgrade for two reasons:
1119 // (1) When the address-space of the first argument is shared[AS=3]
1120 // (and we upgrade it to use shared_cluster address-space[AS=7])
1121 if (F->getArg(i: 0)->getType()->getPointerAddressSpace() ==
1122 NVPTXAS::ADDRESS_SPACE_SHARED)
1123 return ID;
1124
1125 // (2) When there are only two boolean flag arguments at the end:
1126 //
1127 // The last three parameters of the older version of these
1128 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1129 //
1130 // The newer version reads as:
1131 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1132 //
    // So, when the type of the third-to-last argument is not an i1, it is the
    // older version and we need to upgrade.
1135 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1136 Type *ArgType = F->getFunctionType()->getParamType(i: FlagStartIndex);
1137 if (!ArgType->isIntegerTy(Bitwidth: 1))
1138 return ID;
1139 }
1140
1141 return Intrinsic::not_intrinsic;
1142}
1143
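// Return the new intrinsic ID if the given NVVM intrinsic still uses the
// shared address space where the shared_cluster address space is now expected,
// or not_intrinsic otherwise.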
1144static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
1145 StringRef Name) {
1146 if (Name.consume_front(Prefix: "mapa.shared.cluster"))
1147 if (F->getReturnType()->getPointerAddressSpace() ==
1148 NVPTXAS::ADDRESS_SPACE_SHARED)
1149 return Intrinsic::nvvm_mapa_shared_cluster;
1150
1151 if (Name.consume_front(Prefix: "cp.async.bulk.")) {
1152 Intrinsic::ID ID =
1153 StringSwitch<Intrinsic::ID>(Name)
1154 .Case(S: "global.to.shared.cluster",
1155 Value: Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1156 .Case(S: "shared.cta.to.cluster",
1157 Value: Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1158 .Default(Value: Intrinsic::not_intrinsic);
1159
1160 if (ID != Intrinsic::not_intrinsic)
1161 if (F->getArg(i: 0)->getType()->getPointerAddressSpace() ==
1162 NVPTXAS::ADDRESS_SPACE_SHARED)
1163 return ID;
1164 }
1165
1166 return Intrinsic::not_intrinsic;
1167}
1168
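// Map the name of a legacy NVVM bf16/bf16x2 math intrinsic (fma, fmax, fmin
// and neg variants) to its current intrinsic ID, or not_intrinsic if the name
// is not one of them.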
1169static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
1170 if (Name.consume_front(Prefix: "fma.rn."))
1171 return StringSwitch<Intrinsic::ID>(Name)
1172 .Case(S: "bf16", Value: Intrinsic::nvvm_fma_rn_bf16)
1173 .Case(S: "bf16x2", Value: Intrinsic::nvvm_fma_rn_bf16x2)
1174 .Case(S: "relu.bf16", Value: Intrinsic::nvvm_fma_rn_relu_bf16)
1175 .Case(S: "relu.bf16x2", Value: Intrinsic::nvvm_fma_rn_relu_bf16x2)
1176 .Default(Value: Intrinsic::not_intrinsic);
1177
1178 if (Name.consume_front(Prefix: "fmax."))
1179 return StringSwitch<Intrinsic::ID>(Name)
1180 .Case(S: "bf16", Value: Intrinsic::nvvm_fmax_bf16)
1181 .Case(S: "bf16x2", Value: Intrinsic::nvvm_fmax_bf16x2)
1182 .Case(S: "ftz.bf16", Value: Intrinsic::nvvm_fmax_ftz_bf16)
1183 .Case(S: "ftz.bf16x2", Value: Intrinsic::nvvm_fmax_ftz_bf16x2)
1184 .Case(S: "ftz.nan.bf16", Value: Intrinsic::nvvm_fmax_ftz_nan_bf16)
1185 .Case(S: "ftz.nan.bf16x2", Value: Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1186 .Case(S: "ftz.nan.xorsign.abs.bf16",
1187 Value: Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1188 .Case(S: "ftz.nan.xorsign.abs.bf16x2",
1189 Value: Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1190 .Case(S: "ftz.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1191 .Case(S: "ftz.xorsign.abs.bf16x2",
1192 Value: Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1193 .Case(S: "nan.bf16", Value: Intrinsic::nvvm_fmax_nan_bf16)
1194 .Case(S: "nan.bf16x2", Value: Intrinsic::nvvm_fmax_nan_bf16x2)
1195 .Case(S: "nan.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1196 .Case(S: "nan.xorsign.abs.bf16x2",
1197 Value: Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1198 .Case(S: "xorsign.abs.bf16", Value: Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1199 .Case(S: "xorsign.abs.bf16x2", Value: Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1200 .Default(Value: Intrinsic::not_intrinsic);
1201
1202 if (Name.consume_front(Prefix: "fmin."))
1203 return StringSwitch<Intrinsic::ID>(Name)
1204 .Case(S: "bf16", Value: Intrinsic::nvvm_fmin_bf16)
1205 .Case(S: "bf16x2", Value: Intrinsic::nvvm_fmin_bf16x2)
1206 .Case(S: "ftz.bf16", Value: Intrinsic::nvvm_fmin_ftz_bf16)
1207 .Case(S: "ftz.bf16x2", Value: Intrinsic::nvvm_fmin_ftz_bf16x2)
1208 .Case(S: "ftz.nan.bf16", Value: Intrinsic::nvvm_fmin_ftz_nan_bf16)
1209 .Case(S: "ftz.nan.bf16x2", Value: Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1210 .Case(S: "ftz.nan.xorsign.abs.bf16",
1211 Value: Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1212 .Case(S: "ftz.nan.xorsign.abs.bf16x2",
1213 Value: Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1214 .Case(S: "ftz.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1215 .Case(S: "ftz.xorsign.abs.bf16x2",
1216 Value: Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1217 .Case(S: "nan.bf16", Value: Intrinsic::nvvm_fmin_nan_bf16)
1218 .Case(S: "nan.bf16x2", Value: Intrinsic::nvvm_fmin_nan_bf16x2)
1219 .Case(S: "nan.xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1220 .Case(S: "nan.xorsign.abs.bf16x2",
1221 Value: Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1222 .Case(S: "xorsign.abs.bf16", Value: Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1223 .Case(S: "xorsign.abs.bf16x2", Value: Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1224 .Default(Value: Intrinsic::not_intrinsic);
1225
1226 if (Name.consume_front(Prefix: "neg."))
1227 return StringSwitch<Intrinsic::ID>(Name)
1228 .Case(S: "bf16", Value: Intrinsic::nvvm_neg_bf16)
1229 .Case(S: "bf16x2", Value: Intrinsic::nvvm_neg_bf16x2)
1230 .Default(Value: Intrinsic::not_intrinsic);
1231
1232 return Intrinsic::not_intrinsic;
1233}
1234
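// Consume a leading NVVM address-space qualifier (local, shared, global,
// constant or param) from Name, returning true if one was present.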
1235static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
1236 return Name.consume_front(Prefix: "local") || Name.consume_front(Prefix: "shared") ||
1237 Name.consume_front(Prefix: "global") || Name.consume_front(Prefix: "constant") ||
1238 Name.consume_front(Prefix: "param");
1239}
1240
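// Return true if the legacy llvm.convert.to.fp16 / llvm.convert.from.fp16
// declaration has parameter and return types for which the corresponding
// fptrunc+bitcast (or bitcast+fpext) casts would be valid.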
1241static bool convertIntrinsicValidType(StringRef Name,
1242 const FunctionType *FuncTy) {
1243 Type *HalfTy = Type::getHalfTy(C&: FuncTy->getContext());
1244 if (Name.starts_with(Prefix: "to.fp16")) {
1245 return CastInst::castIsValid(op: Instruction::FPTrunc, SrcTy: FuncTy->getParamType(i: 0),
1246 DstTy: HalfTy) &&
1247 CastInst::castIsValid(op: Instruction::BitCast, SrcTy: HalfTy,
1248 DstTy: FuncTy->getReturnType());
1249 }
1250
1251 if (Name.starts_with(Prefix: "from.fp16")) {
1252 return CastInst::castIsValid(op: Instruction::BitCast, SrcTy: FuncTy->getParamType(i: 0),
1253 DstTy: HalfTy) &&
1254 CastInst::castIsValid(op: Instruction::FPExt, SrcTy: HalfTy,
1255 DstTy: FuncTy->getReturnType());
1256 }
1257
1258 return false;
1259}
1260
1261static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
1262 bool CanUpgradeDebugIntrinsicsToRecords) {
1263 assert(F && "Illegal to upgrade a non-existent Function.");
1264
1265 StringRef Name = F->getName();
1266
1267 // Quickly eliminate it, if it's not a candidate.
1268 if (!Name.consume_front(Prefix: "llvm.") || Name.empty())
1269 return false;
1270
1271 switch (Name[0]) {
1272 default: break;
1273 case 'a': {
1274 bool IsArm = Name.consume_front(Prefix: "arm.");
1275 if (IsArm || Name.consume_front(Prefix: "aarch64.")) {
1276 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1277 return true;
1278 break;
1279 }
1280
1281 if (Name.consume_front(Prefix: "amdgcn.")) {
1282 if (Name == "alignbit") {
1283 // Target specific intrinsic became redundant
1284 NewFn = Intrinsic::getOrInsertDeclaration(
1285 M: F->getParent(), id: Intrinsic::fshr, Tys: {F->getReturnType()});
1286 return true;
1287 }
1288
1289 if (Name.consume_front(Prefix: "atomic.")) {
1290 if (Name.starts_with(Prefix: "inc") || Name.starts_with(Prefix: "dec") ||
1291 Name.starts_with(Prefix: "cond.sub") || Name.starts_with(Prefix: "csub")) {
1292 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1293 // and usub_sat so there's no new declaration.
1294 NewFn = nullptr;
1295 return true;
1296 }
1297 break; // No other 'amdgcn.atomic.*'
1298 }
1299
1300 // Legacy wmma iu intrinsics without the optional clamp operand.
1301 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
1302 F->arg_size() == 7) {
1303 NewFn = nullptr;
1304 return true;
1305 }
1306 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
1307 F->arg_size() == 8) {
1308 NewFn = nullptr;
1309 return true;
1310 }
1311
1312 if (Name.consume_front(Prefix: "ds.") || Name.consume_front(Prefix: "global.atomic.") ||
1313 Name.consume_front(Prefix: "flat.atomic.")) {
1314 if (Name.starts_with(Prefix: "fadd") ||
1315 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1316 (Name.starts_with(Prefix: "fmin") && !Name.starts_with(Prefix: "fmin.num")) ||
1317 (Name.starts_with(Prefix: "fmax") && !Name.starts_with(Prefix: "fmax.num"))) {
1318 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1319 // declaration.
1320 NewFn = nullptr;
1321 return true;
1322 }
1323 }
1324
1325 if (Name.starts_with(Prefix: "ldexp.")) {
1326 // Target-specific intrinsic became redundant.
1327 NewFn = Intrinsic::getOrInsertDeclaration(
1328 M: F->getParent(), id: Intrinsic::ldexp,
1329 Tys: {F->getReturnType(), F->getArg(i: 1)->getType()});
1330 return true;
1331 }
1332 break; // No other 'amdgcn.*'
1333 }
1334
1335 break;
1336 }
1337 case 'c': {
1338 if (F->arg_size() == 1) {
1339 if (Name.consume_front(Prefix: "convert.")) {
1340 if (convertIntrinsicValidType(Name, FuncTy: F->getFunctionType())) {
1341 NewFn = nullptr;
1342 return true;
1343 }
1344 }
1345
1346 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1347 .StartsWith(S: "ctlz.", Value: Intrinsic::ctlz)
1348 .StartsWith(S: "cttz.", Value: Intrinsic::cttz)
1349 .Default(Value: Intrinsic::not_intrinsic);
1350 if (ID != Intrinsic::not_intrinsic) {
1351 rename(GV: F);
1352 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1353 Tys: F->arg_begin()->getType());
1354 return true;
1355 }
1356 }
1357
1358 if (F->arg_size() == 2 && Name == "coro.end") {
1359 rename(GV: F);
1360 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1361 id: Intrinsic::coro_end);
1362 return true;
1363 }
1364
1365 break;
1366 }
1367 case 'd':
1368 if (Name.consume_front(Prefix: "dbg.")) {
1369 // Mark debug intrinsics for upgrade to new debug format.
1370 if (CanUpgradeDebugIntrinsicsToRecords) {
1371 if (Name == "addr" || Name == "value" || Name == "assign" ||
1372 Name == "declare" || Name == "label") {
1373 // There's no function to replace these with.
1374 NewFn = nullptr;
1375 // But we do want these to get upgraded.
1376 return true;
1377 }
1378 }
1379 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1380 // converted to DbgVariableRecords later.
1381 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1382 rename(GV: F);
1383 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1384 id: Intrinsic::dbg_value);
1385 return true;
1386 }
1387 break; // No other 'dbg.*'.
1388 }
1389 break;
1390 case 'e':
1391 if (Name.consume_front(Prefix: "experimental.vector.")) {
1392 Intrinsic::ID ID =
1393 StringSwitch<Intrinsic::ID>(Name)
1394 // Skip over extract.last.active, otherwise it will be 'upgraded'
1395 // to a regular vector extract which is a different operation.
1396 .StartsWith(S: "extract.last.active.", Value: Intrinsic::not_intrinsic)
1397 .StartsWith(S: "extract.", Value: Intrinsic::vector_extract)
1398 .StartsWith(S: "insert.", Value: Intrinsic::vector_insert)
1399 .StartsWith(S: "reverse.", Value: Intrinsic::vector_reverse)
1400 .StartsWith(S: "interleave2.", Value: Intrinsic::vector_interleave2)
1401 .StartsWith(S: "deinterleave2.", Value: Intrinsic::vector_deinterleave2)
1402 .StartsWith(S: "partial.reduce.add",
1403 Value: Intrinsic::vector_partial_reduce_add)
1404 .Default(Value: Intrinsic::not_intrinsic);
1405 if (ID != Intrinsic::not_intrinsic) {
1406 const auto *FT = F->getFunctionType();
1407 SmallVector<Type *, 2> Tys;
1408 if (ID == Intrinsic::vector_extract ||
1409 ID == Intrinsic::vector_interleave2)
1410 // Extracting overloads the return type.
1411 Tys.push_back(Elt: FT->getReturnType());
1412 if (ID != Intrinsic::vector_interleave2)
1413 Tys.push_back(Elt: FT->getParamType(i: 0));
1414 if (ID == Intrinsic::vector_insert ||
1415 ID == Intrinsic::vector_partial_reduce_add)
1416 // Inserting overloads the inserted type.
1417 Tys.push_back(Elt: FT->getParamType(i: 1));
1418 rename(GV: F);
1419 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys);
1420 return true;
1421 }
1422
1423 if (Name.consume_front(Prefix: "reduce.")) {
1424 SmallVector<StringRef, 2> Groups;
1425 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1426 if (R.match(String: Name, Matches: &Groups))
1427 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1428 .Case(S: "add", Value: Intrinsic::vector_reduce_add)
1429 .Case(S: "mul", Value: Intrinsic::vector_reduce_mul)
1430 .Case(S: "and", Value: Intrinsic::vector_reduce_and)
1431 .Case(S: "or", Value: Intrinsic::vector_reduce_or)
1432 .Case(S: "xor", Value: Intrinsic::vector_reduce_xor)
1433 .Case(S: "smax", Value: Intrinsic::vector_reduce_smax)
1434 .Case(S: "smin", Value: Intrinsic::vector_reduce_smin)
1435 .Case(S: "umax", Value: Intrinsic::vector_reduce_umax)
1436 .Case(S: "umin", Value: Intrinsic::vector_reduce_umin)
1437 .Case(S: "fmax", Value: Intrinsic::vector_reduce_fmax)
1438 .Case(S: "fmin", Value: Intrinsic::vector_reduce_fmin)
1439 .Default(Value: Intrinsic::not_intrinsic);
1440
1441 bool V2 = false;
1442 if (ID == Intrinsic::not_intrinsic) {
1443 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1444 Groups.clear();
1445 V2 = true;
1446 if (R2.match(String: Name, Matches: &Groups))
1447 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1448 .Case(S: "fadd", Value: Intrinsic::vector_reduce_fadd)
1449 .Case(S: "fmul", Value: Intrinsic::vector_reduce_fmul)
1450 .Default(Value: Intrinsic::not_intrinsic);
1451 }
1452 if (ID != Intrinsic::not_intrinsic) {
1453 rename(GV: F);
1454 auto Args = F->getFunctionType()->params();
1455 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1456 Tys: {Args[V2 ? 1 : 0]});
1457 return true;
1458 }
1459 break; // No other 'experimental.vector.reduce.*'.
1460 }
1461
1462 if (Name.consume_front(Prefix: "splice"))
1463 return true;
1464 break; // No other 'experimental.vector.*'.
1465 }
1466 if (Name.consume_front(Prefix: "experimental.stepvector.")) {
1467 Intrinsic::ID ID = Intrinsic::stepvector;
1468 rename(GV: F);
1469 NewFn = Intrinsic::getOrInsertDeclaration(
1470 M: F->getParent(), id: ID, Tys: F->getFunctionType()->getReturnType());
1471 return true;
1472 }
1473 break; // No other 'e*'.
1474 case 'f':
1475 if (Name.starts_with(Prefix: "flt.rounds")) {
1476 rename(GV: F);
1477 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1478 id: Intrinsic::get_rounding);
1479 return true;
1480 }
1481 break;
1482 case 'i':
1483 if (Name.starts_with(Prefix: "invariant.group.barrier")) {
1484 // Rename invariant.group.barrier to launder.invariant.group
1485 auto Args = F->getFunctionType()->params();
1486 Type* ObjectPtr[1] = {Args[0]};
1487 rename(GV: F);
1488 NewFn = Intrinsic::getOrInsertDeclaration(
1489 M: F->getParent(), id: Intrinsic::launder_invariant_group, Tys: ObjectPtr);
1490 return true;
1491 }
1492 break;
1493 case 'l':
1494 if ((Name.starts_with(Prefix: "lifetime.start") ||
1495 Name.starts_with(Prefix: "lifetime.end")) &&
1496 F->arg_size() == 2) {
1497 Intrinsic::ID IID = Name.starts_with(Prefix: "lifetime.start")
1498 ? Intrinsic::lifetime_start
1499 : Intrinsic::lifetime_end;
1500 rename(GV: F);
1501 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1502 Tys: F->getArg(i: 0)->getType());
1503 return true;
1504 }
1505 break;
1506 case 'm': {
1507 // Update the memory intrinsics (memcpy/memmove/memset) that have an
1508 // alignment parameter to embed the alignment as an attribute of the
1509 // pointer args.
1510 if (unsigned ID = StringSwitch<unsigned>(Name)
1511 .StartsWith(S: "memcpy.", Value: Intrinsic::memcpy)
1512 .StartsWith(S: "memmove.", Value: Intrinsic::memmove)
1513 .Default(Value: 0)) {
1514 if (F->arg_size() == 5) {
1515 rename(GV: F);
1516 // Get the types of dest, src, and len
1517 ArrayRef<Type *> ParamTypes =
1518 F->getFunctionType()->params().slice(N: 0, M: 3);
1519 NewFn =
1520 Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys: ParamTypes);
1521 return true;
1522 }
1523 }
1524 if (Name.starts_with(Prefix: "memset.") && F->arg_size() == 5) {
1525 rename(GV: F);
1526 // Get the types of dest and len
1527 const auto *FT = F->getFunctionType();
1528 Type *ParamTypes[2] = {
1529 FT->getParamType(i: 0), // Dest
1530 FT->getParamType(i: 2) // len
1531 };
1532 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1533 id: Intrinsic::memset, Tys: ParamTypes);
1534 return true;
1535 }
1536
1537 unsigned MaskedID =
1538 StringSwitch<unsigned>(Name)
1539 .StartsWith(S: "masked.load", Value: Intrinsic::masked_load)
1540 .StartsWith(S: "masked.gather", Value: Intrinsic::masked_gather)
1541 .StartsWith(S: "masked.store", Value: Intrinsic::masked_store)
1542 .StartsWith(S: "masked.scatter", Value: Intrinsic::masked_scatter)
1543 .Default(Value: 0);
1544 if (MaskedID && F->arg_size() == 4) {
1545 rename(GV: F);
1546 if (MaskedID == Intrinsic::masked_load ||
1547 MaskedID == Intrinsic::masked_gather) {
1548 NewFn = Intrinsic::getOrInsertDeclaration(
1549 M: F->getParent(), id: MaskedID,
1550 Tys: {F->getReturnType(), F->getArg(i: 0)->getType()});
1551 return true;
1552 }
1553 NewFn = Intrinsic::getOrInsertDeclaration(
1554 M: F->getParent(), id: MaskedID,
1555 Tys: {F->getArg(i: 0)->getType(), F->getArg(i: 1)->getType()});
1556 return true;
1557 }
1558 break;
1559 }
1560 case 'n': {
1561 if (Name.consume_front(Prefix: "nvvm.")) {
1562 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1563 if (F->arg_size() == 1) {
1564 Intrinsic::ID IID =
1565 StringSwitch<Intrinsic::ID>(Name)
1566 .Cases(CaseStrings: {"brev32", "brev64"}, Value: Intrinsic::bitreverse)
1567 .Case(S: "clz.i", Value: Intrinsic::ctlz)
1568 .Case(S: "popc.i", Value: Intrinsic::ctpop)
1569 .Default(Value: Intrinsic::not_intrinsic);
1570 if (IID != Intrinsic::not_intrinsic) {
1571 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1572 Tys: {F->getReturnType()});
1573 return true;
1574 }
1575 } else if (F->arg_size() == 2) {
1576 Intrinsic::ID IID =
1577 StringSwitch<Intrinsic::ID>(Name)
1578 .Cases(CaseStrings: {"max.s", "max.i", "max.ll"}, Value: Intrinsic::smax)
1579 .Cases(CaseStrings: {"min.s", "min.i", "min.ll"}, Value: Intrinsic::smin)
1580 .Cases(CaseStrings: {"max.us", "max.ui", "max.ull"}, Value: Intrinsic::umax)
1581 .Cases(CaseStrings: {"min.us", "min.ui", "min.ull"}, Value: Intrinsic::umin)
1582 .Default(Value: Intrinsic::not_intrinsic);
1583 if (IID != Intrinsic::not_intrinsic) {
1584 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1585 Tys: {F->getReturnType()});
1586 return true;
1587 }
1588 }
1589
1590 // Check for nvvm intrinsics that need a return type adjustment.
1591 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1592 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1593 if (IID != Intrinsic::not_intrinsic) {
1594 NewFn = nullptr;
1595 return true;
1596 }
1597 }
1598
1599 // Upgrade Distributed Shared Memory Intrinsics
1600 Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
1601 if (IID != Intrinsic::not_intrinsic) {
1602 rename(GV: F);
1603 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1604 return true;
1605 }
1606
1607 // Upgrade TMA copy G2S Intrinsics
1608 IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
1609 if (IID != Intrinsic::not_intrinsic) {
1610 rename(GV: F);
1611 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1612 return true;
1613 }
1614
1615 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1616 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1617 //
1618 // TODO: We could add lohi.i2d.
1619 bool Expand = false;
1620 if (Name.consume_front(Prefix: "abs."))
1621 // nvvm.abs.{i,ll,bf16,bf16x2}
1622 Expand =
1623 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1624 else if (Name.consume_front(Prefix: "fabs."))
1625 // nvvm.fabs.{f,ftz.f,d}
1626 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1627 else if (Name.consume_front(Prefix: "ex2.approx."))
1628 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1629 Expand =
1630 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1631 else if (Name.consume_front(Prefix: "atomic.load."))
1632 // nvvm.atomic.load.add.{f32,f64}.p
1633 // nvvm.atomic.load.{inc,dec}.32.p
1634 Expand = StringSwitch<bool>(Name)
1635 .StartsWith(S: "add.f32.p", Value: true)
1636 .StartsWith(S: "add.f64.p", Value: true)
1637 .StartsWith(S: "inc.32.p", Value: true)
1638 .StartsWith(S: "dec.32.p", Value: true)
1639 .Default(Value: false);
1640 else if (Name.consume_front(Prefix: "bitcast."))
1641 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1642 Expand =
1643 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1644 else if (Name.consume_front(Prefix: "rotate."))
1645 // nvvm.rotate.{b32,b64,right.b64}
1646 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1647 else if (Name.consume_front(Prefix: "ptr.gen.to."))
1648 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1649 Expand = consumeNVVMPtrAddrSpace(Name);
1650 else if (Name.consume_front(Prefix: "ptr."))
1651 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1652 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(Prefix: ".to.gen");
1653 else if (Name.consume_front(Prefix: "ldg.global."))
1654 // nvvm.ldg.global.{i,p,f}
1655 Expand = (Name.starts_with(Prefix: "i.") || Name.starts_with(Prefix: "f.") ||
1656 Name.starts_with(Prefix: "p."));
1657 else
1658 Expand = StringSwitch<bool>(Name)
1659 .Case(S: "barrier0", Value: true)
1660 .Case(S: "barrier.n", Value: true)
1661 .Case(S: "barrier.sync.cnt", Value: true)
1662 .Case(S: "barrier.sync", Value: true)
1663 .Case(S: "barrier", Value: true)
1664 .Case(S: "bar.sync", Value: true)
1665 .Case(S: "barrier0.popc", Value: true)
1666 .Case(S: "barrier0.and", Value: true)
1667 .Case(S: "barrier0.or", Value: true)
1668 .Case(S: "clz.ll", Value: true)
1669 .Case(S: "popc.ll", Value: true)
1670 .Case(S: "h2f", Value: true)
1671 .Case(S: "swap.lo.hi.b64", Value: true)
1672 .Case(S: "tanh.approx.f32", Value: true)
1673 .Default(Value: false);
1674
1675 if (Expand) {
1676 NewFn = nullptr;
1677 return true;
1678 }
1679 break; // No other 'nvvm.*'.
1680 }
1681 break;
1682 }
1683 case 'o':
1684 if (Name.starts_with(Prefix: "objectsize.")) {
1685 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1686 if (F->arg_size() == 2 || F->arg_size() == 3) {
1687 rename(GV: F);
1688 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1689 id: Intrinsic::objectsize, Tys);
1690 return true;
1691 }
1692 }
1693 break;
1694
1695 case 'p':
1696 if (Name.starts_with(Prefix: "ptr.annotation.") && F->arg_size() == 4) {
1697 rename(GV: F);
1698 NewFn = Intrinsic::getOrInsertDeclaration(
1699 M: F->getParent(), id: Intrinsic::ptr_annotation,
1700 Tys: {F->arg_begin()->getType(), F->getArg(i: 1)->getType()});
1701 return true;
1702 }
1703 break;
1704
1705 case 'r': {
1706 if (Name.consume_front(Prefix: "riscv.")) {
1707 Intrinsic::ID ID;
1708 ID = StringSwitch<Intrinsic::ID>(Name)
1709 .Case(S: "aes32dsi", Value: Intrinsic::riscv_aes32dsi)
1710 .Case(S: "aes32dsmi", Value: Intrinsic::riscv_aes32dsmi)
1711 .Case(S: "aes32esi", Value: Intrinsic::riscv_aes32esi)
1712 .Case(S: "aes32esmi", Value: Intrinsic::riscv_aes32esmi)
1713 .Default(Value: Intrinsic::not_intrinsic);
1714 if (ID != Intrinsic::not_intrinsic) {
1715 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32)) {
1716 rename(GV: F);
1717 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1718 return true;
1719 }
1720 break; // No other applicable upgrades.
1721 }
1722
1723 ID = StringSwitch<Intrinsic::ID>(Name)
1724 .StartsWith(S: "sm4ks", Value: Intrinsic::riscv_sm4ks)
1725 .StartsWith(S: "sm4ed", Value: Intrinsic::riscv_sm4ed)
1726 .Default(Value: Intrinsic::not_intrinsic);
1727 if (ID != Intrinsic::not_intrinsic) {
1728 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32) ||
1729 F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) {
1730 rename(GV: F);
1731 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1732 return true;
1733 }
1734 break; // No other applicable upgrades.
1735 }
1736
1737 ID = StringSwitch<Intrinsic::ID>(Name)
1738 .StartsWith(S: "sha256sig0", Value: Intrinsic::riscv_sha256sig0)
1739 .StartsWith(S: "sha256sig1", Value: Intrinsic::riscv_sha256sig1)
1740 .StartsWith(S: "sha256sum0", Value: Intrinsic::riscv_sha256sum0)
1741 .StartsWith(S: "sha256sum1", Value: Intrinsic::riscv_sha256sum1)
1742 .StartsWith(S: "sm3p0", Value: Intrinsic::riscv_sm3p0)
1743 .StartsWith(S: "sm3p1", Value: Intrinsic::riscv_sm3p1)
1744 .Default(Value: Intrinsic::not_intrinsic);
1745 if (ID != Intrinsic::not_intrinsic) {
1746 if (F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) {
1747 rename(GV: F);
1748 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1749 return true;
1750 }
1751 break; // No other applicable upgrades.
1752 }
1753
1754 // Replace llvm.riscv.clmul with llvm.clmul.
1755 if (Name == "clmul.i32" || Name == "clmul.i64") {
1756 NewFn = Intrinsic::getOrInsertDeclaration(
1757 M: F->getParent(), id: Intrinsic::clmul, Tys: {F->getReturnType()});
1758 return true;
1759 }
1760
1761 break; // No other 'riscv.*' intrinsics
1762 }
1763 } break;
1764
1765 case 's':
1766 if (Name == "stackprotectorcheck") {
1767 NewFn = nullptr;
1768 return true;
1769 }
1770 break;
1771
1772 case 't':
1773 if (Name == "thread.pointer") {
1774 NewFn = Intrinsic::getOrInsertDeclaration(
1775 M: F->getParent(), id: Intrinsic::thread_pointer, Tys: F->getReturnType());
1776 return true;
1777 }
1778 break;
1779
1780 case 'v': {
1781 if (Name == "var.annotation" && F->arg_size() == 4) {
1782 rename(GV: F);
1783 NewFn = Intrinsic::getOrInsertDeclaration(
1784 M: F->getParent(), id: Intrinsic::var_annotation,
1785 Tys: {{F->arg_begin()->getType(), F->getArg(i: 1)->getType()}});
1786 return true;
1787 }
1788 if (Name.consume_front(Prefix: "vector.splice")) {
1789 if (Name.starts_with(Prefix: ".left") || Name.starts_with(Prefix: ".right"))
1790 break;
1791 return true;
1792 }
1793 break;
1794 }
1795
1796 case 'w':
1797 if (Name.consume_front(Prefix: "wasm.")) {
1798 Intrinsic::ID ID =
1799 StringSwitch<Intrinsic::ID>(Name)
1800 .StartsWith(S: "fma.", Value: Intrinsic::wasm_relaxed_madd)
1801 .StartsWith(S: "fms.", Value: Intrinsic::wasm_relaxed_nmadd)
1802 .StartsWith(S: "laneselect.", Value: Intrinsic::wasm_relaxed_laneselect)
1803 .Default(Value: Intrinsic::not_intrinsic);
1804 if (ID != Intrinsic::not_intrinsic) {
1805 rename(GV: F);
1806 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1807 Tys: F->getReturnType());
1808 return true;
1809 }
1810
1811 if (Name.consume_front(Prefix: "dot.i8x16.i7x16.")) {
1812 ID = StringSwitch<Intrinsic::ID>(Name)
1813 .Case(S: "signed", Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1814 .Case(S: "add.signed",
1815 Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1816 .Default(Value: Intrinsic::not_intrinsic);
1817 if (ID != Intrinsic::not_intrinsic) {
1818 rename(GV: F);
1819 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1820 return true;
1821 }
1822 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1823 }
1824 break; // No other 'wasm.*'.
1825 }
1826 break;
1827
1828 case 'x':
1829 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1830 return true;
1831 }
1832
1833 auto *ST = dyn_cast<StructType>(Val: F->getReturnType());
1834 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1835 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1836 // Replace return type with literal non-packed struct. Only do this for
1837 // intrinsics declared to return a struct, not for intrinsics with
1838 // overloaded return type, in which case the exact struct type will be
1839 // mangled into the name.
1840 SmallVector<Intrinsic::IITDescriptor> Desc;
1841 Intrinsic::getIntrinsicInfoTableEntries(id: F->getIntrinsicID(), T&: Desc);
1842 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1843 auto *FT = F->getFunctionType();
1844 auto *NewST = StructType::get(Context&: ST->getContext(), Elements: ST->elements());
1845 auto *NewFT = FunctionType::get(Result: NewST, Params: FT->params(), isVarArg: FT->isVarArg());
1846 std::string Name = F->getName().str();
1847 rename(GV: F);
1848 NewFn = Function::Create(Ty: NewFT, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(),
1849 N: Name, M: F->getParent());
1850
1851 // The new function may also need remangling.
1852 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F: NewFn))
1853 NewFn = *Result;
1854 return true;
1855 }
1856 }
1857
1858 // Remangle our intrinsic since we upgrade the mangling
1859 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1860 if (Result != std::nullopt) {
1861 NewFn = *Result;
1862 return true;
1863 }
1864
1865 // This may not belong here. This function is effectively being overloaded
1866 // to both detect an intrinsic which needs upgrading, and to provide the
1867 // upgraded form of the intrinsic. We should perhaps have two separate
1868 // functions for this.
1869 return false;
1870}
1871
1872bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1873 bool CanUpgradeDebugIntrinsicsToRecords) {
1874 NewFn = nullptr;
1875 bool Upgraded =
1876 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1877
1878 // Upgrade intrinsic attributes. This does not change the function.
1879 if (NewFn)
1880 F = NewFn;
1881 if (Intrinsic::ID id = F->getIntrinsicID()) {
1882 // Only do this if the intrinsic signature is valid.
1883 SmallVector<Type *> OverloadTys;
1884 if (Intrinsic::getIntrinsicSignature(id, FT: F->getFunctionType(), ArgTys&: OverloadTys))
1885 F->setAttributes(
1886 Intrinsic::getAttributes(C&: F->getContext(), id, FT: F->getFunctionType()));
1887 }
1888 return Upgraded;
1889}
1890
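// Upgrade llvm.global_ctors / llvm.global_dtors from the legacy two-field
// element type to the current three-field form by appending a null pointer
// element to each entry. Returns nullptr if no upgrade is needed.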
1891GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1892 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1893 GV->getName() == "llvm.global_dtors")) ||
1894 !GV->hasInitializer())
1895 return nullptr;
1896 ArrayType *ATy = dyn_cast<ArrayType>(Val: GV->getValueType());
1897 if (!ATy)
1898 return nullptr;
1899 StructType *STy = dyn_cast<StructType>(Val: ATy->getElementType());
1900 if (!STy || STy->getNumElements() != 2)
1901 return nullptr;
1902
1903 LLVMContext &C = GV->getContext();
1904 IRBuilder<> IRB(C);
1905 auto EltTy = StructType::get(elt1: STy->getElementType(N: 0), elts: STy->getElementType(N: 1),
1906 elts: IRB.getPtrTy());
1907 Constant *Init = GV->getInitializer();
1908 unsigned N = Init->getNumOperands();
1909 std::vector<Constant *> NewCtors(N);
1910 for (unsigned i = 0; i != N; ++i) {
1911 auto Ctor = cast<Constant>(Val: Init->getOperand(i));
1912 NewCtors[i] = ConstantStruct::get(T: EltTy, Vs: Ctor->getAggregateElement(Elt: 0u),
1913 Vs: Ctor->getAggregateElement(Elt: 1),
1914 Vs: ConstantPointerNull::get(T: IRB.getPtrTy()));
1915 }
1916 Constant *NewInit = ConstantArray::get(T: ArrayType::get(ElementType: EltTy, NumElements: N), V: NewCtors);
1917
1918 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1919 NewInit, GV->getName());
1920}
1921
1922// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1923// to byte shuffles.
1924static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1925 unsigned Shift) {
1926 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1927 unsigned NumElts = ResultTy->getNumElements() * 8;
1928
1929 // Bitcast from a 64-bit element type to a byte element type.
1930 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1931 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1932
1933 // We'll be shuffling in zeroes.
1934 Value *Res = Constant::getNullValue(Ty: VecTy);
1935
1936 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1937 // we'll just return the zero vector.
1938 if (Shift < 16) {
1939 int Idxs[64];
1940 // 256/512-bit version is split into 2/4 16-byte lanes.
1941 for (unsigned l = 0; l != NumElts; l += 16)
1942 for (unsigned i = 0; i != 16; ++i) {
1943 unsigned Idx = NumElts + i - Shift;
1944 if (Idx < NumElts)
1945 Idx -= NumElts - 16; // end of lane, switch operand.
1946 Idxs[l + i] = Idx + l;
1947 }
1948
1949 Res = Builder.CreateShuffleVector(V1: Res, V2: Op, Mask: ArrayRef(Idxs, NumElts));
1950 }
1951
1952 // Bitcast back to a 64-bit element type.
1953 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
1954}
1955
1956// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1957// to byte shuffles.
1958static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1959 unsigned Shift) {
1960 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1961 unsigned NumElts = ResultTy->getNumElements() * 8;
1962
1963 // Bitcast from a 64-bit element type to a byte element type.
1964 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1965 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1966
1967 // We'll be shuffling in zeroes.
1968 Value *Res = Constant::getNullValue(Ty: VecTy);
1969
1970 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1971 // we'll just return the zero vector.
1972 if (Shift < 16) {
1973 int Idxs[64];
1974 // 256/512-bit version is split into 2/4 16-byte lanes.
1975 for (unsigned l = 0; l != NumElts; l += 16)
1976 for (unsigned i = 0; i != 16; ++i) {
1977 unsigned Idx = i + Shift;
1978 if (Idx >= 16)
1979 Idx += NumElts - 16; // end of lane, switch operand.
1980 Idxs[l + i] = Idx + l;
1981 }
1982
1983 Res = Builder.CreateShuffleVector(V1: Op, V2: Res, Mask: ArrayRef(Idxs, NumElts));
1984 }
1985
1986 // Bitcast back to a 64-bit element type.
1987 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
1988}
1989
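// Convert an integer x86 k-mask into a vector of i1, extracting only the low
// NumElts elements when fewer than 8 are required.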
1990static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1991 unsigned NumElts) {
1992 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1993 llvm::VectorType *MaskTy = FixedVectorType::get(
1994 ElementType: Builder.getInt1Ty(), NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth());
1995 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
1996
1997 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1998 // i8 and we need to extract down to the right number of elements.
1999 if (NumElts <= 4) {
2000 int Indices[4];
2001 for (unsigned i = 0; i != NumElts; ++i)
2002 Indices[i] = i;
2003 Mask = Builder.CreateShuffleVector(V1: Mask, V2: Mask, Mask: ArrayRef(Indices, NumElts),
2004 Name: "extract");
2005 }
2006
2007 return Mask;
2008}
2009
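// Emit a vector select between Op0 and Op1 controlled by an integer k-mask.
// A constant all-ones mask short-circuits to Op0.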
2010static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2011 Value *Op1) {
2012 // If the mask is all ones just emit the first operation.
2013 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2014 if (C->isAllOnesValue())
2015 return Op0;
2016
2017 Mask = getX86MaskVec(Builder, Mask,
2018 NumElts: cast<FixedVectorType>(Val: Op0->getType())->getNumElements());
2019 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
2020}
2021
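// Emit a select between the scalar values Op0 and Op1 controlled by bit 0 of
// an integer k-mask. A constant all-ones mask short-circuits to Op0.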
2022static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2023 Value *Op1) {
2024 // If the mask is all ones just emit the first operation.
2025 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2026 if (C->isAllOnesValue())
2027 return Op0;
2028
2029 auto *MaskTy = FixedVectorType::get(ElementType: Builder.getInt1Ty(),
2030 NumElts: Mask->getType()->getIntegerBitWidth());
2031 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
2032 Mask = Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0);
2033 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
2034}
2035
2036// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2037 // PALIGNR handles large immediates by shifting, while VALIGN masks the immediate,
2038 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2039static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
2040 Value *Op1, Value *Shift,
2041 Value *Passthru, Value *Mask,
2042 bool IsVALIGN) {
2043 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Shift)->getZExtValue();
2044
2045 unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
2046 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2047 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2048 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2049
2050 // Mask the immediate for VALIGN.
2051 if (IsVALIGN)
2052 ShiftVal &= (NumElts - 1);
2053
2054 // If palignr is shifting the pair of vectors more than the size of two
2055 // lanes, emit zero.
2056 if (ShiftVal >= 32)
2057 return llvm::Constant::getNullValue(Ty: Op0->getType());
2058
2059 // If palignr is shifting the pair of input vectors more than one lane,
2060 // but less than two lanes, convert to shifting in zeroes.
2061 if (ShiftVal > 16) {
2062 ShiftVal -= 16;
2063 Op1 = Op0;
2064 Op0 = llvm::Constant::getNullValue(Ty: Op0->getType());
2065 }
2066
2067 int Indices[64];
2068 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2069 for (unsigned l = 0; l < NumElts; l += 16) {
2070 for (unsigned i = 0; i != 16; ++i) {
2071 unsigned Idx = ShiftVal + i;
2072 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2073 Idx += NumElts - 16; // End of lane, switch operand.
2074 Indices[l + i] = Idx + l;
2075 }
2076 }
2077
2078 Value *Align = Builder.CreateShuffleVector(
2079 V1: Op1, V2: Op0, Mask: ArrayRef(Indices, NumElts), Name: "palignr");
2080
2081 return emitX86Select(Builder, Mask, Op0: Align, Op1: Passthru);
2082}
2083
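// Upgrade masked VPERMT2/VPERMI2 intrinsics to the unmasked vpermi2var
// intrinsics plus a select against the passthru (or zero) vector.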
2084static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2085 bool ZeroMask, bool IndexForm) {
2086 Type *Ty = CI.getType();
2087 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2088 unsigned EltWidth = Ty->getScalarSizeInBits();
2089 bool IsFloat = Ty->isFPOrFPVectorTy();
2090 Intrinsic::ID IID;
2091 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2092 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2093 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2094 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2095 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2096 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2097 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2098 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2099 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2100 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2101 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2102 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2103 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2104 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2105 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2106 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2107 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2108 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2109 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2110 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2111 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2112 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2113 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2114 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2115 else if (VecWidth == 128 && EltWidth == 16)
2116 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2117 else if (VecWidth == 256 && EltWidth == 16)
2118 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2119 else if (VecWidth == 512 && EltWidth == 16)
2120 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2121 else if (VecWidth == 128 && EltWidth == 8)
2122 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2123 else if (VecWidth == 256 && EltWidth == 8)
2124 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2125 else if (VecWidth == 512 && EltWidth == 8)
2126 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2127 else
2128 llvm_unreachable("Unexpected intrinsic");
2129
2130 Value *Args[] = { CI.getArgOperand(i: 0) , CI.getArgOperand(i: 1),
2131 CI.getArgOperand(i: 2) };
2132
2133 // If this isn't index form we need to swap operands 0 and 1.
2134 if (!IndexForm)
2135 std::swap(a&: Args[0], b&: Args[1]);
2136
2137 Value *V = Builder.CreateIntrinsic(ID: IID, Args);
2138 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2139 : Builder.CreateBitCast(V: CI.getArgOperand(i: 1),
2140 DestTy: Ty);
2141 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: V, Op1: PassThru);
2142}
2143
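// Upgrade a two-operand x86 intrinsic to the generic intrinsic IID, applying
// a mask select when the masked (four-argument) form was used.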
2144static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2145 Intrinsic::ID IID) {
2146 Type *Ty = CI.getType();
2147 Value *Op0 = CI.getOperand(i_nocapture: 0);
2148 Value *Op1 = CI.getOperand(i_nocapture: 1);
2149 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1});
2150
2151 if (CI.arg_size() == 4) { // For masked intrinsics.
2152 Value *VecSrc = CI.getOperand(i_nocapture: 2);
2153 Value *Mask = CI.getOperand(i_nocapture: 3);
2154 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2155 }
2156 return Res;
2157}
2158
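// Upgrade x86 rotate intrinsics to fshl/fshr with both value operands equal
// to the source, splatting a scalar rotate amount when necessary.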
2159static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2160 bool IsRotateRight) {
2161 Type *Ty = CI.getType();
2162 Value *Src = CI.getArgOperand(i: 0);
2163 Value *Amt = CI.getArgOperand(i: 1);
2164
2165 // Amount may be scalar immediate, in which case create a splat vector.
2166 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2167 // we only care about the lowest log2 bits anyway.
2168 if (Amt->getType() != Ty) {
2169 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
2170 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
2171 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
2172 }
2173
2174 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2175 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Src, Src, Amt});
2176
2177 if (CI.arg_size() == 4) { // For masked intrinsics.
2178 Value *VecSrc = CI.getOperand(i_nocapture: 2);
2179 Value *Mask = CI.getOperand(i_nocapture: 3);
2180 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2181 }
2182 return Res;
2183}
2184
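// Upgrade XOP vpcom/vpcomu intrinsics to an icmp plus sign-extension, mapping
// the immediate to the corresponding predicate (or an all-false/all-true
// constant).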
2185static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2186 bool IsSigned) {
2187 Type *Ty = CI.getType();
2188 Value *LHS = CI.getArgOperand(i: 0);
2189 Value *RHS = CI.getArgOperand(i: 1);
2190
2191 CmpInst::Predicate Pred;
2192 switch (Imm) {
2193 case 0x0:
2194 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2195 break;
2196 case 0x1:
2197 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2198 break;
2199 case 0x2:
2200 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2201 break;
2202 case 0x3:
2203 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2204 break;
2205 case 0x4:
2206 Pred = ICmpInst::ICMP_EQ;
2207 break;
2208 case 0x5:
2209 Pred = ICmpInst::ICMP_NE;
2210 break;
2211 case 0x6:
2212 return Constant::getNullValue(Ty); // FALSE
2213 case 0x7:
2214 return Constant::getAllOnesValue(Ty); // TRUE
2215 default:
2216 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2217 }
2218
2219 Value *Cmp = Builder.CreateICmp(P: Pred, LHS, RHS);
2220 Value *Ext = Builder.CreateSExt(V: Cmp, DestTy: Ty);
2221 return Ext;
2222}
2223
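// Upgrade x86 concat-shift intrinsics (a funnel shift of Op0 and Op1) to
// fshl/fshr, splatting a scalar shift amount and applying a mask select for
// the masked forms.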
2224static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2225 bool IsShiftRight, bool ZeroMask) {
2226 Type *Ty = CI.getType();
2227 Value *Op0 = CI.getArgOperand(i: 0);
2228 Value *Op1 = CI.getArgOperand(i: 1);
2229 Value *Amt = CI.getArgOperand(i: 2);
2230
2231 if (IsShiftRight)
2232 std::swap(a&: Op0, b&: Op1);
2233
2234 // Amount may be scalar immediate, in which case create a splat vector.
2235 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2236 // we only care about the lowest log2 bits anyway.
2237 if (Amt->getType() != Ty) {
2238 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
2239 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
2240 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
2241 }
2242
2243 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2244 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1, Amt});
2245
2246 unsigned NumArgs = CI.arg_size();
2247 if (NumArgs >= 4) { // For masked intrinsics.
2248 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(i: 3) :
2249 ZeroMask ? ConstantAggregateZero::get(Ty: CI.getType()) :
2250 CI.getArgOperand(i: 0);
2251 Value *Mask = CI.getOperand(i_nocapture: NumArgs - 1);
2252 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2253 }
2254 return Res;
2255}
2256
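// Upgrade a masked x86 store intrinsic to a generic masked.store, or to a
// plain aligned store when the mask is known to be all ones.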
2257static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2258 Value *Mask, bool Aligned) {
2259 const Align Alignment =
2260 Aligned
2261 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2262 : Align(1);
2263
2264 // If the mask is all ones just emit a regular store.
2265 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2266 if (C->isAllOnesValue())
2267 return Builder.CreateAlignedStore(Val: Data, Ptr, Align: Alignment);
2268
2269 // Convert the mask from an integer type to a vector of i1.
2270 unsigned NumElts = cast<FixedVectorType>(Val: Data->getType())->getNumElements();
2271 Mask = getX86MaskVec(Builder, Mask, NumElts);
2272 return Builder.CreateMaskedStore(Val: Data, Ptr, Alignment, Mask);
2273}
2274
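// Upgrade a masked x86 load intrinsic to a generic masked.load, or to a plain
// aligned load when the mask is known to be all ones.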
2275static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2276 Value *Passthru, Value *Mask, bool Aligned) {
2277 Type *ValTy = Passthru->getType();
2278 const Align Alignment =
2279 Aligned
2280 ? Align(
2281 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2282 8)
2283 : Align(1);
2284
2285 // If the mask is all ones just emit a regular load.
2286 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2287 if (C->isAllOnesValue())
2288 return Builder.CreateAlignedLoad(Ty: ValTy, Ptr, Align: Alignment);
2289
2290 // Convert the mask from an integer type to a vector of i1.
2291 unsigned NumElts = cast<FixedVectorType>(Val: ValTy)->getNumElements();
2292 Mask = getX86MaskVec(Builder, Mask, NumElts);
2293 return Builder.CreateMaskedLoad(Ty: ValTy, Ptr, Alignment, Mask, PassThru: Passthru);
2294}
2295
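// Upgrade x86 absolute-value intrinsics to llvm.abs, with a mask select for
// the masked forms.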
2296static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2297 Type *Ty = CI.getType();
2298 Value *Op0 = CI.getArgOperand(i: 0);
2299 Value *Res = Builder.CreateIntrinsic(ID: Intrinsic::abs, Types: Ty,
2300 Args: {Op0, Builder.getInt1(V: false)});
2301 if (CI.arg_size() == 3)
2302 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 2), Op0: Res, Op1: CI.getArgOperand(i: 1));
2303 return Res;
2304}
2305
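// Upgrade PMULDQ/PMULUDQ intrinsics to a plain vXi64 multiply of the sign- or
// zero-extended low 32 bits of each element.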
2306static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2307 Type *Ty = CI.getType();
2308
2309 // Arguments have a vXi32 type so cast to vXi64.
2310 Value *LHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 0), DestTy: Ty);
2311 Value *RHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 1), DestTy: Ty);
2312
2313 if (IsSigned) {
2314 // Shift left then arithmetic shift right.
2315 Constant *ShiftAmt = ConstantInt::get(Ty, V: 32);
2316 LHS = Builder.CreateShl(LHS, RHS: ShiftAmt);
2317 LHS = Builder.CreateAShr(LHS, RHS: ShiftAmt);
2318 RHS = Builder.CreateShl(LHS: RHS, RHS: ShiftAmt);
2319 RHS = Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt);
2320 } else {
2321 // Clear the upper bits.
2322 Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff);
2323 LHS = Builder.CreateAnd(LHS, RHS: Mask);
2324 RHS = Builder.CreateAnd(LHS: RHS, RHS: Mask);
2325 }
2326
2327 Value *Res = Builder.CreateMul(LHS, RHS);
2328
2329 if (CI.arg_size() == 4)
2330 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Res, Op1: CI.getArgOperand(i: 2));
2331
2332 return Res;
2333}
2334
2335 // Apply a mask to a vector of i1's and make sure the result is at least 8 bits wide.
2336static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2337 Value *Mask) {
2338 unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
2339 if (Mask) {
2340 const auto *C = dyn_cast<Constant>(Val: Mask);
2341 if (!C || !C->isAllOnesValue())
2342 Vec = Builder.CreateAnd(LHS: Vec, RHS: getX86MaskVec(Builder, Mask, NumElts));
2343 }
2344
2345 if (NumElts < 8) {
2346 int Indices[8];
2347 for (unsigned i = 0; i != NumElts; ++i)
2348 Indices[i] = i;
2349 for (unsigned i = NumElts; i != 8; ++i)
2350 Indices[i] = NumElts + i % NumElts;
2351 Vec = Builder.CreateShuffleVector(V1: Vec,
2352 V2: Constant::getNullValue(Ty: Vec->getType()),
2353 Mask: Indices);
2354 }
2355 return Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: std::max(a: NumElts, b: 8U)));
2356}
2357
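// Upgrade masked integer compare intrinsics to an icmp (or an all-false /
// all-true constant for CC 3 and 7) combined with the k-mask operand.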
2358static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2359 unsigned CC, bool Signed) {
2360 Value *Op0 = CI.getArgOperand(i: 0);
2361 unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
2362
2363 Value *Cmp;
2364 if (CC == 3) {
2365 Cmp = Constant::getNullValue(
2366 Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
2367 } else if (CC == 7) {
2368 Cmp = Constant::getAllOnesValue(
2369 Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
2370 } else {
2371 ICmpInst::Predicate Pred;
2372 switch (CC) {
2373 default: llvm_unreachable("Unknown condition code");
2374 case 0: Pred = ICmpInst::ICMP_EQ; break;
2375 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2376 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2377 case 4: Pred = ICmpInst::ICMP_NE; break;
2378 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2379 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2380 }
2381 Cmp = Builder.CreateICmp(P: Pred, LHS: Op0, RHS: CI.getArgOperand(i: 1));
2382 }
2383
2384 Value *Mask = CI.getArgOperand(i: CI.arg_size() - 1);
2385
2386 return applyX86MaskOn1BitsVec(Builder, Vec: Cmp, Mask);
2387}
2388
2389// Replace a masked intrinsic with an older unmasked intrinsic.
2390static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2391 Intrinsic::ID IID) {
2392 Value *Rep =
2393 Builder.CreateIntrinsic(ID: IID, Args: {CI.getArgOperand(i: 0), CI.getArgOperand(i: 1)});
2394 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Rep, Op1: CI.getArgOperand(i: 2));
2395}
2396
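// Upgrade masked scalar move intrinsics: select element 0 of B or Src based
// on bit 0 of the mask and insert the result into element 0 of A.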
2397static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2398 Value* A = CI.getArgOperand(i: 0);
2399 Value* B = CI.getArgOperand(i: 1);
2400 Value* Src = CI.getArgOperand(i: 2);
2401 Value* Mask = CI.getArgOperand(i: 3);
2402
2403 Value* AndNode = Builder.CreateAnd(LHS: Mask, RHS: APInt(8, 1));
2404 Value* Cmp = Builder.CreateIsNotNull(Arg: AndNode);
2405 Value* Extract1 = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
2406 Value* Extract2 = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0);
2407 Value* Select = Builder.CreateSelect(C: Cmp, True: Extract1, False: Extract2);
2408 return Builder.CreateInsertElement(Vec: A, NewElt: Select, Idx: (uint64_t)0);
2409}
2410
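// Upgrade vpmovm2* style intrinsics: sign-extend an i1 mask vector to the
// element width of the result vector.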
2411static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2412 Value* Op = CI.getArgOperand(i: 0);
2413 Type* ReturnOp = CI.getType();
2414 unsigned NumElts = cast<FixedVectorType>(Val: CI.getType())->getNumElements();
2415 Value *Mask = getX86MaskVec(Builder, Mask: Op, NumElts);
2416 return Builder.CreateSExt(V: Mask, DestTy: ReturnOp, Name: "vpmovm2");
2417}
2418
2419// Replace intrinsic with unmasked version and a select.
2420static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2421 CallBase &CI, Value *&Rep) {
2422 Name = Name.substr(Start: 12); // Remove avx512.mask.
2423
2424 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2425 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2426 Intrinsic::ID IID;
2427 if (Name.starts_with(Prefix: "max.p")) {
2428 if (VecWidth == 128 && EltWidth == 32)
2429 IID = Intrinsic::x86_sse_max_ps;
2430 else if (VecWidth == 128 && EltWidth == 64)
2431 IID = Intrinsic::x86_sse2_max_pd;
2432 else if (VecWidth == 256 && EltWidth == 32)
2433 IID = Intrinsic::x86_avx_max_ps_256;
2434 else if (VecWidth == 256 && EltWidth == 64)
2435 IID = Intrinsic::x86_avx_max_pd_256;
2436 else
2437 llvm_unreachable("Unexpected intrinsic");
2438 } else if (Name.starts_with(Prefix: "min.p")) {
2439 if (VecWidth == 128 && EltWidth == 32)
2440 IID = Intrinsic::x86_sse_min_ps;
2441 else if (VecWidth == 128 && EltWidth == 64)
2442 IID = Intrinsic::x86_sse2_min_pd;
2443 else if (VecWidth == 256 && EltWidth == 32)
2444 IID = Intrinsic::x86_avx_min_ps_256;
2445 else if (VecWidth == 256 && EltWidth == 64)
2446 IID = Intrinsic::x86_avx_min_pd_256;
2447 else
2448 llvm_unreachable("Unexpected intrinsic");
2449 } else if (Name.starts_with(Prefix: "pshuf.b.")) {
2450 if (VecWidth == 128)
2451 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2452 else if (VecWidth == 256)
2453 IID = Intrinsic::x86_avx2_pshuf_b;
2454 else if (VecWidth == 512)
2455 IID = Intrinsic::x86_avx512_pshuf_b_512;
2456 else
2457 llvm_unreachable("Unexpected intrinsic");
2458 } else if (Name.starts_with(Prefix: "pmul.hr.sw.")) {
2459 if (VecWidth == 128)
2460 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2461 else if (VecWidth == 256)
2462 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2463 else if (VecWidth == 512)
2464 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2465 else
2466 llvm_unreachable("Unexpected intrinsic");
2467 } else if (Name.starts_with(Prefix: "pmulh.w.")) {
2468 if (VecWidth == 128)
2469 IID = Intrinsic::x86_sse2_pmulh_w;
2470 else if (VecWidth == 256)
2471 IID = Intrinsic::x86_avx2_pmulh_w;
2472 else if (VecWidth == 512)
2473 IID = Intrinsic::x86_avx512_pmulh_w_512;
2474 else
2475 llvm_unreachable("Unexpected intrinsic");
2476 } else if (Name.starts_with(Prefix: "pmulhu.w.")) {
2477 if (VecWidth == 128)
2478 IID = Intrinsic::x86_sse2_pmulhu_w;
2479 else if (VecWidth == 256)
2480 IID = Intrinsic::x86_avx2_pmulhu_w;
2481 else if (VecWidth == 512)
2482 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2483 else
2484 llvm_unreachable("Unexpected intrinsic");
2485 } else if (Name.starts_with(Prefix: "pmaddw.d.")) {
2486 if (VecWidth == 128)
2487 IID = Intrinsic::x86_sse2_pmadd_wd;
2488 else if (VecWidth == 256)
2489 IID = Intrinsic::x86_avx2_pmadd_wd;
2490 else if (VecWidth == 512)
2491 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2492 else
2493 llvm_unreachable("Unexpected intrinsic");
2494 } else if (Name.starts_with(Prefix: "pmaddubs.w.")) {
2495 if (VecWidth == 128)
2496 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2497 else if (VecWidth == 256)
2498 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2499 else if (VecWidth == 512)
2500 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2501 else
2502 llvm_unreachable("Unexpected intrinsic");
2503 } else if (Name.starts_with(Prefix: "packsswb.")) {
2504 if (VecWidth == 128)
2505 IID = Intrinsic::x86_sse2_packsswb_128;
2506 else if (VecWidth == 256)
2507 IID = Intrinsic::x86_avx2_packsswb;
2508 else if (VecWidth == 512)
2509 IID = Intrinsic::x86_avx512_packsswb_512;
2510 else
2511 llvm_unreachable("Unexpected intrinsic");
2512 } else if (Name.starts_with(Prefix: "packssdw.")) {
2513 if (VecWidth == 128)
2514 IID = Intrinsic::x86_sse2_packssdw_128;
2515 else if (VecWidth == 256)
2516 IID = Intrinsic::x86_avx2_packssdw;
2517 else if (VecWidth == 512)
2518 IID = Intrinsic::x86_avx512_packssdw_512;
2519 else
2520 llvm_unreachable("Unexpected intrinsic");
2521 } else if (Name.starts_with(Prefix: "packuswb.")) {
2522 if (VecWidth == 128)
2523 IID = Intrinsic::x86_sse2_packuswb_128;
2524 else if (VecWidth == 256)
2525 IID = Intrinsic::x86_avx2_packuswb;
2526 else if (VecWidth == 512)
2527 IID = Intrinsic::x86_avx512_packuswb_512;
2528 else
2529 llvm_unreachable("Unexpected intrinsic");
2530 } else if (Name.starts_with(Prefix: "packusdw.")) {
2531 if (VecWidth == 128)
2532 IID = Intrinsic::x86_sse41_packusdw;
2533 else if (VecWidth == 256)
2534 IID = Intrinsic::x86_avx2_packusdw;
2535 else if (VecWidth == 512)
2536 IID = Intrinsic::x86_avx512_packusdw_512;
2537 else
2538 llvm_unreachable("Unexpected intrinsic");
2539 } else if (Name.starts_with(Prefix: "vpermilvar.")) {
2540 if (VecWidth == 128 && EltWidth == 32)
2541 IID = Intrinsic::x86_avx_vpermilvar_ps;
2542 else if (VecWidth == 128 && EltWidth == 64)
2543 IID = Intrinsic::x86_avx_vpermilvar_pd;
2544 else if (VecWidth == 256 && EltWidth == 32)
2545 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2546 else if (VecWidth == 256 && EltWidth == 64)
2547 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2548 else if (VecWidth == 512 && EltWidth == 32)
2549 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2550 else if (VecWidth == 512 && EltWidth == 64)
2551 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2552 else
2553 llvm_unreachable("Unexpected intrinsic");
2554 } else if (Name == "cvtpd2dq.256") {
2555 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2556 } else if (Name == "cvtpd2ps.256") {
2557 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2558 } else if (Name == "cvttpd2dq.256") {
2559 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2560 } else if (Name == "cvttps2dq.128") {
2561 IID = Intrinsic::x86_sse2_cvttps2dq;
2562 } else if (Name == "cvttps2dq.256") {
2563 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2564 } else if (Name.starts_with(Prefix: "permvar.")) {
2565 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2566 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2567 IID = Intrinsic::x86_avx2_permps;
2568 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2569 IID = Intrinsic::x86_avx2_permd;
2570 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2571 IID = Intrinsic::x86_avx512_permvar_df_256;
2572 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2573 IID = Intrinsic::x86_avx512_permvar_di_256;
2574 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2575 IID = Intrinsic::x86_avx512_permvar_sf_512;
2576 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2577 IID = Intrinsic::x86_avx512_permvar_si_512;
2578 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2579 IID = Intrinsic::x86_avx512_permvar_df_512;
2580 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2581 IID = Intrinsic::x86_avx512_permvar_di_512;
2582 else if (VecWidth == 128 && EltWidth == 16)
2583 IID = Intrinsic::x86_avx512_permvar_hi_128;
2584 else if (VecWidth == 256 && EltWidth == 16)
2585 IID = Intrinsic::x86_avx512_permvar_hi_256;
2586 else if (VecWidth == 512 && EltWidth == 16)
2587 IID = Intrinsic::x86_avx512_permvar_hi_512;
2588 else if (VecWidth == 128 && EltWidth == 8)
2589 IID = Intrinsic::x86_avx512_permvar_qi_128;
2590 else if (VecWidth == 256 && EltWidth == 8)
2591 IID = Intrinsic::x86_avx512_permvar_qi_256;
2592 else if (VecWidth == 512 && EltWidth == 8)
2593 IID = Intrinsic::x86_avx512_permvar_qi_512;
2594 else
2595 llvm_unreachable("Unexpected intrinsic");
2596 } else if (Name.starts_with(Prefix: "dbpsadbw.")) {
2597 if (VecWidth == 128)
2598 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2599 else if (VecWidth == 256)
2600 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2601 else if (VecWidth == 512)
2602 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2603 else
2604 llvm_unreachable("Unexpected intrinsic");
2605 } else if (Name.starts_with(Prefix: "pmultishift.qb.")) {
2606 if (VecWidth == 128)
2607 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2608 else if (VecWidth == 256)
2609 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2610 else if (VecWidth == 512)
2611 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2612 else
2613 llvm_unreachable("Unexpected intrinsic");
2614 } else if (Name.starts_with(Prefix: "conflict.")) {
2615 if (Name[9] == 'd' && VecWidth == 128)
2616 IID = Intrinsic::x86_avx512_conflict_d_128;
2617 else if (Name[9] == 'd' && VecWidth == 256)
2618 IID = Intrinsic::x86_avx512_conflict_d_256;
2619 else if (Name[9] == 'd' && VecWidth == 512)
2620 IID = Intrinsic::x86_avx512_conflict_d_512;
2621 else if (Name[9] == 'q' && VecWidth == 128)
2622 IID = Intrinsic::x86_avx512_conflict_q_128;
2623 else if (Name[9] == 'q' && VecWidth == 256)
2624 IID = Intrinsic::x86_avx512_conflict_q_256;
2625 else if (Name[9] == 'q' && VecWidth == 512)
2626 IID = Intrinsic::x86_avx512_conflict_q_512;
2627 else
2628 llvm_unreachable("Unexpected intrinsic");
2629 } else if (Name.starts_with(Prefix: "pavg.")) {
2630 if (Name[5] == 'b' && VecWidth == 128)
2631 IID = Intrinsic::x86_sse2_pavg_b;
2632 else if (Name[5] == 'b' && VecWidth == 256)
2633 IID = Intrinsic::x86_avx2_pavg_b;
2634 else if (Name[5] == 'b' && VecWidth == 512)
2635 IID = Intrinsic::x86_avx512_pavg_b_512;
2636 else if (Name[5] == 'w' && VecWidth == 128)
2637 IID = Intrinsic::x86_sse2_pavg_w;
2638 else if (Name[5] == 'w' && VecWidth == 256)
2639 IID = Intrinsic::x86_avx2_pavg_w;
2640 else if (Name[5] == 'w' && VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_pavg_w_512;
2642 else
2643 llvm_unreachable("Unexpected intrinsic");
2644 } else
2645 return false;
2646
2647 SmallVector<Value *, 4> Args(CI.args());
2648 Args.pop_back();
2649 Args.pop_back();
2650 Rep = Builder.CreateIntrinsic(ID: IID, Args);
2651 unsigned NumArgs = CI.arg_size();
2652 Rep = emitX86Select(Builder, Mask: CI.getArgOperand(i: NumArgs - 1), Op0: Rep,
2653 Op1: CI.getArgOperand(i: NumArgs - 2));
2654 return true;
2655}
2656
2657/// Upgrade comment in call to inline asm that represents an objc retain release
2658/// marker.
2659void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2660 size_t Pos;
2661 if (AsmStr->find(s: "mov\tfp") == 0 &&
2662 AsmStr->find(s: "objc_retainAutoreleaseReturnValue") != std::string::npos &&
2663 (Pos = AsmStr->find(s: "# marker")) != std::string::npos) {
2664 AsmStr->replace(pos: Pos, n1: 1, s: ";");
2665 }
2666}
2667
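// Expand an nvvm.* intrinsic call that corresponds to a plain LLVM IR idiom
// or generic intrinsic rather than a single replacement declaration (the
// "Expand" cases recognized in upgradeIntrinsicFunction1).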
2668static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2669 Function *F, IRBuilder<> &Builder) {
2670 Value *Rep = nullptr;
2671
2672 if (Name == "abs.i" || Name == "abs.ll") {
2673 Value *Arg = CI->getArgOperand(i: 0);
2674 Value *Neg = Builder.CreateNeg(V: Arg, Name: "neg");
2675 Value *Cmp = Builder.CreateICmpSGE(
2676 LHS: Arg, RHS: llvm::Constant::getNullValue(Ty: Arg->getType()), Name: "abs.cond");
2677 Rep = Builder.CreateSelect(C: Cmp, True: Arg, False: Neg, Name: "abs");
2678 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2679 Type *Ty = (Name == "abs.bf16")
2680 ? Builder.getBFloatTy()
2681 : FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 2);
2682 Value *Arg = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Ty);
2683 Value *Abs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::nvvm_fabs, V: Arg);
2684 Rep = Builder.CreateBitCast(V: Abs, DestTy: CI->getType());
2685 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2686 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2687 : Intrinsic::nvvm_fabs;
2688 Rep = Builder.CreateUnaryIntrinsic(ID: IID, V: CI->getArgOperand(i: 0));
2689 } else if (Name.consume_front(Prefix: "ex2.approx.")) {
2690 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2691 Intrinsic::ID IID = Name.starts_with(Prefix: "ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2692 : Intrinsic::nvvm_ex2_approx;
2693 Rep = Builder.CreateUnaryIntrinsic(ID: IID, V: CI->getArgOperand(i: 0));
2694 } else if (Name.starts_with(Prefix: "atomic.load.add.f32.p") ||
2695 Name.starts_with(Prefix: "atomic.load.add.f64.p")) {
2696 Value *Ptr = CI->getArgOperand(i: 0);
2697 Value *Val = CI->getArgOperand(i: 1);
2698 Rep = Builder.CreateAtomicRMW(Op: AtomicRMWInst::FAdd, Ptr, Val, Align: MaybeAlign(),
2699 Ordering: AtomicOrdering::SequentiallyConsistent);
2700 } else if (Name.starts_with(Prefix: "atomic.load.inc.32.p") ||
2701 Name.starts_with(Prefix: "atomic.load.dec.32.p")) {
2702 Value *Ptr = CI->getArgOperand(i: 0);
2703 Value *Val = CI->getArgOperand(i: 1);
2704 auto Op = Name.starts_with(Prefix: "atomic.load.inc") ? AtomicRMWInst::UIncWrap
2705 : AtomicRMWInst::UDecWrap;
2706 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, Align: MaybeAlign(),
2707 Ordering: AtomicOrdering::SequentiallyConsistent);
2708 } else if (Name == "clz.ll") {
2709 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
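// For example (illustrative):
//   %r = call i32 @llvm.nvvm.clz.ll(i64 %x)
// becomes
//   %c = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
//   %r = trunc i64 %c to i32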
2710 Value *Arg = CI->getArgOperand(i: 0);
2711 Value *Ctlz = Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: {Arg->getType()},
2712 Args: {Arg, Builder.getFalse()},
2713 /*FMFSource=*/nullptr, Name: "ctlz");
2714 Rep = Builder.CreateTrunc(V: Ctlz, DestTy: Builder.getInt32Ty(), Name: "ctlz.trunc");
2715 } else if (Name == "popc.ll") {
2716 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2717 // i64.
2718 Value *Arg = CI->getArgOperand(i: 0);
2719 Value *Popc = Builder.CreateIntrinsic(ID: Intrinsic::ctpop, Types: {Arg->getType()},
2720 Args: Arg, /*FMFSource=*/nullptr, Name: "ctpop");
2721 Rep = Builder.CreateTrunc(V: Popc, DestTy: Builder.getInt32Ty(), Name: "ctpop.trunc");
2722 } else if (Name == "h2f") {
2723 Value *Cast =
2724 Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
2725 Rep = Builder.CreateFPExt(V: Cast, DestTy: Builder.getFloatTy());
2726 } else if (Name.consume_front(Prefix: "bitcast.") &&
2727 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2728 Name == "d2ll")) {
2729 Rep = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
2730 } else if (Name == "rotate.b32") {
2731 Value *Arg = CI->getOperand(i_nocapture: 0);
2732 Value *ShiftAmt = CI->getOperand(i_nocapture: 1);
2733 Rep = Builder.CreateIntrinsic(RetTy: Builder.getInt32Ty(), ID: Intrinsic::fshl,
2734 Args: {Arg, Arg, ShiftAmt});
2735 } else if (Name == "rotate.b64") {
2736 Type *Int64Ty = Builder.getInt64Ty();
2737 Value *Arg = CI->getOperand(i_nocapture: 0);
2738 Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
2739 Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
2740 Args: {Arg, Arg, ZExtShiftAmt});
2741 } else if (Name == "rotate.right.b64") {
2742 Type *Int64Ty = Builder.getInt64Ty();
2743 Value *Arg = CI->getOperand(i_nocapture: 0);
2744 Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
2745 Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshr,
2746 Args: {Arg, Arg, ZExtShiftAmt});
2747 } else if (Name == "swap.lo.hi.b64") {
2748 Type *Int64Ty = Builder.getInt64Ty();
2749 Value *Arg = CI->getOperand(i_nocapture: 0);
2750 Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
2751 Args: {Arg, Arg, Builder.getInt64(C: 32)});
2752 } else if ((Name.consume_front(Prefix: "ptr.gen.to.") &&
2753 consumeNVVMPtrAddrSpace(Name)) ||
2754 (Name.consume_front(Prefix: "ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2755 Name.starts_with(Prefix: ".to.gen"))) {
2756 Rep = Builder.CreateAddrSpaceCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
2757 } else if (Name.consume_front(Prefix: "ldg.global")) {
2758 Value *Ptr = CI->getArgOperand(i: 0);
2759 Align PtrAlign = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getAlignValue();
2760 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2761 Value *ASC = Builder.CreateAddrSpaceCast(V: Ptr, DestTy: Builder.getPtrTy(AddrSpace: 1));
2762 Instruction *LD = Builder.CreateAlignedLoad(Ty: CI->getType(), Ptr: ASC, Align: PtrAlign);
2763 MDNode *MD = MDNode::get(Context&: Builder.getContext(), MDs: {});
2764 LD->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD);
2765 return LD;
2766 } else if (Name == "tanh.approx.f32") {
2767 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2768 FastMathFlags FMF;
2769 FMF.setApproxFunc();
2770 Rep = Builder.CreateUnaryIntrinsic(ID: Intrinsic::tanh, V: CI->getArgOperand(i: 0),
2771 FMFSource: FMF);
2772 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2773 Value *Arg =
2774 Name.ends_with(Suffix: '0') ? Builder.getInt32(C: 0) : CI->getArgOperand(i: 0);
2775 Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2776 Types: {}, Args: {Arg});
2777 } else if (Name == "barrier") {
2778 Rep = Builder.CreateIntrinsic(
2779 ID: Intrinsic::nvvm_barrier_cta_sync_aligned_count, Types: {},
2780 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
2781 } else if (Name == "barrier.sync") {
2782 Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_all, Types: {},
2783 Args: {CI->getArgOperand(i: 0)});
2784 } else if (Name == "barrier.sync.cnt") {
2785 Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_count, Types: {},
2786 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
2787 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2788 Name == "barrier0.or") {
2789 Value *C = CI->getArgOperand(i: 0);
2790 C = Builder.CreateICmpNE(LHS: C, RHS: Builder.getInt32(C: 0));
2791
2792 Intrinsic::ID IID =
2793 StringSwitch<Intrinsic::ID>(Name)
2794 .Case(S: "barrier0.popc",
2795 Value: Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2796 .Case(S: "barrier0.and",
2797 Value: Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2798 .Case(S: "barrier0.or",
2799 Value: Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2800 Value *Bar = Builder.CreateIntrinsic(ID: IID, Types: {}, Args: {Builder.getInt32(C: 0), C});
2801 Rep = Builder.CreateZExt(V: Bar, DestTy: CI->getType());
2802 } else {
2803 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2804 if (IID != Intrinsic::not_intrinsic &&
2805 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2806 rename(GV: F);
2807 Function *NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
2808 SmallVector<Value *, 2> Args;
2809 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2810 Value *Arg = CI->getArgOperand(i: I);
2811 Type *OldType = Arg->getType();
2812 Type *NewType = NewFn->getArg(i: I)->getType();
2813 Args.push_back(
2814 Elt: (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2815 ? Builder.CreateBitCast(V: Arg, DestTy: NewType)
2816 : Arg);
2817 }
2818 Rep = Builder.CreateCall(Callee: NewFn, Args);
2819 if (F->getReturnType()->isIntegerTy())
2820 Rep = Builder.CreateBitCast(V: Rep, DestTy: F->getReturnType());
2821 }
2822 }
2823
2824 return Rep;
2825}
2826
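// Upgrade a call to a legacy "llvm.x86.*" intrinsic (Name is expected to have
// the leading "llvm.x86." already stripped) by expanding it with the Builder.
// Returns the replacement value, or nullptr when the expansion is emitted
// purely for its side effects (e.g. the nontemporal-store upgrades below).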
2827static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2828 IRBuilder<> &Builder) {
2829 LLVMContext &C = F->getContext();
2830 Value *Rep = nullptr;
2831
2832 if (Name.starts_with(Prefix: "sse4a.movnt.")) {
2833 SmallVector<Metadata *, 1> Elts;
2834 Elts.push_back(
2835 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2836 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2837
2838 Value *Arg0 = CI->getArgOperand(i: 0);
2839 Value *Arg1 = CI->getArgOperand(i: 1);
2840
2841 // Nontemporal (unaligned) store of the 0th element of the float/double
2842 // vector.
2843 Value *Extract =
2844 Builder.CreateExtractElement(Vec: Arg1, Idx: (uint64_t)0, Name: "extractelement");
2845
2846 StoreInst *SI = Builder.CreateAlignedStore(Val: Extract, Ptr: Arg0, Align: Align(1));
2847 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2848 } else if (Name.starts_with(Prefix: "avx.movnt.") ||
2849 Name.starts_with(Prefix: "avx512.storent.")) {
2850 SmallVector<Metadata *, 1> Elts;
2851 Elts.push_back(
2852 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2853 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2854
2855 Value *Arg0 = CI->getArgOperand(i: 0);
2856 Value *Arg1 = CI->getArgOperand(i: 1);
2857
2858 StoreInst *SI = Builder.CreateAlignedStore(
2859 Val: Arg1, Ptr: Arg0,
2860 Align: Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2861 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2862 } else if (Name == "sse2.storel.dq") {
2863 Value *Arg0 = CI->getArgOperand(i: 0);
2864 Value *Arg1 = CI->getArgOperand(i: 1);
2865
2866 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
2867 Value *BC0 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
2868 Value *Elt = Builder.CreateExtractElement(Vec: BC0, Idx: (uint64_t)0);
2869 Builder.CreateAlignedStore(Val: Elt, Ptr: Arg0, Align: Align(1));
2870 } else if (Name.starts_with(Prefix: "sse.storeu.") ||
2871 Name.starts_with(Prefix: "sse2.storeu.") ||
2872 Name.starts_with(Prefix: "avx.storeu.")) {
2873 Value *Arg0 = CI->getArgOperand(i: 0);
2874 Value *Arg1 = CI->getArgOperand(i: 1);
2875 Builder.CreateAlignedStore(Val: Arg1, Ptr: Arg0, Align: Align(1));
2876 } else if (Name == "avx512.mask.store.ss") {
2877 Value *Mask = Builder.CreateAnd(LHS: CI->getArgOperand(i: 2), RHS: Builder.getInt8(C: 1));
2878 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2879 Mask, Aligned: false);
2880 } else if (Name.starts_with(Prefix: "avx512.mask.store")) {
2881 // "avx512.mask.storeu." or "avx512.mask.store."
2882 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2883 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2884 Mask: CI->getArgOperand(i: 2), Aligned);
2885 } else if (Name.starts_with(Prefix: "sse2.pcmp") || Name.starts_with(Prefix: "avx2.pcmp")) {
2886 // Upgrade packed integer vector compare intrinsics to compare instructions.
2887 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2888 bool CmpEq = Name[9] == 'e';
2889 Rep = Builder.CreateICmp(P: CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2890 LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
2891 Rep = Builder.CreateSExt(V: Rep, DestTy: CI->getType(), Name: "");
2892 } else if (Name.starts_with(Prefix: "avx512.broadcastm")) {
2893 Type *ExtTy = Type::getInt32Ty(C);
2894 if (CI->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 8))
2895 ExtTy = Type::getInt64Ty(C);
2896 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2897 ExtTy->getPrimitiveSizeInBits();
2898 Rep = Builder.CreateZExt(V: CI->getArgOperand(i: 0), DestTy: ExtTy);
2899 Rep = Builder.CreateVectorSplat(NumElts, V: Rep);
2900 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2901 Value *Vec = CI->getArgOperand(i: 0);
2902 Value *Elt0 = Builder.CreateExtractElement(Vec, Idx: (uint64_t)0);
2903 Elt0 = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: Elt0->getType(), Args: Elt0);
2904 Rep = Builder.CreateInsertElement(Vec, NewElt: Elt0, Idx: (uint64_t)0);
2905 } else if (Name.starts_with(Prefix: "avx.sqrt.p") ||
2906 Name.starts_with(Prefix: "sse2.sqrt.p") ||
2907 Name.starts_with(Prefix: "sse.sqrt.p")) {
2908 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(),
2909 Args: {CI->getArgOperand(i: 0)});
2910 } else if (Name.starts_with(Prefix: "avx512.mask.sqrt.p")) {
2911 if (CI->arg_size() == 4 &&
2912 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
2913 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
2914 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2915 : Intrinsic::x86_avx512_sqrt_pd_512;
2916
2917 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 3)};
2918 Rep = Builder.CreateIntrinsic(ID: IID, Args);
2919 } else {
2920 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(),
2921 Args: {CI->getArgOperand(i: 0)});
2922 }
2923 Rep =
2924 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
2925 } else if (Name.starts_with(Prefix: "avx512.ptestm") ||
2926 Name.starts_with(Prefix: "avx512.ptestnm")) {
2927 Value *Op0 = CI->getArgOperand(i: 0);
2928 Value *Op1 = CI->getArgOperand(i: 1);
2929 Value *Mask = CI->getArgOperand(i: 2);
2930 Rep = Builder.CreateAnd(LHS: Op0, RHS: Op1);
2931 llvm::Type *Ty = Op0->getType();
2932 Value *Zero = llvm::Constant::getNullValue(Ty);
2933 ICmpInst::Predicate Pred = Name.starts_with(Prefix: "avx512.ptestm")
2934 ? ICmpInst::ICMP_NE
2935 : ICmpInst::ICMP_EQ;
2936 Rep = Builder.CreateICmp(P: Pred, LHS: Rep, RHS: Zero);
2937 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask);
2938 } else if (Name.starts_with(Prefix: "avx512.mask.pbroadcast")) {
2939 unsigned NumElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 1)->getType())
2940 ->getNumElements();
2941 Rep = Builder.CreateVectorSplat(NumElts, V: CI->getArgOperand(i: 0));
2942 Rep =
2943 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
2944 } else if (Name.starts_with(Prefix: "avx512.kunpck")) {
2945 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2946 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts);
2947 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts);
2948 int Indices[64];
2949 for (unsigned i = 0; i != NumElts; ++i)
2950 Indices[i] = i;
2951
2952 // First extract half of each vector. This gives better codegen than
2953 // doing it in a single shuffle.
2954 LHS = Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2));
2955 RHS = Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2));
2956 // Concat the vectors.
2957 // NOTE: Operands have to be swapped to match intrinsic definition.
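// (The low NumElts/2 bits of the result mask thus come from the second
// argument and the high bits from the first.)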
2958 Rep = Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts));
2959 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2960 } else if (Name == "avx512.kand.w") {
2961 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2962 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2963 Rep = Builder.CreateAnd(LHS, RHS);
2964 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2965 } else if (Name == "avx512.kandn.w") {
2966 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2967 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2968 LHS = Builder.CreateNot(V: LHS);
2969 Rep = Builder.CreateAnd(LHS, RHS);
2970 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2971 } else if (Name == "avx512.kor.w") {
2972 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2973 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2974 Rep = Builder.CreateOr(LHS, RHS);
2975 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2976 } else if (Name == "avx512.kxor.w") {
2977 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2978 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2979 Rep = Builder.CreateXor(LHS, RHS);
2980 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2981 } else if (Name == "avx512.kxnor.w") {
2982 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2983 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2984 LHS = Builder.CreateNot(V: LHS);
2985 Rep = Builder.CreateXor(LHS, RHS);
2986 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2987 } else if (Name == "avx512.knot.w") {
2988 Rep = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2989 Rep = Builder.CreateNot(V: Rep);
2990 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2991 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2992 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2993 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2994 Rep = Builder.CreateOr(LHS, RHS);
2995 Rep = Builder.CreateBitCast(V: Rep, DestTy: Builder.getInt16Ty());
2996 Value *C;
2997 if (Name[14] == 'c')
2998 C = ConstantInt::getAllOnesValue(Ty: Builder.getInt16Ty());
2999 else
3000 C = ConstantInt::getNullValue(Ty: Builder.getInt16Ty());
3001 Rep = Builder.CreateICmpEQ(LHS: Rep, RHS: C);
3002 Rep = Builder.CreateZExt(V: Rep, DestTy: Builder.getInt32Ty());
3003 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3004 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3005 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3006 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3007 Type *I32Ty = Type::getInt32Ty(C);
3008 Value *Elt0 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 0),
3009 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3010 Value *Elt1 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1),
3011 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3012 Value *EltOp;
3013 if (Name.contains(Other: ".add."))
3014 EltOp = Builder.CreateFAdd(L: Elt0, R: Elt1);
3015 else if (Name.contains(Other: ".sub."))
3016 EltOp = Builder.CreateFSub(L: Elt0, R: Elt1);
3017 else if (Name.contains(Other: ".mul."))
3018 EltOp = Builder.CreateFMul(L: Elt0, R: Elt1);
3019 else
3020 EltOp = Builder.CreateFDiv(L: Elt0, R: Elt1);
3021 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: EltOp,
3022 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
3023 } else if (Name.starts_with(Prefix: "avx512.mask.pcmp")) {
3024 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3025 bool CmpEq = Name[16] == 'e';
3026 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: CmpEq ? 0 : 6, Signed: true);
3027 } else if (Name.starts_with(Prefix: "avx512.mask.vpshufbitqmb.")) {
3028 Type *OpTy = CI->getArgOperand(i: 0)->getType();
3029 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3030 Intrinsic::ID IID;
3031 switch (VecWidth) {
3032 default:
3033 llvm_unreachable("Unexpected intrinsic");
3034 case 128:
3035 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3036 break;
3037 case 256:
3038 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3039 break;
3040 case 512:
3041 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3042 break;
3043 }
3044
3045 Rep =
3046 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
3047 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
3048 } else if (Name.starts_with(Prefix: "avx512.mask.fpclass.p")) {
3049 Type *OpTy = CI->getArgOperand(i: 0)->getType();
3050 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3051 unsigned EltWidth = OpTy->getScalarSizeInBits();
3052 Intrinsic::ID IID;
3053 if (VecWidth == 128 && EltWidth == 32)
3054 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3055 else if (VecWidth == 256 && EltWidth == 32)
3056 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3057 else if (VecWidth == 512 && EltWidth == 32)
3058 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3059 else if (VecWidth == 128 && EltWidth == 64)
3060 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3061 else if (VecWidth == 256 && EltWidth == 64)
3062 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3063 else if (VecWidth == 512 && EltWidth == 64)
3064 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3065 else
3066 llvm_unreachable("Unexpected intrinsic");
3067
3068 Rep =
3069 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
3070 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
3071 } else if (Name.starts_with(Prefix: "avx512.cmp.p")) {
3072 SmallVector<Value *, 4> Args(CI->args());
3073 Type *OpTy = Args[0]->getType();
3074 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3075 unsigned EltWidth = OpTy->getScalarSizeInBits();
3076 Intrinsic::ID IID;
3077 if (VecWidth == 128 && EltWidth == 32)
3078 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3079 else if (VecWidth == 256 && EltWidth == 32)
3080 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3081 else if (VecWidth == 512 && EltWidth == 32)
3082 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3083 else if (VecWidth == 128 && EltWidth == 64)
3084 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3085 else if (VecWidth == 256 && EltWidth == 64)
3086 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3087 else if (VecWidth == 512 && EltWidth == 64)
3088 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3089 else
3090 llvm_unreachable("Unexpected intrinsic");
3091
3092 Value *Mask = Constant::getAllOnesValue(Ty: CI->getType());
3093 if (VecWidth == 512)
3094 std::swap(a&: Mask, b&: Args.back());
3095 Args.push_back(Elt: Mask);
3096
3097 Rep = Builder.CreateIntrinsic(ID: IID, Args);
3098 } else if (Name.starts_with(Prefix: "avx512.mask.cmp.")) {
3099 // Integer compare intrinsics.
3100 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3101 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: true);
3102 } else if (Name.starts_with(Prefix: "avx512.mask.ucmp.")) {
3103 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3104 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: false);
3105 } else if (Name.starts_with(Prefix: "avx512.cvtb2mask.") ||
3106 Name.starts_with(Prefix: "avx512.cvtw2mask.") ||
3107 Name.starts_with(Prefix: "avx512.cvtd2mask.") ||
3108 Name.starts_with(Prefix: "avx512.cvtq2mask.")) {
3109 Value *Op = CI->getArgOperand(i: 0);
3110 Value *Zero = llvm::Constant::getNullValue(Ty: Op->getType());
3111 Rep = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: Op, RHS: Zero);
3112 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: nullptr);
3113 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3114 Name == "ssse3.pabs.d.128" || Name.starts_with(Prefix: "avx2.pabs") ||
3115 Name.starts_with(Prefix: "avx512.mask.pabs")) {
3116 Rep = upgradeAbs(Builder, CI&: *CI);
3117 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3118 Name == "sse41.pmaxsd" || Name.starts_with(Prefix: "avx2.pmaxs") ||
3119 Name.starts_with(Prefix: "avx512.mask.pmaxs")) {
3120 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smax);
3121 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3122 Name == "sse41.pmaxud" || Name.starts_with(Prefix: "avx2.pmaxu") ||
3123 Name.starts_with(Prefix: "avx512.mask.pmaxu")) {
3124 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umax);
3125 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3126 Name == "sse41.pminsd" || Name.starts_with(Prefix: "avx2.pmins") ||
3127 Name.starts_with(Prefix: "avx512.mask.pmins")) {
3128 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smin);
3129 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3130 Name == "sse41.pminud" || Name.starts_with(Prefix: "avx2.pminu") ||
3131 Name.starts_with(Prefix: "avx512.mask.pminu")) {
3132 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umin);
3133 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3134 Name == "avx512.pmulu.dq.512" ||
3135 Name.starts_with(Prefix: "avx512.mask.pmulu.dq.")) {
3136 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: false);
3137 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3138 Name == "avx512.pmul.dq.512" ||
3139 Name.starts_with(Prefix: "avx512.mask.pmul.dq.")) {
3140 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: true);
3141 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3142 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3143 Rep =
3144 Builder.CreateSIToFP(V: CI->getArgOperand(i: 1),
3145 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3146 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3147 } else if (Name == "avx512.cvtusi2sd") {
3148 Rep =
3149 Builder.CreateUIToFP(V: CI->getArgOperand(i: 1),
3150 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3151 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3152 } else if (Name == "sse2.cvtss2sd") {
3153 Rep = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), Idx: (uint64_t)0);
3154 Rep = Builder.CreateFPExt(
3155 V: Rep, DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
3156 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3157 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3158 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3159 Name.starts_with(Prefix: "avx512.mask.cvtdq2pd.") ||
3160 Name.starts_with(Prefix: "avx512.mask.cvtudq2pd.") ||
3161 Name.starts_with(Prefix: "avx512.mask.cvtdq2ps.") ||
3162 Name.starts_with(Prefix: "avx512.mask.cvtudq2ps.") ||
3163 Name.starts_with(Prefix: "avx512.mask.cvtqq2pd.") ||
3164 Name.starts_with(Prefix: "avx512.mask.cvtuqq2pd.") ||
3165 Name == "avx512.mask.cvtqq2ps.256" ||
3166 Name == "avx512.mask.cvtqq2ps.512" ||
3167 Name == "avx512.mask.cvtuqq2ps.256" ||
3168 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3169 Name == "avx.cvt.ps2.pd.256" ||
3170 Name == "avx512.mask.cvtps2pd.128" ||
3171 Name == "avx512.mask.cvtps2pd.256") {
3172 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3173 Rep = CI->getArgOperand(i: 0);
3174 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
3175
3176 unsigned NumDstElts = DstTy->getNumElements();
3177 if (NumDstElts < SrcTy->getNumElements()) {
3178 assert(NumDstElts == 2 && "Unexpected vector size");
3179 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1});
3180 }
3181
3182 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3183 bool IsUnsigned = Name.contains(Other: "cvtu");
3184 if (IsPS2PD)
3185 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtps2pd");
3186 else if (CI->arg_size() == 4 &&
3187 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
3188 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
3189 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3190 : Intrinsic::x86_avx512_sitofp_round;
3191 Rep = Builder.CreateIntrinsic(ID: IID, Types: {DstTy, SrcTy},
3192 Args: {Rep, CI->getArgOperand(i: 3)});
3193 } else {
3194 Rep = IsUnsigned ? Builder.CreateUIToFP(V: Rep, DestTy: DstTy, Name: "cvt")
3195 : Builder.CreateSIToFP(V: Rep, DestTy: DstTy, Name: "cvt");
3196 }
3197
3198 if (CI->arg_size() >= 3)
3199 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3200 Op1: CI->getArgOperand(i: 1));
3201 } else if (Name.starts_with(Prefix: "avx512.mask.vcvtph2ps.") ||
3202 Name.starts_with(Prefix: "vcvtph2ps.")) {
3203 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3204 Rep = CI->getArgOperand(i: 0);
3205 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
3206 unsigned NumDstElts = DstTy->getNumElements();
3207 if (NumDstElts != SrcTy->getNumElements()) {
3208 assert(NumDstElts == 4 && "Unexpected vector size");
3209 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1, 2, 3});
3210 }
3211 Rep = Builder.CreateBitCast(
3212 V: Rep, DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C), NumElts: NumDstElts));
3213 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtph2ps");
3214 if (CI->arg_size() >= 3)
3215 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3216 Op1: CI->getArgOperand(i: 1));
3217 } else if (Name.starts_with(Prefix: "avx512.mask.load")) {
3218 // "avx512.mask.loadu." or "avx512.mask.load."
3219 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3220 Rep = upgradeMaskedLoad(Builder, Ptr: CI->getArgOperand(i: 0), Passthru: CI->getArgOperand(i: 1),
3221 Mask: CI->getArgOperand(i: 2), Aligned);
3222 } else if (Name.starts_with(Prefix: "avx512.mask.expand.load.")) {
3223 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
3224 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3225 NumElts: ResultTy->getNumElements());
3226
3227 Rep = Builder.CreateIntrinsic(
3228 ID: Intrinsic::masked_expandload, Types: ResultTy,
3229 Args: {CI->getOperand(i_nocapture: 0), MaskVec, CI->getOperand(i_nocapture: 1)});
3230 } else if (Name.starts_with(Prefix: "avx512.mask.compress.store.")) {
3231 auto *ResultTy = cast<VectorType>(Val: CI->getArgOperand(i: 1)->getType());
3232 Value *MaskVec =
3233 getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3234 NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements());
3235
3236 Rep = Builder.CreateIntrinsic(
3237 ID: Intrinsic::masked_compressstore, Types: ResultTy,
3238 Args: {CI->getArgOperand(i: 1), CI->getArgOperand(i: 0), MaskVec});
3239 } else if (Name.starts_with(Prefix: "avx512.mask.compress.") ||
3240 Name.starts_with(Prefix: "avx512.mask.expand.")) {
3241 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
3242
3243 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
3244 NumElts: ResultTy->getNumElements());
3245
3246 bool IsCompress = Name[12] == 'c';
3247 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3248 : Intrinsic::x86_avx512_mask_expand;
3249 Rep = Builder.CreateIntrinsic(
3250 ID: IID, Types: ResultTy, Args: {CI->getOperand(i_nocapture: 0), CI->getOperand(i_nocapture: 1), MaskVec});
3251 } else if (Name.starts_with(Prefix: "xop.vpcom")) {
3252 bool IsSigned;
3253 if (Name.ends_with(Suffix: "ub") || Name.ends_with(Suffix: "uw") || Name.ends_with(Suffix: "ud") ||
3254 Name.ends_with(Suffix: "uq"))
3255 IsSigned = false;
3256 else if (Name.ends_with(Suffix: "b") || Name.ends_with(Suffix: "w") ||
3257 Name.ends_with(Suffix: "d") || Name.ends_with(Suffix: "q"))
3258 IsSigned = true;
3259 else
3260 llvm_unreachable("Unknown suffix");
3261
3262 unsigned Imm;
3263 if (CI->arg_size() == 3) {
3264 Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3265 } else {
3266 Name = Name.substr(Start: 9); // strip off "xop.vpcom"
3267 if (Name.starts_with(Prefix: "lt"))
3268 Imm = 0;
3269 else if (Name.starts_with(Prefix: "le"))
3270 Imm = 1;
3271 else if (Name.starts_with(Prefix: "gt"))
3272 Imm = 2;
3273 else if (Name.starts_with(Prefix: "ge"))
3274 Imm = 3;
3275 else if (Name.starts_with(Prefix: "eq"))
3276 Imm = 4;
3277 else if (Name.starts_with(Prefix: "ne"))
3278 Imm = 5;
3279 else if (Name.starts_with(Prefix: "false"))
3280 Imm = 6;
3281 else if (Name.starts_with(Prefix: "true"))
3282 Imm = 7;
3283 else
3284 llvm_unreachable("Unknown condition");
3285 }
3286
3287 Rep = upgradeX86vpcom(Builder, CI&: *CI, Imm, IsSigned);
3288 } else if (Name.starts_with(Prefix: "xop.vpcmov")) {
3289 Value *Sel = CI->getArgOperand(i: 2);
3290 Value *NotSel = Builder.CreateNot(V: Sel);
3291 Value *Sel0 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 0), RHS: Sel);
3292 Value *Sel1 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 1), RHS: NotSel);
3293 Rep = Builder.CreateOr(LHS: Sel0, RHS: Sel1);
3294 } else if (Name.starts_with(Prefix: "xop.vprot") || Name.starts_with(Prefix: "avx512.prol") ||
3295 Name.starts_with(Prefix: "avx512.mask.prol")) {
3296 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: false);
3297 } else if (Name.starts_with(Prefix: "avx512.pror") ||
3298 Name.starts_with(Prefix: "avx512.mask.pror")) {
3299 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: true);
3300 } else if (Name.starts_with(Prefix: "avx512.vpshld.") ||
3301 Name.starts_with(Prefix: "avx512.mask.vpshld") ||
3302 Name.starts_with(Prefix: "avx512.maskz.vpshld")) {
3303 bool ZeroMask = Name[11] == 'z';
3304 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: false, ZeroMask);
3305 } else if (Name.starts_with(Prefix: "avx512.vpshrd.") ||
3306 Name.starts_with(Prefix: "avx512.mask.vpshrd") ||
3307 Name.starts_with(Prefix: "avx512.maskz.vpshrd")) {
3308 bool ZeroMask = Name[11] == 'z';
3309 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: true, ZeroMask);
3310 } else if (Name == "sse42.crc32.64.8") {
3311 Value *Trunc0 =
3312 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Type::getInt32Ty(C));
3313 Rep = Builder.CreateIntrinsic(ID: Intrinsic::x86_sse42_crc32_32_8,
3314 Args: {Trunc0, CI->getArgOperand(i: 1)});
3315 Rep = Builder.CreateZExt(V: Rep, DestTy: CI->getType(), Name: "");
3316 } else if (Name.starts_with(Prefix: "avx.vbroadcast.s") ||
3317 Name.starts_with(Prefix: "avx512.vbroadcast.s")) {
3318 // Replace broadcasts with a series of insertelements.
3319 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3320 Type *EltTy = VecTy->getElementType();
3321 unsigned EltNum = VecTy->getNumElements();
3322 Value *Load = Builder.CreateLoad(Ty: EltTy, Ptr: CI->getArgOperand(i: 0));
3323 Type *I32Ty = Type::getInt32Ty(C);
3324 Rep = PoisonValue::get(T: VecTy);
3325 for (unsigned I = 0; I < EltNum; ++I)
3326 Rep = Builder.CreateInsertElement(Vec: Rep, NewElt: Load, Idx: ConstantInt::get(Ty: I32Ty, V: I));
3327 } else if (Name.starts_with(Prefix: "sse41.pmovsx") ||
3328 Name.starts_with(Prefix: "sse41.pmovzx") ||
3329 Name.starts_with(Prefix: "avx2.pmovsx") ||
3330 Name.starts_with(Prefix: "avx2.pmovzx") ||
3331 Name.starts_with(Prefix: "avx512.mask.pmovsx") ||
3332 Name.starts_with(Prefix: "avx512.mask.pmovzx")) {
3333 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3334 unsigned NumDstElts = DstTy->getNumElements();
3335
3336 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3337 SmallVector<int, 8> ShuffleMask(NumDstElts);
3338 for (unsigned i = 0; i != NumDstElts; ++i)
3339 ShuffleMask[i] = i;
3340
3341 Value *SV = Builder.CreateShuffleVector(V: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3342
3343 bool DoSext = Name.contains(Other: "pmovsx");
3344 Rep =
3345 DoSext ? Builder.CreateSExt(V: SV, DestTy: DstTy) : Builder.CreateZExt(V: SV, DestTy: DstTy);
3346 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3347 if (CI->arg_size() == 3)
3348 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3349 Op1: CI->getArgOperand(i: 1));
3350 } else if (Name == "avx512.mask.pmov.qd.256" ||
3351 Name == "avx512.mask.pmov.qd.512" ||
3352 Name == "avx512.mask.pmov.wb.256" ||
3353 Name == "avx512.mask.pmov.wb.512") {
3354 Type *Ty = CI->getArgOperand(i: 1)->getType();
3355 Rep = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Ty);
3356 Rep =
3357 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3358 } else if (Name.starts_with(Prefix: "avx.vbroadcastf128") ||
3359 Name == "avx2.vbroadcasti128") {
3360 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3361 Type *EltTy = cast<VectorType>(Val: CI->getType())->getElementType();
3362 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3363 auto *VT = FixedVectorType::get(ElementType: EltTy, NumElts: NumSrcElts);
3364 Value *Load = Builder.CreateAlignedLoad(Ty: VT, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
3365 if (NumSrcElts == 2)
3366 Rep = Builder.CreateShuffleVector(V: Load, Mask: ArrayRef<int>{0, 1, 0, 1});
3367 else
3368 Rep = Builder.CreateShuffleVector(V: Load,
3369 Mask: ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3370 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.i") ||
3371 Name.starts_with(Prefix: "avx512.mask.shuf.f")) {
3372 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3373 Type *VT = CI->getType();
3374 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3375 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3376 unsigned ControlBitsMask = NumLanes - 1;
3377 unsigned NumControlBits = NumLanes / 2;
3378 SmallVector<int, 8> ShuffleMask(0);
3379
3380 for (unsigned l = 0; l != NumLanes; ++l) {
3381 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3382 // We actually need the other source.
3383 if (l >= NumLanes / 2)
3384 LaneMask += NumLanes;
3385 for (unsigned i = 0; i != NumElementsInLane; ++i)
3386 ShuffleMask.push_back(Elt: LaneMask * NumElementsInLane + i);
3387 }
3388 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3389 V2: CI->getArgOperand(i: 1), Mask: ShuffleMask);
3390 Rep =
3391 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3392 } else if (Name.starts_with(Prefix: "avx512.mask.broadcastf") ||
3393 Name.starts_with(Prefix: "avx512.mask.broadcasti")) {
3394 unsigned NumSrcElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 0)->getType())
3395 ->getNumElements();
3396 unsigned NumDstElts =
3397 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3398
3399 SmallVector<int, 8> ShuffleMask(NumDstElts);
3400 for (unsigned i = 0; i != NumDstElts; ++i)
3401 ShuffleMask[i] = i % NumSrcElts;
3402
3403 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3404 V2: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3405 Rep =
3406 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3407 } else if (Name.starts_with(Prefix: "avx2.pbroadcast") ||
3408 Name.starts_with(Prefix: "avx2.vbroadcast") ||
3409 Name.starts_with(Prefix: "avx512.pbroadcast") ||
3410 Name.starts_with(Prefix: "avx512.mask.broadcast.s")) {
3411 // Replace vp?broadcasts with a vector shuffle.
3412 Value *Op = CI->getArgOperand(i: 0);
3413 ElementCount EC = cast<VectorType>(Val: CI->getType())->getElementCount();
3414 Type *MaskTy = VectorType::get(ElementType: Type::getInt32Ty(C), EC);
3415 SmallVector<int, 8> M;
3416 ShuffleVectorInst::getShuffleMask(Mask: Constant::getNullValue(Ty: MaskTy), Result&: M);
3417 Rep = Builder.CreateShuffleVector(V: Op, Mask: M);
3418
3419 if (CI->arg_size() == 3)
3420 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3421 Op1: CI->getArgOperand(i: 1));
3422 } else if (Name.starts_with(Prefix: "sse2.padds.") ||
3423 Name.starts_with(Prefix: "avx2.padds.") ||
3424 Name.starts_with(Prefix: "avx512.padds.") ||
3425 Name.starts_with(Prefix: "avx512.mask.padds.")) {
3426 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::sadd_sat);
3427 } else if (Name.starts_with(Prefix: "sse2.psubs.") ||
3428 Name.starts_with(Prefix: "avx2.psubs.") ||
3429 Name.starts_with(Prefix: "avx512.psubs.") ||
3430 Name.starts_with(Prefix: "avx512.mask.psubs.")) {
3431 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::ssub_sat);
3432 } else if (Name.starts_with(Prefix: "sse2.paddus.") ||
3433 Name.starts_with(Prefix: "avx2.paddus.") ||
3434 Name.starts_with(Prefix: "avx512.mask.paddus.")) {
3435 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::uadd_sat);
3436 } else if (Name.starts_with(Prefix: "sse2.psubus.") ||
3437 Name.starts_with(Prefix: "avx2.psubus.") ||
3438 Name.starts_with(Prefix: "avx512.mask.psubus.")) {
3439 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::usub_sat);
3440 } else if (Name.starts_with(Prefix: "avx512.mask.palignr.")) {
3441 Rep = upgradeX86ALIGNIntrinsics(Builder, Op0: CI->getArgOperand(i: 0),
3442 Op1: CI->getArgOperand(i: 1), Shift: CI->getArgOperand(i: 2),
3443 Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4),
3444 IsVALIGN: false);
3445 } else if (Name.starts_with(Prefix: "avx512.mask.valign.")) {
3446 Rep = upgradeX86ALIGNIntrinsics(
3447 Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1),
3448 Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), IsVALIGN: true);
3449 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3450 // 128/256-bit shift left specified in bits.
3451 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3452 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3453 Shift: Shift / 8); // Shift is in bits.
3454 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3455 // 128/256-bit shift right specified in bits.
3456 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3457 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3458 Shift: Shift / 8); // Shift is in bits.
3459 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3460 Name == "avx512.psll.dq.512") {
3461 // 128/256/512-bit shift left specified in bytes.
3462 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3463 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3464 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3465 Name == "avx512.psrl.dq.512") {
3466 // 128/256/512-bit shift right specified in bytes.
3467 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3468 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3469 } else if (Name == "sse41.pblendw" || Name.starts_with(Prefix: "sse41.blendp") ||
3470 Name.starts_with(Prefix: "avx.blend.p") || Name == "avx2.pblendw" ||
3471 Name.starts_with(Prefix: "avx2.pblendd.")) {
3472 Value *Op0 = CI->getArgOperand(i: 0);
3473 Value *Op1 = CI->getArgOperand(i: 1);
3474 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3475 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3476 unsigned NumElts = VecTy->getNumElements();
3477
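// For example (illustrative), an 8-element pblendw with Imm = 0x0F takes the
// low four result elements from Op1 and the high four from Op0.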
3478 SmallVector<int, 16> Idxs(NumElts);
3479 for (unsigned i = 0; i != NumElts; ++i)
3480 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3481
3482 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3483 } else if (Name.starts_with(Prefix: "avx.vinsertf128.") ||
3484 Name == "avx2.vinserti128" ||
3485 Name.starts_with(Prefix: "avx512.mask.insert")) {
3486 Value *Op0 = CI->getArgOperand(i: 0);
3487 Value *Op1 = CI->getArgOperand(i: 1);
3488 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3489 unsigned DstNumElts =
3490 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3491 unsigned SrcNumElts =
3492 cast<FixedVectorType>(Val: Op1->getType())->getNumElements();
3493 unsigned Scale = DstNumElts / SrcNumElts;
3494
3495 // Mask off the high bits of the immediate value; hardware ignores those.
3496 Imm = Imm % Scale;
3497
3498 // Extend the second operand into a vector the size of the destination.
3499 SmallVector<int, 8> Idxs(DstNumElts);
3500 for (unsigned i = 0; i != SrcNumElts; ++i)
3501 Idxs[i] = i;
3502 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3503 Idxs[i] = SrcNumElts;
3504 Rep = Builder.CreateShuffleVector(V: Op1, Mask: Idxs);
3505
3506 // Insert the second operand into the first operand.
3507
3508 // Note that there is no guarantee that instruction lowering will actually
3509 // produce a vinsertf128 instruction for the created shuffles. In
3510 // particular, the 0 immediate case involves no lane changes, so it can
3511 // be handled as a blend.
3512
3513 // Example of shuffle mask for 32-bit elements:
3514 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3515 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3516
3517 // First fill with identity mask.
3518 for (unsigned i = 0; i != DstNumElts; ++i)
3519 Idxs[i] = i;
3520 // Then replace the elements where we need to insert.
3521 for (unsigned i = 0; i != SrcNumElts; ++i)
3522 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3523 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Rep, Mask: Idxs);
3524
3525 // If the intrinsic has a mask operand, handle that.
3526 if (CI->arg_size() == 5)
3527 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep,
3528 Op1: CI->getArgOperand(i: 3));
3529 } else if (Name.starts_with(Prefix: "avx.vextractf128.") ||
3530 Name == "avx2.vextracti128" ||
3531 Name.starts_with(Prefix: "avx512.mask.vextract")) {
3532 Value *Op0 = CI->getArgOperand(i: 0);
3533 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3534 unsigned DstNumElts =
3535 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3536 unsigned SrcNumElts =
3537 cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
3538 unsigned Scale = SrcNumElts / DstNumElts;
3539
3540 // Mask off the high bits of the immediate value; hardware ignores those.
3541 Imm = Imm % Scale;
3542
3543 // Get indexes for the subvector of the input vector.
3544 SmallVector<int, 8> Idxs(DstNumElts);
3545 for (unsigned i = 0; i != DstNumElts; ++i) {
3546 Idxs[i] = i + (Imm * DstNumElts);
3547 }
3548 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3549
3550 // If the intrinsic has a mask operand, handle that.
3551 if (CI->arg_size() == 4)
3552 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3553 Op1: CI->getArgOperand(i: 2));
3554 } else if (Name.starts_with(Prefix: "avx512.mask.perm.df.") ||
3555 Name.starts_with(Prefix: "avx512.mask.perm.di.")) {
3556 Value *Op0 = CI->getArgOperand(i: 0);
3557 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3558 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3559 unsigned NumElts = VecTy->getNumElements();
3560
3561 SmallVector<int, 8> Idxs(NumElts);
3562 for (unsigned i = 0; i != NumElts; ++i)
3563 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3564
3565 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3566
3567 if (CI->arg_size() == 4)
3568 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3569 Op1: CI->getArgOperand(i: 2));
3570 } else if (Name.starts_with(Prefix: "avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3571 // The immediate permute control byte looks like this:
3572 // [1:0] - select 128 bits from sources for low half of destination
3573 // [2] - ignore
3574 // [3] - zero low half of destination
3575 // [5:4] - select 128 bits from sources for high half of destination
3576 // [6] - ignore
3577 // [7] - zero high half of destination
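// For example (illustrative), Imm = 0x21 selects the high half of the first
// source for the low half of the result and the low half of the second source
// for the high half of the result.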
3578
3579 uint8_t Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3580
3581 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3582 unsigned HalfSize = NumElts / 2;
3583 SmallVector<int, 8> ShuffleMask(NumElts);
3584
3585 // Determine which operand(s) are actually in use for this instruction.
3586 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3587 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3588
3589 // If needed, replace operands based on zero mask.
3590 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(Ty: CI->getType()) : V0;
3591 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(Ty: CI->getType()) : V1;
3592
3593 // Permute low half of result.
3594 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3595 for (unsigned i = 0; i < HalfSize; ++i)
3596 ShuffleMask[i] = StartIndex + i;
3597
3598 // Permute high half of result.
3599 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3600 for (unsigned i = 0; i < HalfSize; ++i)
3601 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3602
3603 Rep = Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: ShuffleMask);
3604
3605 } else if (Name.starts_with(Prefix: "avx.vpermil.") || Name == "sse2.pshuf.d" ||
3606 Name.starts_with(Prefix: "avx512.mask.vpermil.p") ||
3607 Name.starts_with(Prefix: "avx512.mask.pshuf.d.")) {
3608 Value *Op0 = CI->getArgOperand(i: 0);
3609 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3610 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3611 unsigned NumElts = VecTy->getNumElements();
3612 // Calculate the size of each index in the immediate.
3613 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3614 unsigned IdxMask = ((1 << IdxSize) - 1);
3615
3616 SmallVector<int, 8> Idxs(NumElts);
3617 // Look up the bits for this element, wrapping around the immediate every
3618 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3619 // to offset by the first index of each group.
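// For example (illustrative), sse2.pshuf.d with Imm = 0x1B on a <4 x i32>
// vector produces the reversed shuffle mask <3, 2, 1, 0>.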
3620 for (unsigned i = 0; i != NumElts; ++i)
3621 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3622
3623 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3624
3625 if (CI->arg_size() == 4)
3626 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3627 Op1: CI->getArgOperand(i: 2));
3628 } else if (Name == "sse2.pshufl.w" ||
3629 Name.starts_with(Prefix: "avx512.mask.pshufl.w.")) {
3630 Value *Op0 = CI->getArgOperand(i: 0);
3631 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3632 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3633
3634 SmallVector<int, 16> Idxs(NumElts);
3635 for (unsigned l = 0; l != NumElts; l += 8) {
3636 for (unsigned i = 0; i != 4; ++i)
3637 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3638 for (unsigned i = 4; i != 8; ++i)
3639 Idxs[i + l] = i + l;
3640 }
3641
3642 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3643
3644 if (CI->arg_size() == 4)
3645 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3646 Op1: CI->getArgOperand(i: 2));
3647 } else if (Name == "sse2.pshufh.w" ||
3648 Name.starts_with(Prefix: "avx512.mask.pshufh.w.")) {
3649 Value *Op0 = CI->getArgOperand(i: 0);
3650 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3651 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3652
3653 SmallVector<int, 16> Idxs(NumElts);
3654 for (unsigned l = 0; l != NumElts; l += 8) {
3655 for (unsigned i = 0; i != 4; ++i)
3656 Idxs[i + l] = i + l;
3657 for (unsigned i = 0; i != 4; ++i)
3658 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3659 }
3660
3661 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3662
3663 if (CI->arg_size() == 4)
3664 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3665 Op1: CI->getArgOperand(i: 2));
3666 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.p")) {
3667 Value *Op0 = CI->getArgOperand(i: 0);
3668 Value *Op1 = CI->getArgOperand(i: 1);
3669 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3670 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3671
3672 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3673 unsigned HalfLaneElts = NumLaneElts / 2;
3674
3675 SmallVector<int, 16> Idxs(NumElts);
3676 for (unsigned i = 0; i != NumElts; ++i) {
3677 // Base index is the starting element of the lane.
3678 Idxs[i] = i - (i % NumLaneElts);
3679 // If we are halfway through the lane, switch to the other source.
3680 if ((i % NumLaneElts) >= HalfLaneElts)
3681 Idxs[i] += NumElts;
3682 // Now select the specific element by adding HalfLaneElts bits from
3683 // the immediate, wrapping around the immediate every 8 bits.
3684 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3685 }
3686
3687 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3688
3689 Rep =
3690 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3691 } else if (Name.starts_with(Prefix: "avx512.mask.movddup") ||
3692 Name.starts_with(Prefix: "avx512.mask.movshdup") ||
3693 Name.starts_with(Prefix: "avx512.mask.movsldup")) {
3694 Value *Op0 = CI->getArgOperand(i: 0);
3695 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3696 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3697
3698 unsigned Offset = 0;
3699 if (Name.starts_with(Prefix: "avx512.mask.movshdup."))
3700 Offset = 1;
3701
3702 SmallVector<int, 16> Idxs(NumElts);
3703 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3704 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3705 Idxs[i + l + 0] = i + l + Offset;
3706 Idxs[i + l + 1] = i + l + Offset;
3707 }
3708
3709 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3710
3711 Rep =
3712 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3713 } else if (Name.starts_with(Prefix: "avx512.mask.punpckl") ||
3714 Name.starts_with(Prefix: "avx512.mask.unpckl.")) {
3715 Value *Op0 = CI->getArgOperand(i: 0);
3716 Value *Op1 = CI->getArgOperand(i: 1);
3717 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3718 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3719
3720 SmallVector<int, 64> Idxs(NumElts);
3721 for (int l = 0; l != NumElts; l += NumLaneElts)
3722 for (int i = 0; i != NumLaneElts; ++i)
3723 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3724
3725 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3726
3727 Rep =
3728 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3729 } else if (Name.starts_with(Prefix: "avx512.mask.punpckh") ||
3730 Name.starts_with(Prefix: "avx512.mask.unpckh.")) {
3731 Value *Op0 = CI->getArgOperand(i: 0);
3732 Value *Op1 = CI->getArgOperand(i: 1);
3733 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3734 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3735
3736 SmallVector<int, 64> Idxs(NumElts);
3737 for (int l = 0; l != NumElts; l += NumLaneElts)
3738 for (int i = 0; i != NumLaneElts; ++i)
3739 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3740
3741 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3742
3743 Rep =
3744 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3745 } else if (Name.starts_with(Prefix: "avx512.mask.and.") ||
3746 Name.starts_with(Prefix: "avx512.mask.pand.")) {
3747 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3748 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3749 Rep = Builder.CreateAnd(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3750 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3751 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3752 Rep =
3753 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3754 } else if (Name.starts_with(Prefix: "avx512.mask.andn.") ||
3755 Name.starts_with(Prefix: "avx512.mask.pandn.")) {
3756 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3757 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3758 Rep = Builder.CreateNot(V: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy));
3759 Rep = Builder.CreateAnd(LHS: Rep,
3760 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3761 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3762 Rep =
3763 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3764 } else if (Name.starts_with(Prefix: "avx512.mask.or.") ||
3765 Name.starts_with(Prefix: "avx512.mask.por.")) {
3766 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3767 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3768 Rep = Builder.CreateOr(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3769 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3770 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3771 Rep =
3772 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3773 } else if (Name.starts_with(Prefix: "avx512.mask.xor.") ||
3774 Name.starts_with(Prefix: "avx512.mask.pxor.")) {
3775 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3776 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3777 Rep = Builder.CreateXor(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3778 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3779 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3780 Rep =
3781 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3782 } else if (Name.starts_with(Prefix: "avx512.mask.padd.")) {
3783 Rep = Builder.CreateAdd(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3784 Rep =
3785 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3786 } else if (Name.starts_with(Prefix: "avx512.mask.psub.")) {
3787 Rep = Builder.CreateSub(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3788 Rep =
3789 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3790 } else if (Name.starts_with(Prefix: "avx512.mask.pmull.")) {
3791 Rep = Builder.CreateMul(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3792 Rep =
3793 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3794 } else if (Name.starts_with(Prefix: "avx512.mask.add.p")) {
3795 if (Name.ends_with(Suffix: ".512")) {
3796 Intrinsic::ID IID;
3797 if (Name[17] == 's')
3798 IID = Intrinsic::x86_avx512_add_ps_512;
3799 else
3800 IID = Intrinsic::x86_avx512_add_pd_512;
3801
3802 Rep = Builder.CreateIntrinsic(
3803 ID: IID,
3804 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3805 } else {
3806 Rep = Builder.CreateFAdd(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3807 }
3808 Rep =
3809 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3810 } else if (Name.starts_with(Prefix: "avx512.mask.div.p")) {
3811 if (Name.ends_with(Suffix: ".512")) {
3812 Intrinsic::ID IID;
3813 if (Name[17] == 's')
3814 IID = Intrinsic::x86_avx512_div_ps_512;
3815 else
3816 IID = Intrinsic::x86_avx512_div_pd_512;
3817
3818 Rep = Builder.CreateIntrinsic(
3819 ID: IID,
3820 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3821 } else {
3822 Rep = Builder.CreateFDiv(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3823 }
3824 Rep =
3825 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3826 } else if (Name.starts_with(Prefix: "avx512.mask.mul.p")) {
3827 if (Name.ends_with(Suffix: ".512")) {
3828 Intrinsic::ID IID;
3829 if (Name[17] == 's')
3830 IID = Intrinsic::x86_avx512_mul_ps_512;
3831 else
3832 IID = Intrinsic::x86_avx512_mul_pd_512;
3833
3834 Rep = Builder.CreateIntrinsic(
3835 ID: IID,
3836 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3837 } else {
3838 Rep = Builder.CreateFMul(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3839 }
3840 Rep =
3841 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3842 } else if (Name.starts_with(Prefix: "avx512.mask.sub.p")) {
3843 if (Name.ends_with(Suffix: ".512")) {
3844 Intrinsic::ID IID;
3845 if (Name[17] == 's')
3846 IID = Intrinsic::x86_avx512_sub_ps_512;
3847 else
3848 IID = Intrinsic::x86_avx512_sub_pd_512;
3849
3850 Rep = Builder.CreateIntrinsic(
3851 ID: IID,
3852 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3853 } else {
3854 Rep = Builder.CreateFSub(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3855 }
3856 Rep =
3857 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3858 } else if ((Name.starts_with(Prefix: "avx512.mask.max.p") ||
3859 Name.starts_with(Prefix: "avx512.mask.min.p")) &&
3860 Name.drop_front(N: 18) == ".512") {
3861 bool IsDouble = Name[17] == 'd';
3862 bool IsMin = Name[13] == 'i';
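// Indexed as MinMaxTbl[IsMin][IsDouble]: the row selects max vs. min and the
// column selects the ps vs. pd variant.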
3863 static const Intrinsic::ID MinMaxTbl[2][2] = {
3864 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3865 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3866 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3867
3868 Rep = Builder.CreateIntrinsic(
3869 ID: IID,
3870 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3871 Rep =
3872 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3873 } else if (Name.starts_with(Prefix: "avx512.mask.lzcnt.")) {
3874 Rep =
3875 Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: CI->getType(),
3876 Args: {CI->getArgOperand(i: 0), Builder.getInt1(V: false)});
3877 Rep =
3878 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3879 } else if (Name.starts_with(Prefix: "avx512.mask.psll")) {
3880 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3881 bool IsVariable = Name[16] == 'v';
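// The element-type character ('d', 'q', 'w', 's' or 'h') follows the first
// '.' after the operation name; its position depends on whether this is the
// psll, pslli or psllv spelling.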
3882 char Size = Name[16] == '.' ? Name[17]
3883 : Name[17] == '.' ? Name[18]
3884 : Name[18] == '.' ? Name[19]
3885 : Name[20];
3886
3887 Intrinsic::ID IID;
3888 if (IsVariable && Name[17] != '.') {
3889 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3890 IID = Intrinsic::x86_avx2_psllv_q;
3891 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3892 IID = Intrinsic::x86_avx2_psllv_q_256;
3893 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3894 IID = Intrinsic::x86_avx2_psllv_d;
3895 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3896 IID = Intrinsic::x86_avx2_psllv_d_256;
3897 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3898 IID = Intrinsic::x86_avx512_psllv_w_128;
3899 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3900 IID = Intrinsic::x86_avx512_psllv_w_256;
3901 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3902 IID = Intrinsic::x86_avx512_psllv_w_512;
3903 else
3904 llvm_unreachable("Unexpected size");
3905 } else if (Name.ends_with(Suffix: ".128")) {
3906 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3907 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3908 : Intrinsic::x86_sse2_psll_d;
3909 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3910 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3911 : Intrinsic::x86_sse2_psll_q;
3912 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3913 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3914 : Intrinsic::x86_sse2_psll_w;
3915 else
3916 llvm_unreachable("Unexpected size");
3917 } else if (Name.ends_with(Suffix: ".256")) {
3918 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3919 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3920 : Intrinsic::x86_avx2_psll_d;
3921 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3922 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3923 : Intrinsic::x86_avx2_psll_q;
3924 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3925 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3926 : Intrinsic::x86_avx2_psll_w;
3927 else
3928 llvm_unreachable("Unexpected size");
3929 } else {
3930 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3931 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3932 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3933 : Intrinsic::x86_avx512_psll_d_512;
3934 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3935 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3936 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3937 : Intrinsic::x86_avx512_psll_q_512;
3938 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3939 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3940 : Intrinsic::x86_avx512_psll_w_512;
3941 else
3942 llvm_unreachable("Unexpected size");
3943 }
3944
3945 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
3946 } else if (Name.starts_with(Prefix: "avx512.mask.psrl")) {
3947 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3948 bool IsVariable = Name[16] == 'v';
3949 char Size = Name[16] == '.' ? Name[17]
3950 : Name[17] == '.' ? Name[18]
3951 : Name[18] == '.' ? Name[19]
3952 : Name[20];
3953
3954 Intrinsic::ID IID;
3955 if (IsVariable && Name[17] != '.') {
3956 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3957 IID = Intrinsic::x86_avx2_psrlv_q;
3958 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3959 IID = Intrinsic::x86_avx2_psrlv_q_256;
3960 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3961 IID = Intrinsic::x86_avx2_psrlv_d;
3962 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3963 IID = Intrinsic::x86_avx2_psrlv_d_256;
3964 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3965 IID = Intrinsic::x86_avx512_psrlv_w_128;
3966 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3967 IID = Intrinsic::x86_avx512_psrlv_w_256;
3968 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3969 IID = Intrinsic::x86_avx512_psrlv_w_512;
3970 else
3971 llvm_unreachable("Unexpected size");
3972 } else if (Name.ends_with(Suffix: ".128")) {
3973 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3974 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3975 : Intrinsic::x86_sse2_psrl_d;
3976 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3977 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3978 : Intrinsic::x86_sse2_psrl_q;
3979 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3980 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3981 : Intrinsic::x86_sse2_psrl_w;
3982 else
3983 llvm_unreachable("Unexpected size");
3984 } else if (Name.ends_with(Suffix: ".256")) {
3985 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3986 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3987 : Intrinsic::x86_avx2_psrl_d;
3988 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3989 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3990 : Intrinsic::x86_avx2_psrl_q;
3991 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3992 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3993 : Intrinsic::x86_avx2_psrl_w;
3994 else
3995 llvm_unreachable("Unexpected size");
3996 } else {
3997 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3998 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3999 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4000 : Intrinsic::x86_avx512_psrl_d_512;
4001 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4002 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4003 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4004 : Intrinsic::x86_avx512_psrl_q_512;
4005 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
4006 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4007 : Intrinsic::x86_avx512_psrl_w_512;
4008 else
4009 llvm_unreachable("Unexpected size");
4010 }
4011
4012 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
4013 } else if (Name.starts_with(Prefix: "avx512.mask.psra")) {
4014 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4015 bool IsVariable = Name[16] == 'v';
4016 char Size = Name[16] == '.' ? Name[17]
4017 : Name[17] == '.' ? Name[18]
4018 : Name[18] == '.' ? Name[19]
4019 : Name[20];
4020
4021 Intrinsic::ID IID;
4022 if (IsVariable && Name[17] != '.') {
4023 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4024 IID = Intrinsic::x86_avx2_psrav_d;
4025 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4026 IID = Intrinsic::x86_avx2_psrav_d_256;
4027 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4028 IID = Intrinsic::x86_avx512_psrav_w_128;
4029 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4030 IID = Intrinsic::x86_avx512_psrav_w_256;
4031 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4032 IID = Intrinsic::x86_avx512_psrav_w_512;
4033 else
4034 llvm_unreachable("Unexpected size");
4035 } else if (Name.ends_with(Suffix: ".128")) {
4036 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4037 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4038 : Intrinsic::x86_sse2_psra_d;
4039 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4040 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4041 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4042 : Intrinsic::x86_avx512_psra_q_128;
4043 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4044 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4045 : Intrinsic::x86_sse2_psra_w;
4046 else
4047 llvm_unreachable("Unexpected size");
4048 } else if (Name.ends_with(Suffix: ".256")) {
4049 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4050 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4051 : Intrinsic::x86_avx2_psra_d;
4052 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4053 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4054 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4055 : Intrinsic::x86_avx512_psra_q_256;
4056 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4057 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4058 : Intrinsic::x86_avx2_psra_w;
4059 else
4060 llvm_unreachable("Unexpected size");
4061 } else {
4062 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4063 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4064 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4065 : Intrinsic::x86_avx512_psra_d_512;
4066 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4067 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4068 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4069 : Intrinsic::x86_avx512_psra_q_512;
4070 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4071 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4072 : Intrinsic::x86_avx512_psra_w_512;
4073 else
4074 llvm_unreachable("Unexpected size");
4075 }
4076
4077 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
4078 } else if (Name.starts_with(Prefix: "avx512.mask.move.s")) {
4079 Rep = upgradeMaskedMove(Builder, CI&: *CI);
4080 } else if (Name.starts_with(Prefix: "avx512.cvtmask2")) {
4081 Rep = upgradeMaskToInt(Builder, CI&: *CI);
4082 } else if (Name.ends_with(Suffix: ".movntdqa")) {
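// Lower the non-temporal load intrinsic to an ordinary aligned load tagged
// with !nontemporal metadata.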
4083 MDNode *Node = MDNode::get(
4084 Context&: C, MDs: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
4085
4086 LoadInst *LI = Builder.CreateAlignedLoad(
4087 Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0),
4088 Align: Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
4089 LI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
4090 Rep = LI;
4091 } else if (Name.starts_with(Prefix: "fma.vfmadd.") ||
4092 Name.starts_with(Prefix: "fma.vfmsub.") ||
4093 Name.starts_with(Prefix: "fma.vfnmadd.") ||
4094 Name.starts_with(Prefix: "fma.vfnmsub.")) {
4095 bool NegMul = Name[6] == 'n';
4096 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4097 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4098
4099 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4100 CI->getArgOperand(i: 2)};
4101
4102 if (IsScalar) {
4103 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
4104 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
4105 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
4106 }
4107
4108 if (NegMul && !IsScalar)
4109 Ops[0] = Builder.CreateFNeg(V: Ops[0]);
4110 if (NegMul && IsScalar)
4111 Ops[1] = Builder.CreateFNeg(V: Ops[1]);
4112 if (NegAcc)
4113 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4114
4115 Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops);
4116
4117 if (IsScalar)
4118 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
4119 } else if (Name.starts_with(Prefix: "fma4.vfmadd.s")) {
4120 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4121 CI->getArgOperand(i: 2)};
4122
4123 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
4124 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
4125 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
4126
4127 Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops);
4128
4129 Rep = Builder.CreateInsertElement(Vec: Constant::getNullValue(Ty: CI->getType()),
4130 NewElt: Rep, Idx: (uint64_t)0);
4131 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.s") ||
4132 Name.starts_with(Prefix: "avx512.maskz.vfmadd.s") ||
4133 Name.starts_with(Prefix: "avx512.mask3.vfmadd.s") ||
4134 Name.starts_with(Prefix: "avx512.mask3.vfmsub.s") ||
4135 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.s")) {
4136 bool IsMask3 = Name[11] == '3';
4137 bool IsMaskZ = Name[11] == 'z';
4138 // Drop the "avx512.mask." to make it easier.
4139 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4140 bool NegMul = Name[2] == 'n';
4141 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4142
4143 Value *A = CI->getArgOperand(i: 0);
4144 Value *B = CI->getArgOperand(i: 1);
4145 Value *C = CI->getArgOperand(i: 2);
4146
4147 if (NegMul && (IsMask3 || IsMaskZ))
4148 A = Builder.CreateFNeg(V: A);
4149 if (NegMul && !(IsMask3 || IsMaskZ))
4150 B = Builder.CreateFNeg(V: B);
4151 if (NegAcc)
4152 C = Builder.CreateFNeg(V: C);
4153
4154 A = Builder.CreateExtractElement(Vec: A, Idx: (uint64_t)0);
4155 B = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
4156 C = Builder.CreateExtractElement(Vec: C, Idx: (uint64_t)0);
4157
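// If the rounding argument is anything other than the default (4), keep
// using a rounding-aware x86 intrinsic; otherwise a plain llvm.fma suffices.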
4158 if (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
4159 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4) {
4160 Value *Ops[] = {A, B, C, CI->getArgOperand(i: 4)};
4161
4162 Intrinsic::ID IID;
4163 if (Name.back() == 'd')
4164 IID = Intrinsic::x86_avx512_vfmadd_f64;
4165 else
4166 IID = Intrinsic::x86_avx512_vfmadd_f32;
4167 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4168 } else {
4169 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
4170 }
4171
4172 Value *PassThru = IsMaskZ ? Constant::getNullValue(Ty: Rep->getType())
4173 : IsMask3 ? C
4174 : A;
4175
4176 // For Mask3 with NegAcc, we need to create a new extractelement that
4177 // avoids the negation above.
4178 if (NegAcc && IsMask3)
4179 PassThru =
4180 Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 2), Idx: (uint64_t)0);
4181
4182 Rep = emitX86ScalarSelect(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4183 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: IsMask3 ? 2 : 0), NewElt: Rep,
4184 Idx: (uint64_t)0);
4185 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.p") ||
4186 Name.starts_with(Prefix: "avx512.mask.vfnmadd.p") ||
4187 Name.starts_with(Prefix: "avx512.mask.vfnmsub.p") ||
4188 Name.starts_with(Prefix: "avx512.mask3.vfmadd.p") ||
4189 Name.starts_with(Prefix: "avx512.mask3.vfmsub.p") ||
4190 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.p") ||
4191 Name.starts_with(Prefix: "avx512.maskz.vfmadd.p")) {
4192 bool IsMask3 = Name[11] == '3';
4193 bool IsMaskZ = Name[11] == 'z';
4194 // Drop the "avx512.mask." to make it easier.
4195 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4196 bool NegMul = Name[2] == 'n';
4197 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4198
4199 Value *A = CI->getArgOperand(i: 0);
4200 Value *B = CI->getArgOperand(i: 1);
4201 Value *C = CI->getArgOperand(i: 2);
4202
4203 if (NegMul && (IsMask3 || IsMaskZ))
4204 A = Builder.CreateFNeg(V: A);
4205 if (NegMul && !(IsMask3 || IsMaskZ))
4206 B = Builder.CreateFNeg(V: B);
4207 if (NegAcc)
4208 C = Builder.CreateFNeg(V: C);
4209
4210 if (CI->arg_size() == 5 &&
4211 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
4212 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4)) {
4213 Intrinsic::ID IID;
4214 // Check the character before ".512" in string.
4215 if (Name[Name.size() - 5] == 's')
4216 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4217 else
4218 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4219
4220 Rep = Builder.CreateIntrinsic(ID: IID, Args: {A, B, C, CI->getArgOperand(i: 4)});
4221 } else {
4222 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
4223 }
4224
4225 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
4226 : IsMask3 ? CI->getArgOperand(i: 2)
4227 : CI->getArgOperand(i: 0);
4228
4229 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4230 } else if (Name.starts_with(Prefix: "fma.vfmsubadd.p")) {
4231 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4232 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4233 Intrinsic::ID IID;
4234 if (VecWidth == 128 && EltWidth == 32)
4235 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4236 else if (VecWidth == 256 && EltWidth == 32)
4237 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4238 else if (VecWidth == 128 && EltWidth == 64)
4239 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4240 else if (VecWidth == 256 && EltWidth == 64)
4241 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4242 else
4243 llvm_unreachable("Unexpected intrinsic");
4244
4245 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4246 CI->getArgOperand(i: 2)};
4247 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4248 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4249 } else if (Name.starts_with(Prefix: "avx512.mask.vfmaddsub.p") ||
4250 Name.starts_with(Prefix: "avx512.mask3.vfmaddsub.p") ||
4251 Name.starts_with(Prefix: "avx512.maskz.vfmaddsub.p") ||
4252 Name.starts_with(Prefix: "avx512.mask3.vfmsubadd.p")) {
4253 bool IsMask3 = Name[11] == '3';
4254 bool IsMaskZ = Name[11] == 'z';
4255 // Drop the "avx512.mask." to make it easier.
4256 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4257 bool IsSubAdd = Name[3] == 's';
4258 if (CI->arg_size() == 5) {
4259 Intrinsic::ID IID;
4260 // Check the character before ".512" in string.
4261 if (Name[Name.size() - 5] == 's')
4262 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4263 else
4264 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4265
4266 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4267 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
4268 if (IsSubAdd)
4269 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4270
4271 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4272 } else {
4273 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
4274
4275 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4276 CI->getArgOperand(i: 2)};
4277
4278 Function *FMA = Intrinsic::getOrInsertDeclaration(
4279 M: CI->getModule(), id: Intrinsic::fma, Tys: Ops[0]->getType());
4280 Value *Odd = Builder.CreateCall(Callee: FMA, Args: Ops);
4281 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4282 Value *Even = Builder.CreateCall(Callee: FMA, Args: Ops);
4283
4284 if (IsSubAdd)
4285 std::swap(a&: Even, b&: Odd);
4286
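// Interleave the two results: even lanes take Even[i] (mask index i) and odd
// lanes take Odd[i] (mask index i + NumElts).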
4287 SmallVector<int, 32> Idxs(NumElts);
4288 for (int i = 0; i != NumElts; ++i)
4289 Idxs[i] = i + (i % 2) * NumElts;
4290
4291 Rep = Builder.CreateShuffleVector(V1: Even, V2: Odd, Mask: Idxs);
4292 }
4293
4294 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
4295 : IsMask3 ? CI->getArgOperand(i: 2)
4296 : CI->getArgOperand(i: 0);
4297
4298 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4299 } else if (Name.starts_with(Prefix: "avx512.mask.pternlog.") ||
4300 Name.starts_with(Prefix: "avx512.maskz.pternlog.")) {
4301 bool ZeroMask = Name[11] == 'z';
4302 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4303 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4304 Intrinsic::ID IID;
4305 if (VecWidth == 128 && EltWidth == 32)
4306 IID = Intrinsic::x86_avx512_pternlog_d_128;
4307 else if (VecWidth == 256 && EltWidth == 32)
4308 IID = Intrinsic::x86_avx512_pternlog_d_256;
4309 else if (VecWidth == 512 && EltWidth == 32)
4310 IID = Intrinsic::x86_avx512_pternlog_d_512;
4311 else if (VecWidth == 128 && EltWidth == 64)
4312 IID = Intrinsic::x86_avx512_pternlog_q_128;
4313 else if (VecWidth == 256 && EltWidth == 64)
4314 IID = Intrinsic::x86_avx512_pternlog_q_256;
4315 else if (VecWidth == 512 && EltWidth == 64)
4316 IID = Intrinsic::x86_avx512_pternlog_q_512;
4317 else
4318 llvm_unreachable("Unexpected intrinsic");
4319
4320 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4321 CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)};
4322 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4323 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4324 : CI->getArgOperand(i: 0);
4325 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: PassThru);
4326 } else if (Name.starts_with(Prefix: "avx512.mask.vpmadd52") ||
4327 Name.starts_with(Prefix: "avx512.maskz.vpmadd52")) {
4328 bool ZeroMask = Name[11] == 'z';
4329 bool High = Name[20] == 'h' || Name[21] == 'h';
4330 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4331 Intrinsic::ID IID;
4332 if (VecWidth == 128 && !High)
4333 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4334 else if (VecWidth == 256 && !High)
4335 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4336 else if (VecWidth == 512 && !High)
4337 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4338 else if (VecWidth == 128 && High)
4339 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4340 else if (VecWidth == 256 && High)
4341 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4342 else if (VecWidth == 512 && High)
4343 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4344 else
4345 llvm_unreachable("Unexpected intrinsic");
4346
4347 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4348 CI->getArgOperand(i: 2)};
4349 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4350 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4351 : CI->getArgOperand(i: 0);
4352 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4353 } else if (Name.starts_with(Prefix: "avx512.mask.vpermi2var.") ||
4354 Name.starts_with(Prefix: "avx512.mask.vpermt2var.") ||
4355 Name.starts_with(Prefix: "avx512.maskz.vpermt2var.")) {
4356 bool ZeroMask = Name[11] == 'z';
4357 bool IndexForm = Name[17] == 'i';
4358 Rep = upgradeX86VPERMT2Intrinsics(Builder, CI&: *CI, ZeroMask, IndexForm);
4359 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpbusd.") ||
4360 Name.starts_with(Prefix: "avx512.maskz.vpdpbusd.") ||
4361 Name.starts_with(Prefix: "avx512.mask.vpdpbusds.") ||
4362 Name.starts_with(Prefix: "avx512.maskz.vpdpbusds.")) {
4363 bool ZeroMask = Name[11] == 'z';
4364 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4365 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4366 Intrinsic::ID IID;
4367 if (VecWidth == 128 && !IsSaturating)
4368 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4369 else if (VecWidth == 256 && !IsSaturating)
4370 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4371 else if (VecWidth == 512 && !IsSaturating)
4372 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4373 else if (VecWidth == 128 && IsSaturating)
4374 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4375 else if (VecWidth == 256 && IsSaturating)
4376 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4377 else if (VecWidth == 512 && IsSaturating)
4378 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4379 else
4380 llvm_unreachable("Unexpected intrinsic");
4381
4382 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4383 CI->getArgOperand(i: 2)};
4384
4385 // Input argument types were incorrectly set to vectors of i32 before, but
4386 // they should be vectors of i8. Insert a bitcast when encountering the old
4387 // types.
4388 if (Args[1]->getType()->isVectorTy() &&
4389 cast<VectorType>(Val: Args[1]->getType())
4390 ->getElementType()
4391 ->isIntegerTy(Bitwidth: 32) &&
4392 Args[2]->getType()->isVectorTy() &&
4393 cast<VectorType>(Val: Args[2]->getType())
4394 ->getElementType()
4395 ->isIntegerTy(Bitwidth: 32)) {
4396 Type *NewArgType = nullptr;
4397 if (VecWidth == 128)
4398 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 16, Scalable: false);
4399 else if (VecWidth == 256)
4400 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 32, Scalable: false);
4401 else if (VecWidth == 512)
4402 NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: 64, Scalable: false);
4403 else
4404 llvm_unreachable("Unexpected vector bit width");
4405
4406 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
4407 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
4408 }
4409
4410 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4411 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4412 : CI->getArgOperand(i: 0);
4413 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4414 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpwssd.") ||
4415 Name.starts_with(Prefix: "avx512.maskz.vpdpwssd.") ||
4416 Name.starts_with(Prefix: "avx512.mask.vpdpwssds.") ||
4417 Name.starts_with(Prefix: "avx512.maskz.vpdpwssds.")) {
4418 bool ZeroMask = Name[11] == 'z';
4419 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4420 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4421 Intrinsic::ID IID;
4422 if (VecWidth == 128 && !IsSaturating)
4423 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4424 else if (VecWidth == 256 && !IsSaturating)
4425 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4426 else if (VecWidth == 512 && !IsSaturating)
4427 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4428 else if (VecWidth == 128 && IsSaturating)
4429 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4430 else if (VecWidth == 256 && IsSaturating)
4431 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4432 else if (VecWidth == 512 && IsSaturating)
4433 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4434 else
4435 llvm_unreachable("Unexpected intrinsic");
4436
4437 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4438 CI->getArgOperand(i: 2)};
4439
4440 // Input argument types were incorrectly set to vectors of i32 before, but
4441 // they should be vectors of i16. Insert a bitcast when encountering the old
4442 // types.
4443 if (Args[1]->getType()->isVectorTy() &&
4444 cast<VectorType>(Val: Args[1]->getType())
4445 ->getElementType()
4446 ->isIntegerTy(Bitwidth: 32) &&
4447 Args[2]->getType()->isVectorTy() &&
4448 cast<VectorType>(Val: Args[2]->getType())
4449 ->getElementType()
4450 ->isIntegerTy(Bitwidth: 32)) {
4451 Type *NewArgType = nullptr;
4452 if (VecWidth == 128)
4453 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 8, Scalable: false);
4454 else if (VecWidth == 256)
4455 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 16, Scalable: false);
4456 else if (VecWidth == 512)
4457 NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: 32, Scalable: false);
4458 else
4459 llvm_unreachable("Unexpected vector bit width");
4460
4461 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
4462 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
4463 }
4464
4465 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4466 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4467 : CI->getArgOperand(i: 0);
4468 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4469 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4470 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4471 Name == "subborrow.u32" || Name == "subborrow.u64") {
4472 Intrinsic::ID IID;
4473 if (Name[0] == 'a' && Name.back() == '2')
4474 IID = Intrinsic::x86_addcarry_32;
4475 else if (Name[0] == 'a' && Name.back() == '4')
4476 IID = Intrinsic::x86_addcarry_64;
4477 else if (Name[0] == 's' && Name.back() == '2')
4478 IID = Intrinsic::x86_subborrow_32;
4479 else if (Name[0] == 's' && Name.back() == '4')
4480 IID = Intrinsic::x86_subborrow_64;
4481 else
4482 llvm_unreachable("Unexpected intrinsic");
4483
4484 // Make a call with 3 operands.
4485 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4486 CI->getArgOperand(i: 2)};
4487 Value *NewCall = Builder.CreateIntrinsic(ID: IID, Args);
4488
4489 // Extract the second result and store it.
4490 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
4491 Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 3), Align: Align(1));
4492 // Replace the original call result with the first result of the new call.
4493 Value *CF = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
4494
4495 CI->replaceAllUsesWith(V: CF);
4496 Rep = nullptr;
4497 } else if (Name.starts_with(Prefix: "avx512.mask.") &&
4498 upgradeAVX512MaskToSelect(Name, Builder, CI&: *CI, Rep)) {
4499 // Rep will be updated by the call in the condition.
4500 }
4501
4502 return Rep;
4503}
4504
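// Upgrade AArch64 intrinsic calls that need more than a rename: neon.bfcvt*
// conversions are rewritten as fptrunc plus shuffles, and the sve.fcvt* bf16
// conversions are remapped to their *_v2 replacements with the predicate
// converted to the correct element count.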
4505static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4506 Function *F, IRBuilder<> &Builder) {
4507 if (Name.starts_with(Prefix: "neon.bfcvt")) {
4508 if (Name.starts_with(Prefix: "neon.bfcvtn2")) {
4509 SmallVector<int, 32> LoMask(4);
4510 std::iota(first: LoMask.begin(), last: LoMask.end(), value: 0);
4511 SmallVector<int, 32> ConcatMask(8);
4512 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4513 Value *Inactive = Builder.CreateShuffleVector(V: CI->getOperand(i_nocapture: 0), Mask: LoMask);
4514 Value *Trunc =
4515 Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 1), DestTy: Inactive->getType());
4516 return Builder.CreateShuffleVector(V1: Inactive, V2: Trunc, Mask: ConcatMask);
4517 } else if (Name.starts_with(Prefix: "neon.bfcvtn")) {
4518 SmallVector<int, 32> ConcatMask(8);
4519 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4520 Type *V4BF16 =
4521 FixedVectorType::get(ElementType: Type::getBFloatTy(C&: F->getContext()), NumElts: 4);
4522 Value *Trunc = Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0), DestTy: V4BF16);
4524 return Builder.CreateShuffleVector(
4525 V1: Trunc, V2: ConstantAggregateZero::get(Ty: V4BF16), Mask: ConcatMask);
4526 } else {
4527 return Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0),
4528 DestTy: Type::getBFloatTy(C&: F->getContext()));
4529 }
4530 } else if (Name.starts_with(Prefix: "sve.fcvt")) {
4531 Intrinsic::ID NewID =
4532 StringSwitch<Intrinsic::ID>(Name)
4533 .Case(S: "sve.fcvt.bf16f32", Value: Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4534 .Case(S: "sve.fcvtnt.bf16f32",
4535 Value: Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4536 .Default(Value: Intrinsic::not_intrinsic);
4537 if (NewID == Intrinsic::not_intrinsic)
4538 llvm_unreachable("Unhandled Intrinsic!");
4539
4540 SmallVector<Value *, 3> Args(CI->args());
4541
4542 // The original intrinsics incorrectly used a predicate based on the
4543 // smallest element type rather than the largest.
4544 Type *BadPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
4545 Type *GoodPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
4546
4547 if (Args[1]->getType() != BadPredTy)
4548 llvm_unreachable("Unexpected predicate type!");
4549
4550 Args[1] = Builder.CreateIntrinsic(ID: Intrinsic::aarch64_sve_convert_to_svbool,
4551 Types: BadPredTy, Args: Args[1]);
4552 Args[1] = Builder.CreateIntrinsic(
4553 ID: Intrinsic::aarch64_sve_convert_from_svbool, Types: GoodPredTy, Args: Args[1]);
4554
4555 return Builder.CreateIntrinsic(ID: NewID, Args, /*FMFSource=*/nullptr,
4556 Name: CI->getName());
4557 }
4558
4559 llvm_unreachable("Unhandled Intrinsic!");
4560}
4561
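// Upgrade ARM MVE and CDE intrinsic calls whose 64-bit-element predicates
// changed from v4i1 to v2i1; old predicates are converted through
// arm.mve.pred.v2i / arm.mve.pred.i2v before re-emitting the intrinsic.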
4562static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4563 IRBuilder<> &Builder) {
4564 if (Name == "mve.vctp64.old") {
4565 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4566 // correct type.
4567 Value *VCTP = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_vctp64, Types: {},
4568 Args: CI->getArgOperand(i: 0),
4569 /*FMFSource=*/nullptr, Name: CI->getName());
4570 Value *C1 = Builder.CreateIntrinsic(
4571 ID: Intrinsic::arm_mve_pred_v2i,
4572 Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 2, Scalable: false)}, Args: VCTP);
4573 return Builder.CreateIntrinsic(
4574 ID: Intrinsic::arm_mve_pred_i2v,
4575 Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: C1);
4576 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4577 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4578 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4579 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4580 Name ==
4581 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4582 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4583 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4584 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4585 Name ==
4586 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4587 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4588 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4589 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4590 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4591 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4592 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4593 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4594 std::vector<Type *> Tys;
4595 unsigned ID = CI->getIntrinsicID();
4596 Type *V2I1Ty = FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts: 2);
4597 switch (ID) {
4598 case Intrinsic::arm_mve_mull_int_predicated:
4599 case Intrinsic::arm_mve_vqdmull_predicated:
4600 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4601 Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), V2I1Ty};
4602 break;
4603 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4604 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4605 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4606 Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 0)->getType(),
4607 V2I1Ty};
4608 break;
4609 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4610 Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(),
4611 CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
4612 break;
4613 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4614 Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 1)->getType(),
4615 CI->getOperand(i_nocapture: 2)->getType(), V2I1Ty};
4616 break;
4617 case Intrinsic::arm_cde_vcx1q_predicated:
4618 case Intrinsic::arm_cde_vcx1qa_predicated:
4619 case Intrinsic::arm_cde_vcx2q_predicated:
4620 case Intrinsic::arm_cde_vcx2qa_predicated:
4621 case Intrinsic::arm_cde_vcx3q_predicated:
4622 case Intrinsic::arm_cde_vcx3qa_predicated:
4623 Tys = {CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
4624 break;
4625 default:
4626 llvm_unreachable("Unhandled Intrinsic!");
4627 }
4628
4629 std::vector<Value *> Ops;
4630 for (Value *Op : CI->args()) {
4631 Type *Ty = Op->getType();
4632 if (Ty->getScalarSizeInBits() == 1) {
4633 Value *C1 = Builder.CreateIntrinsic(
4634 ID: Intrinsic::arm_mve_pred_v2i,
4635 Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: Op);
4636 Op = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_pred_i2v, Types: {V2I1Ty}, Args: C1);
4637 }
4638 Ops.push_back(x: Op);
4639 }
4640
4641 return Builder.CreateIntrinsic(ID, Types: Tys, Args: Ops, /*FMFSource=*/nullptr,
4642 Name: CI->getName());
4643 }
4644 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4645}
4646
4647// These are expected to have the arguments:
4648// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4649//
4650// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4651//
4652static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4653 Function *F, IRBuilder<> &Builder) {
4654 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4655 // for compatibility.
4656 auto UpgradeLegacyWMMAIUIntrinsicCall =
4657 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4658 ArrayRef<Type *> OverloadTys) -> Value * {
4659 // Prepare arguments, append clamp=0 for compatibility
4660 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4661 Args.push_back(Elt: Builder.getFalse());
4662
4663 // Insert the declaration for the right overload types
4664 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4665 M: F->getParent(), id: F->getIntrinsicID(), Tys: OverloadTys);
4666
4667 // Copy operand bundles if any
4668 SmallVector<OperandBundleDef, 1> Bundles;
4669 CI->getOperandBundlesAsDefs(Defs&: Bundles);
4670
4671 // Create the new call and copy calling properties
4672 auto *NewCall = cast<CallInst>(Val: Builder.CreateCall(Callee: NewDecl, Args, OpBundles: Bundles));
4673 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
4674 NewCall->setCallingConv(CI->getCallingConv());
4675 NewCall->setAttributes(CI->getAttributes());
4676 NewCall->setDebugLoc(CI->getDebugLoc());
4677 NewCall->copyMetadata(SrcInst: *CI);
4678 return NewCall;
4679 };
4680
4681 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4682 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4683 "intrinsic should have 7 arguments");
4684 Type *T1 = CI->getArgOperand(i: 4)->getType();
4685 Type *T2 = CI->getArgOperand(i: 1)->getType();
4686 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4687 }
4688 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4689 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4690 "intrinsic should have 8 arguments");
4691 Type *T1 = CI->getArgOperand(i: 4)->getType();
4692 Type *T2 = CI->getArgOperand(i: 1)->getType();
4693 Type *T3 = CI->getArgOperand(i: 3)->getType();
4694 Type *T4 = CI->getArgOperand(i: 5)->getType();
4695 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4696 }
4697
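// The remaining names are legacy atomic intrinsics; map each one onto the
// equivalent atomicrmw operation and emit a native atomic instruction.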
4698 AtomicRMWInst::BinOp RMWOp =
4699 StringSwitch<AtomicRMWInst::BinOp>(Name)
4700 .StartsWith(S: "ds.fadd", Value: AtomicRMWInst::FAdd)
4701 .StartsWith(S: "ds.fmin", Value: AtomicRMWInst::FMin)
4702 .StartsWith(S: "ds.fmax", Value: AtomicRMWInst::FMax)
4703 .StartsWith(S: "atomic.inc.", Value: AtomicRMWInst::UIncWrap)
4704 .StartsWith(S: "atomic.dec.", Value: AtomicRMWInst::UDecWrap)
4705 .StartsWith(S: "global.atomic.fadd", Value: AtomicRMWInst::FAdd)
4706 .StartsWith(S: "flat.atomic.fadd", Value: AtomicRMWInst::FAdd)
4707 .StartsWith(S: "global.atomic.fmin", Value: AtomicRMWInst::FMin)
4708 .StartsWith(S: "flat.atomic.fmin", Value: AtomicRMWInst::FMin)
4709 .StartsWith(S: "global.atomic.fmax", Value: AtomicRMWInst::FMax)
4710 .StartsWith(S: "flat.atomic.fmax", Value: AtomicRMWInst::FMax)
4711 .StartsWith(S: "atomic.cond.sub", Value: AtomicRMWInst::USubCond)
4712 .StartsWith(S: "atomic.csub", Value: AtomicRMWInst::USubSat);
4713
4714 unsigned NumOperands = CI->getNumOperands();
4715 if (NumOperands < 3) // Malformed bitcode.
4716 return nullptr;
4717
4718 Value *Ptr = CI->getArgOperand(i: 0);
4719 PointerType *PtrTy = dyn_cast<PointerType>(Val: Ptr->getType());
4720 if (!PtrTy) // Malformed.
4721 return nullptr;
4722
4723 Value *Val = CI->getArgOperand(i: 1);
4724 if (Val->getType() != CI->getType()) // Malformed.
4725 return nullptr;
4726
4727 ConstantInt *OrderArg = nullptr;
4728 bool IsVolatile = false;
4729
4730 // These should have 5 arguments (plus the callee). A separate version of the
4731 // ds_fadd intrinsic was defined for bf16, which was missing arguments.
4732 if (NumOperands > 3)
4733 OrderArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
4734
4735 // Ignore scope argument at 3
4736
4737 if (NumOperands > 5) {
4738 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 4));
4739 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4740 }
4741
4742 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4743 if (OrderArg && isValidAtomicOrdering(I: OrderArg->getZExtValue()))
4744 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4745 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4746 Order = AtomicOrdering::SequentiallyConsistent;
4747
4748 LLVMContext &Ctx = F->getContext();
4749
4750 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4751 Type *RetTy = CI->getType();
4752 if (VectorType *VT = dyn_cast<VectorType>(Val: RetTy)) {
4753 if (VT->getElementType()->isIntegerTy(Bitwidth: 16)) {
4754 VectorType *AsBF16 =
4755 VectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), EC: VT->getElementCount());
4756 Val = Builder.CreateBitCast(V: Val, DestTy: AsBF16);
4757 }
4758 }
4759
4760 // The scope argument never really worked correctly. Use agent as the most
4761 // conservative option which should still always produce the instruction.
4762 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID(SSN: "agent");
4763 AtomicRMWInst *RMW =
4764 Builder.CreateAtomicRMW(Op: RMWOp, Ptr, Val, Align: std::nullopt, Ordering: Order, SSID);
4765
4766 unsigned AddrSpace = PtrTy->getAddressSpace();
4767 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4768 MDNode *EmptyMD = MDNode::get(Context&: F->getContext(), MDs: {});
4769 RMW->setMetadata(Kind: "amdgpu.no.fine.grained.memory", Node: EmptyMD);
4770 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4771 RMW->setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: EmptyMD);
4772 }
4773
4774 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4775 MDBuilder MDB(F->getContext());
4776 MDNode *RangeNotPrivate =
4777 MDB.createRange(Lo: APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4778 Hi: APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4779 RMW->setMetadata(KindID: LLVMContext::MD_noalias_addrspace, Node: RangeNotPrivate);
4780 }
4781
4782 if (IsVolatile)
4783 RMW->setVolatile(true);
4784
4785 return Builder.CreateBitCast(V: RMW, DestTy: RetTy);
4786}
4787
4788/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4789/// plain MDNode, as it's the verifier's job to check these are the correct
4790/// types later.
4791static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4792 if (Op < CI->arg_size()) {
4793 if (MetadataAsValue *MAV =
4794 dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op))) {
4795 Metadata *MD = MAV->getMetadata();
4796 return dyn_cast_if_present<MDNode>(Val: MD);
4797 }
4798 }
4799 return nullptr;
4800}
4801
4802/// Helper to unwrap the Metadata from MetadataAsValue operands, such as the Value field.
4803static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4804 if (Op < CI->arg_size())
4805 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op)))
4806 return MAV->getMetadata();
4807 return nullptr;
4808}
4809
4810static MDNode *getDebugLocSafe(const Instruction *I) {
4811 // The MDNode attached to this instruction might not be the correct type,
4812 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4813 return I->getDebugLoc().getAsMDNode();
4814}
4815
4816/// Convert debug intrinsic calls to non-instruction debug records.
4817/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4818/// \p CI - The debug intrinsic call.
4819static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4820 DbgRecord *DR = nullptr;
4821 if (Name == "label") {
4822 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(Label: unwrapMAVOp(CI, Op: 0),
4823 DL: CI->getDebugLoc());
4824 } else if (Name == "assign") {
4825 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4826 Type: DbgVariableRecord::LocationType::Assign, Val: unwrapMAVMetadataOp(CI, Op: 0),
4827 Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: unwrapMAVOp(CI, Op: 3),
4828 Address: unwrapMAVMetadataOp(CI, Op: 4),
4829 /* The address is a Value ref; it will be stored as Metadata. */
4830 AddressExpression: unwrapMAVOp(CI, Op: 5), DI: getDebugLocSafe(I: CI));
4831 } else if (Name == "declare") {
4832 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4833 Type: DbgVariableRecord::LocationType::Declare, Val: unwrapMAVMetadataOp(CI, Op: 0),
4834 Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: nullptr, Address: nullptr, AddressExpression: nullptr,
4835 DI: getDebugLocSafe(I: CI));
4836 } else if (Name == "addr") {
4837 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4838 MDNode *ExprNode = unwrapMAVOp(CI, Op: 2);
4839 // Don't try to add something to the expression if it's not an expression.
4840 // Instead, allow the verifier to fail later.
4841 if (DIExpression *Expr = dyn_cast<DIExpression>(Val: ExprNode)) {
4842 ExprNode = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
4843 }
4844 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4845 Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
4846 Variable: unwrapMAVOp(CI, Op: 1), Expression: ExprNode, AssignID: nullptr, Address: nullptr, AddressExpression: nullptr,
4847 DI: getDebugLocSafe(I: CI));
4848 } else if (Name == "value") {
4849 // An old version of dbg.value had an extra offset argument.
4850 unsigned VarOp = 1;
4851 unsigned ExprOp = 2;
4852 if (CI->arg_size() == 4) {
4853 auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1));
4854 // Nonzero offset dbg.values get dropped without a replacement.
4855 if (!Offset || !Offset->isZeroValue())
4856 return;
4857 VarOp = 2;
4858 ExprOp = 3;
4859 }
4860 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4861 Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
4862 Variable: unwrapMAVOp(CI, Op: VarOp), Expression: unwrapMAVOp(CI, Op: ExprOp), AssignID: nullptr, Address: nullptr,
4863 AddressExpression: nullptr, DI: getDebugLocSafe(I: CI));
4864 }
4865 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4866 CI->getParent()->insertDbgRecordBefore(DR, Here: CI->getIterator());
4867}
4868
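// Upgrade llvm.experimental.vector.splice / llvm.vector.splice to
// vector.splice.left or vector.splice.right based on the sign of the constant
// offset, passing the offset's absolute value.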
4869static Value *upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder) {
4870 auto *Offset = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
4871 if (!Offset)
4872 reportFatalUsageError(reason: "Invalid llvm.vector.splice offset argument");
4873 int64_t OffsetVal = Offset->getSExtValue();
4874 return Builder.CreateIntrinsic(ID: OffsetVal >= 0
4875 ? Intrinsic::vector_splice_left
4876 : Intrinsic::vector_splice_right,
4877 Types: CI->getType(),
4878 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4879 Builder.getInt32(C: std::abs(i: OffsetVal))});
4880}
4881
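// Upgrade llvm.convert.to.fp16 / llvm.convert.from.fp16, which used i16
// instead of half: truncate or extend through the half type and bitcast
// to or from the original integer type.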
4882static Value *upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI,
4883 Function *F, IRBuilder<> &Builder) {
4884 if (Name.starts_with(Prefix: "to.fp16")) {
4885 Value *Cast =
4886 Builder.CreateFPTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
4887 return Builder.CreateBitCast(V: Cast, DestTy: CI->getType());
4888 }
4889
4890 if (Name.starts_with(Prefix: "from.fp16")) {
4891 Value *Cast =
4892 Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Builder.getHalfTy());
4893 return Builder.CreateFPExt(V: Cast, DestTy: CI->getType());
4894 }
4895
4896 return nullptr;
4897}
4898
4899/// Upgrade a call to an old intrinsic. All argument and return casting must be
4900/// provided to seamlessly integrate with existing context.
4901void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4902 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4903 // checks that the callee's function type matches. It's likely we need to handle
4904 // type changes here.
4905 Function *F = dyn_cast<Function>(Val: CI->getCalledOperand());
4906 if (!F)
4907 return;
4908
4909 LLVMContext &C = CI->getContext();
4910 IRBuilder<> Builder(C);
4911 if (isa<FPMathOperator>(Val: CI))
4912 Builder.setFastMathFlags(CI->getFastMathFlags());
4913 Builder.SetInsertPoint(TheBB: CI->getParent(), IP: CI->getIterator());
4914
4915 if (!NewFn) {
4916 // Get the Function's name.
4917 StringRef Name = F->getName();
4918 if (!Name.consume_front(Prefix: "llvm."))
4919 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
4920
4921 bool IsX86 = Name.consume_front(Prefix: "x86.");
4922 bool IsNVVM = Name.consume_front(Prefix: "nvvm.");
4923 bool IsAArch64 = Name.consume_front(Prefix: "aarch64.");
4924 bool IsARM = Name.consume_front(Prefix: "arm.");
4925 bool IsAMDGCN = Name.consume_front(Prefix: "amdgcn.");
4926 bool IsDbg = Name.consume_front(Prefix: "dbg.");
4927 bool IsOldSplice =
4928 (Name.consume_front(Prefix: "experimental.vector.splice") ||
4929 Name.consume_front(Prefix: "vector.splice")) &&
4930 !(Name.starts_with(Prefix: ".left") || Name.starts_with(Prefix: ".right"));
4931 Value *Rep = nullptr;
4932
4933 if (!IsX86 && Name == "stackprotectorcheck") {
4934 Rep = nullptr;
4935 } else if (IsNVVM) {
4936 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4937 } else if (IsX86) {
4938 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4939 } else if (IsAArch64) {
4940 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4941 } else if (IsARM) {
4942 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4943 } else if (IsAMDGCN) {
4944 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4945 } else if (IsDbg) {
4946 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4947 } else if (IsOldSplice) {
4948 Rep = upgradeVectorSplice(CI, Builder);
4949 } else if (Name.consume_front(Prefix: "convert.")) {
4950 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
4951 } else {
4952 llvm_unreachable("Unknown function for CallBase upgrade.");
4953 }
4954
4955 if (Rep)
4956 CI->replaceAllUsesWith(V: Rep);
4957 CI->eraseFromParent();
4958 return;
4959 }
4960
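// DefaultCase handles upgrades that only renamed the intrinsic or turned a
// named return struct into a literal one; anything else is handed to the
// verifier to reject.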
4961 const auto &DefaultCase = [&]() -> void {
4962 if (F == NewFn)
4963 return;
4964
4965 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4966 // Handle generic mangling change.
4967 assert(
4968 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4969 "Unknown function for CallBase upgrade and isn't just a name change");
4970 CI->setCalledFunction(NewFn);
4971 return;
4972 }
4973
4974 // This must be an upgrade from a named to a literal struct.
4975 if (auto *OldST = dyn_cast<StructType>(Val: CI->getType())) {
4976 assert(OldST != NewFn->getReturnType() &&
4977 "Return type must have changed");
4978 assert(OldST->getNumElements() ==
4979 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4980 "Must have same number of elements");
4981
4982 SmallVector<Value *> Args(CI->args());
4983 CallInst *NewCI = Builder.CreateCall(Callee: NewFn, Args);
4984 NewCI->setAttributes(CI->getAttributes());
4985 Value *Res = PoisonValue::get(T: OldST);
4986 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4987 Value *Elem = Builder.CreateExtractValue(Agg: NewCI, Idxs: Idx);
4988 Res = Builder.CreateInsertValue(Agg: Res, Val: Elem, Idxs: Idx);
4989 }
4990 CI->replaceAllUsesWith(V: Res);
4991 CI->eraseFromParent();
4992 return;
4993 }
4994
4995 // We're probably about to produce something invalid. Let the verifier catch
4996 // it instead of dying here.
4997 CI->setCalledOperand(
4998 ConstantExpr::getPointerCast(C: NewFn, Ty: CI->getCalledOperand()->getType()));
4999 return;
5000 };
5001 CallInst *NewCall = nullptr;
5002 switch (NewFn->getIntrinsicID()) {
5003 default: {
5004 DefaultCase();
5005 return;
5006 }
5007 case Intrinsic::arm_neon_vst1:
5008 case Intrinsic::arm_neon_vst2:
5009 case Intrinsic::arm_neon_vst3:
5010 case Intrinsic::arm_neon_vst4:
5011 case Intrinsic::arm_neon_vst2lane:
5012 case Intrinsic::arm_neon_vst3lane:
5013 case Intrinsic::arm_neon_vst4lane: {
5014 SmallVector<Value *, 4> Args(CI->args());
5015 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5016 break;
5017 }
5018 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5019 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5020 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5021 LLVMContext &Ctx = F->getParent()->getContext();
5022 SmallVector<Value *, 4> Args(CI->args());
5023 Args[3] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx),
5024 V: cast<ConstantInt>(Val: Args[3])->getZExtValue());
5025 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5026 break;
5027 }
5028 case Intrinsic::aarch64_sve_ld3_sret:
5029 case Intrinsic::aarch64_sve_ld4_sret:
5030 case Intrinsic::aarch64_sve_ld2_sret: {
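// The sret loads now return a struct of vectors; rebuild the legacy single
// wide vector result by inserting each struct element at its lane offset.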
5031 StringRef Name = F->getName();
5032 Name = Name.substr(Start: 5);
5033 unsigned N = StringSwitch<unsigned>(Name)
5034 .StartsWith(S: "aarch64.sve.ld2", Value: 2)
5035 .StartsWith(S: "aarch64.sve.ld3", Value: 3)
5036 .StartsWith(S: "aarch64.sve.ld4", Value: 4)
5037 .Default(Value: 0);
5038 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
5039 unsigned MinElts = RetTy->getMinNumElements() / N;
5040 SmallVector<Value *, 2> Args(CI->args());
5041 Value *NewLdCall = Builder.CreateCall(Callee: NewFn, Args);
5042 Value *Ret = llvm::PoisonValue::get(T: RetTy);
5043 for (unsigned I = 0; I < N; I++) {
5044 Value *SRet = Builder.CreateExtractValue(Agg: NewLdCall, Idxs: I);
5045 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: SRet, Idx: I * MinElts);
5046 }
5047 NewCall = dyn_cast<CallInst>(Val: Ret);
5048 break;
5049 }
5050
5051 case Intrinsic::coro_end: {
5052 SmallVector<Value *, 3> Args(CI->args());
5053 Args.push_back(Elt: ConstantTokenNone::get(Context&: CI->getContext()));
5054 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5055 break;
5056 }
5057
5058 case Intrinsic::vector_extract: {
5059 StringRef Name = F->getName();
5060 Name = Name.substr(Start: 5); // Strip llvm
5061 if (!Name.starts_with(Prefix: "aarch64.sve.tuple.get")) {
5062 DefaultCase();
5063 return;
5064 }
5065 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
5066 unsigned MinElts = RetTy->getMinNumElements();
5067 unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
5068 Value *NewIdx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts);
5069 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), NewIdx});
5070 break;
5071 }
5072
5073 case Intrinsic::vector_insert: {
5074 StringRef Name = F->getName();
5075 Name = Name.substr(Start: 5);
5076 if (!Name.starts_with(Prefix: "aarch64.sve.tuple")) {
5077 DefaultCase();
5078 return;
5079 }
5080 if (Name.starts_with(Prefix: "aarch64.sve.tuple.set")) {
5081 unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
5082 auto *Ty = cast<ScalableVectorType>(Val: CI->getArgOperand(i: 2)->getType());
5083 Value *NewIdx =
5084 ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * Ty->getMinNumElements());
5085 NewCall = Builder.CreateCall(
5086 Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), NewIdx});
5087 break;
5088 }
5089 if (Name.starts_with(Prefix: "aarch64.sve.tuple.create")) {
5090 unsigned N = StringSwitch<unsigned>(Name)
5091 .StartsWith(S: "aarch64.sve.tuple.create2", Value: 2)
5092 .StartsWith(S: "aarch64.sve.tuple.create3", Value: 3)
5093 .StartsWith(S: "aarch64.sve.tuple.create4", Value: 4)
5094 .Default(Value: 0);
5095 assert(N > 1 && "Create is expected to be between 2-4");
5096 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
5097 Value *Ret = llvm::PoisonValue::get(T: RetTy);
5098 unsigned MinElts = RetTy->getMinNumElements() / N;
5099 for (unsigned I = 0; I < N; I++) {
5100 Value *V = CI->getArgOperand(i: I);
5101 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: V, Idx: I * MinElts);
5102 }
5103 NewCall = dyn_cast<CallInst>(Val: Ret);
5104 }
5105 break;
5106 }
5107
5108 case Intrinsic::arm_neon_bfdot:
5109 case Intrinsic::arm_neon_bfmmla:
5110 case Intrinsic::arm_neon_bfmlalb:
5111 case Intrinsic::arm_neon_bfmlalt:
5112 case Intrinsic::aarch64_neon_bfdot:
5113 case Intrinsic::aarch64_neon_bfmmla:
5114 case Intrinsic::aarch64_neon_bfmlalb:
5115 case Intrinsic::aarch64_neon_bfmlalt: {
5116 SmallVector<Value *, 3> Args;
5117 assert(CI->arg_size() == 3 &&
5118 "Mismatch between function args and call args");
5119 size_t OperandWidth =
5120 CI->getArgOperand(i: 1)->getType()->getPrimitiveSizeInBits();
5121 assert((OperandWidth == 64 || OperandWidth == 128) &&
5122 "Unexpected operand width");
5123 Type *NewTy = FixedVectorType::get(ElementType: Type::getBFloatTy(C), NumElts: OperandWidth / 16);
5124 auto Iter = CI->args().begin();
5125 Args.push_back(Elt: *Iter++);
5126 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
5127 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
5128 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5129 break;
5130 }
5131
5132 case Intrinsic::bitreverse:
5133 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
5134 break;
5135
5136 case Intrinsic::ctlz:
5137 case Intrinsic::cttz: {
5138 if (CI->arg_size() != 1) {
5139 DefaultCase();
5140 return;
5141 }
5142
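    // Sketch of the rewrite (illustrative types): the old single-argument form
    //   %r = call i32 @llvm.ctlz.i32(i32 %x)
    // becomes
    //   %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
    // with the is_zero_poison flag explicitly set to false (the conservative
    // default).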
5143 NewCall =
5144 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), Builder.getFalse()});
5145 break;
5146 }
5147
5148 case Intrinsic::objectsize: {
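    // Older forms of llvm.objectsize carried fewer boolean flags; any missing
    // trailing flags default to false, e.g. (illustrative):
    //   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false)
    // becomes
    //   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 false, i1 false)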
5149 Value *NullIsUnknownSize =
5150 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(i: 2);
5151 Value *Dynamic =
5152 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(i: 3);
5153 NewCall = Builder.CreateCall(
        Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
                              NullIsUnknownSize, Dynamic});
5155 break;
5156 }
5157
5158 case Intrinsic::ctpop:
5159 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
5160 break;
5161 case Intrinsic::dbg_value: {
5162 StringRef Name = F->getName();
5163 Name = Name.substr(Start: 5); // Strip llvm.
5164 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
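    // Illustrative rewrite (metadata operands abbreviated):
    //   call void @llvm.dbg.addr(metadata ptr %p, metadata !var,
    //                            metadata !DIExpression())
    // becomes
    //   call void @llvm.dbg.value(metadata ptr %p, metadata !var,
    //                             metadata !DIExpression(DW_OP_deref))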
5165 if (Name.starts_with(Prefix: "dbg.addr")) {
5166 DIExpression *Expr = cast<DIExpression>(
5167 Val: cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2))->getMetadata());
5168 Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
5169 NewCall =
5170 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5171 MetadataAsValue::get(Context&: C, MD: Expr)});
5172 break;
5173 }
5174
5175 // Upgrade from the old version that had an extra offset argument.
5176 assert(CI->arg_size() == 4);
5177 // Drop nonzero offsets instead of attempting to upgrade them.
5178 if (auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1)))
5179 if (Offset->isZeroValue()) {
5180 NewCall = Builder.CreateCall(
5181 Callee: NewFn,
5182 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)});
5183 break;
5184 }
5185 CI->eraseFromParent();
5186 return;
5187 }
5188
5189 case Intrinsic::ptr_annotation:
5190 // Upgrade from versions that lacked the annotation attribute argument.
5191 if (CI->arg_size() != 4) {
5192 DefaultCase();
5193 return;
5194 }
5195
5196 // Create a new call with an added null annotation attribute argument.
5197 NewCall = Builder.CreateCall(
5198 Callee: NewFn,
5199 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
5200 CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
5201 NewCall->takeName(V: CI);
5202 CI->replaceAllUsesWith(V: NewCall);
5203 CI->eraseFromParent();
5204 return;
5205
5206 case Intrinsic::var_annotation:
5207 // Upgrade from versions that lacked the annotation attribute argument.
5208 if (CI->arg_size() != 4) {
5209 DefaultCase();
5210 return;
5211 }
5212 // Create a new call with an added null annotation attribute argument.
5213 NewCall = Builder.CreateCall(
5214 Callee: NewFn,
5215 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
5216 CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
5217 NewCall->takeName(V: CI);
5218 CI->replaceAllUsesWith(V: NewCall);
5219 CI->eraseFromParent();
5220 return;
5221
5222 case Intrinsic::riscv_aes32dsi:
5223 case Intrinsic::riscv_aes32dsmi:
5224 case Intrinsic::riscv_aes32esi:
5225 case Intrinsic::riscv_aes32esmi:
5226 case Intrinsic::riscv_sm4ks:
5227 case Intrinsic::riscv_sm4ed: {
5228 // The last argument to these intrinsics used to be i8 and changed to i32.
5229 // The type overload for sm4ks and sm4ed was removed.
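    // Illustrative rewrite of the immediate operand (RV32 shown; the removed
    // i64 overload is additionally truncated/extended below):
    //   call i32 @llvm.riscv.aes32esi(i32 %a, i32 %b, i8 3)
    //   -> call i32 @llvm.riscv.aes32esi(i32 %a, i32 %b, i32 3)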
5230 Value *Arg2 = CI->getArgOperand(i: 2);
5231 if (Arg2->getType()->isIntegerTy(Bitwidth: 32) && !CI->getType()->isIntegerTy(Bitwidth: 64))
5232 return;
5233
5234 Value *Arg0 = CI->getArgOperand(i: 0);
5235 Value *Arg1 = CI->getArgOperand(i: 1);
5236 if (CI->getType()->isIntegerTy(Bitwidth: 64)) {
5237 Arg0 = Builder.CreateTrunc(V: Arg0, DestTy: Builder.getInt32Ty());
5238 Arg1 = Builder.CreateTrunc(V: Arg1, DestTy: Builder.getInt32Ty());
5239 }
5240
5241 Arg2 = ConstantInt::get(Ty: Type::getInt32Ty(C),
5242 V: cast<ConstantInt>(Val: Arg2)->getZExtValue());
5243
5244 NewCall = Builder.CreateCall(Callee: NewFn, Args: {Arg0, Arg1, Arg2});
5245 Value *Res = NewCall;
5246 if (Res->getType() != CI->getType())
5247 Res = Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
5248 NewCall->takeName(V: CI);
5249 CI->replaceAllUsesWith(V: Res);
5250 CI->eraseFromParent();
5251 return;
5252 }
5253 case Intrinsic::nvvm_mapa_shared_cluster: {
5254 // Create a new call with the correct address space.
5255 NewCall =
5256 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
5257 Value *Res = NewCall;
5258 Res = Builder.CreateAddrSpaceCast(
5259 V: Res, DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED));
5260 NewCall->takeName(V: CI);
5261 CI->replaceAllUsesWith(V: Res);
5262 CI->eraseFromParent();
5263 return;
5264 }
5265 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5266 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5267 // Create a new call with the correct address space.
5268 SmallVector<Value *, 4> Args(CI->args());
5269 Args[0] = Builder.CreateAddrSpaceCast(
5270 V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5271
5272 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5273 NewCall->takeName(V: CI);
5274 CI->replaceAllUsesWith(V: NewCall);
5275 CI->eraseFromParent();
5276 return;
5277 }
5278 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5279 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5280 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5281 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5282 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5283 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5284 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5285 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5286 SmallVector<Value *, 16> Args(CI->args());
5287
5288 // Create AddrSpaceCast to shared_cluster if needed.
5289 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5290 unsigned AS = CI->getArgOperand(i: 0)->getType()->getPointerAddressSpace();
5291 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5292 Args[0] = Builder.CreateAddrSpaceCast(
5293 V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5294
5295 // Attach the flag argument for cta_group, with a
5296 // default value of 0. This handles case (2) in
5297 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5298 size_t NumArgs = CI->arg_size();
5299 Value *FlagArg = CI->getArgOperand(i: NumArgs - 3);
5300 if (!FlagArg->getType()->isIntegerTy(Bitwidth: 1))
5301 Args.push_back(Elt: ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0));
5302
5303 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5304 NewCall->takeName(V: CI);
5305 CI->replaceAllUsesWith(V: NewCall);
5306 CI->eraseFromParent();
5307 return;
5308 }
5309 case Intrinsic::riscv_sha256sig0:
5310 case Intrinsic::riscv_sha256sig1:
5311 case Intrinsic::riscv_sha256sum0:
5312 case Intrinsic::riscv_sha256sum1:
5313 case Intrinsic::riscv_sm3p0:
5314 case Intrinsic::riscv_sm3p1: {
    // These intrinsics used to be overloaded on i32/i64; the i64 overload was
    // removed, so truncate the operand to i32 and extend the result back.
5317 if (!CI->getType()->isIntegerTy(Bitwidth: 64))
5318 return;
5319
5320 Value *Arg =
5321 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getInt32Ty());
5322
5323 NewCall = Builder.CreateCall(Callee: NewFn, Args: Arg);
5324 Value *Res =
5325 Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
5326 NewCall->takeName(V: CI);
5327 CI->replaceAllUsesWith(V: Res);
5328 CI->eraseFromParent();
5329 return;
5330 }
5331
5332 case Intrinsic::x86_xop_vfrcz_ss:
5333 case Intrinsic::x86_xop_vfrcz_sd:
5334 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 1)});
5335 break;
5336
5337 case Intrinsic::x86_xop_vpermil2pd:
5338 case Intrinsic::x86_xop_vpermil2ps:
5339 case Intrinsic::x86_xop_vpermil2pd_256:
5340 case Intrinsic::x86_xop_vpermil2ps_256: {
5341 SmallVector<Value *, 4> Args(CI->args());
5342 VectorType *FltIdxTy = cast<VectorType>(Val: Args[2]->getType());
5343 VectorType *IntIdxTy = VectorType::getInteger(VTy: FltIdxTy);
5344 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: IntIdxTy);
5345 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5346 break;
5347 }
5348
5349 case Intrinsic::x86_sse41_ptestc:
5350 case Intrinsic::x86_sse41_ptestz:
5351 case Intrinsic::x86_sse41_ptestnzc: {
5352 // The arguments for these intrinsics used to be v4f32, and changed
5353 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5354 // So, the only thing required is a bitcast for both arguments.
5355 // First, check the arguments have the old type.
5356 Value *Arg0 = CI->getArgOperand(i: 0);
5357 if (Arg0->getType() != FixedVectorType::get(ElementType: Type::getFloatTy(C), NumElts: 4))
5358 return;
5359
5360 // Old intrinsic, add bitcasts
5361 Value *Arg1 = CI->getArgOperand(i: 1);
5362
5363 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
5364
5365 Value *BC0 = Builder.CreateBitCast(V: Arg0, DestTy: NewVecTy, Name: "cast");
5366 Value *BC1 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
5367
5368 NewCall = Builder.CreateCall(Callee: NewFn, Args: {BC0, BC1});
5369 break;
5370 }
5371
5372 case Intrinsic::x86_rdtscp: {
    // This used to take one argument. If we have no arguments, it is already
    // upgraded.
5375 if (CI->getNumOperands() == 0)
5376 return;
5377
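    // Sketch of the rewrite (illustrative): the old form
    //   %tsc = call i64 @llvm.x86.rdtscp(ptr %aux)
    // becomes
    //   %pair = call { i64, i32 } @llvm.x86.rdtscp()
    // where the second element is stored to %aux and the first element
    // replaces all uses of %tsc.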
5378 NewCall = Builder.CreateCall(Callee: NewFn);
5379 // Extract the second result and store it.
5380 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
5381 Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
5382 // Replace the original call result with the first result of the new call.
5383 Value *TSC = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
5384
5385 NewCall->takeName(V: CI);
5386 CI->replaceAllUsesWith(V: TSC);
5387 CI->eraseFromParent();
5388 return;
5389 }
5390
5391 case Intrinsic::x86_sse41_insertps:
5392 case Intrinsic::x86_sse41_dppd:
5393 case Intrinsic::x86_sse41_dpps:
5394 case Intrinsic::x86_sse41_mpsadbw:
5395 case Intrinsic::x86_avx_dp_ps_256:
5396 case Intrinsic::x86_avx2_mpsadbw: {
5397 // Need to truncate the last argument from i32 to i8 -- this argument models
5398 // an inherently 8-bit immediate operand to these x86 instructions.
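    // e.g. (illustrative):
    //   call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a,
    //                                             <4 x float> %b, i32 16)
    // becomes the same call with the immediate passed as "i8 16".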
5399 SmallVector<Value *, 4> Args(CI->args());
5400
5401 // Replace the last argument with a trunc.
5402 Args.back() = Builder.CreateTrunc(V: Args.back(), DestTy: Type::getInt8Ty(C), Name: "trunc");
5403 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5404 break;
5405 }
5406
5407 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5408 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5409 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5410 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5411 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5412 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5413 SmallVector<Value *, 4> Args(CI->args());
5414 unsigned NumElts =
5415 cast<FixedVectorType>(Val: Args[0]->getType())->getNumElements();
5416 Args[3] = getX86MaskVec(Builder, Mask: Args[3], NumElts);
5417
5418 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5419 Value *Res = applyX86MaskOn1BitsVec(Builder, Vec: NewCall, Mask: nullptr);
5420
5421 NewCall->takeName(V: CI);
5422 CI->replaceAllUsesWith(V: Res);
5423 CI->eraseFromParent();
5424 return;
5425 }
5426
5427 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5428 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5429 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5430 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5431 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5432 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5433 SmallVector<Value *, 4> Args(CI->args());
5434 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
5435 if (NewFn->getIntrinsicID() ==
5436 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5437 Args[1] = Builder.CreateBitCast(
5438 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5439
5440 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5441 Value *Res = Builder.CreateBitCast(
5442 V: NewCall, DestTy: FixedVectorType::get(ElementType: Builder.getInt16Ty(), NumElts));
5443
5444 NewCall->takeName(V: CI);
5445 CI->replaceAllUsesWith(V: Res);
5446 CI->eraseFromParent();
5447 return;
5448 }
5449 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5450 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5452 SmallVector<Value *, 4> Args(CI->args());
5453 unsigned NumElts =
5454 cast<FixedVectorType>(Val: CI->getType())->getNumElements() * 2;
5455 Args[1] = Builder.CreateBitCast(
5456 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5457 Args[2] = Builder.CreateBitCast(
5458 V: Args[2], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5459
5460 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5461 break;
5462 }
5463
5464 case Intrinsic::thread_pointer: {
5465 NewCall = Builder.CreateCall(Callee: NewFn, Args: {});
5466 break;
5467 }
5468
5469 case Intrinsic::memcpy:
5470 case Intrinsic::memmove:
5471 case Intrinsic::memset: {
5472 // We have to make sure that the call signature is what we're expecting.
5473 // We only want to change the old signatures by removing the alignment arg:
5474 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5475 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5476 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5477 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5478 // Note: i8*'s in the above can be any pointer type
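    // A concrete (illustrative) instance of the rewrite:
    //   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 8, i1 false)
    // becomes
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s,
    //                                    i64 %n, i1 false)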
5479 if (CI->arg_size() != 5) {
5480 DefaultCase();
5481 return;
5482 }
5483 // Remove alignment argument (3), and add alignment attributes to the
5484 // dest/src pointers.
5485 Value *Args[4] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5486 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
5487 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5488 AttributeList OldAttrs = CI->getAttributes();
5489 AttributeList NewAttrs = AttributeList::get(
5490 C, FnAttrs: OldAttrs.getFnAttrs(), RetAttrs: OldAttrs.getRetAttrs(),
5491 ArgAttrs: {OldAttrs.getParamAttrs(ArgNo: 0), OldAttrs.getParamAttrs(ArgNo: 1),
5492 OldAttrs.getParamAttrs(ArgNo: 2), OldAttrs.getParamAttrs(ArgNo: 4)});
5493 NewCall->setAttributes(NewAttrs);
5494 auto *MemCI = cast<MemIntrinsic>(Val: NewCall);
5495 // All mem intrinsics support dest alignment.
5496 const ConstantInt *Align = cast<ConstantInt>(Val: CI->getArgOperand(i: 3));
5497 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5498 // Memcpy/Memmove also support source alignment.
5499 if (auto *MTI = dyn_cast<MemTransferInst>(Val: MemCI))
5500 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5501 break;
5502 }
5503
5504 case Intrinsic::masked_load:
5505 case Intrinsic::masked_gather:
5506 case Intrinsic::masked_store:
5507 case Intrinsic::masked_scatter: {
5508 if (CI->arg_size() != 4) {
5509 DefaultCase();
5510 return;
5511 }
5512
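    // The old forms carried the alignment as an explicit i32 operand, e.g.
    // (illustrative)
    //   call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4, <4 x i1> %m,
    //                                             <4 x i32> %passthru)
    // The calls are re-emitted through IRBuilder below so the alignment is
    // carried by the new call rather than by an explicit operand.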
5513 auto GetMaybeAlign = [](Value *Op) {
5514 if (auto *CI = dyn_cast<ConstantInt>(Val: Op)) {
5515 uint64_t Val = CI->getZExtValue();
5516 if (Val == 0)
5517 return MaybeAlign();
5518 if (isPowerOf2_64(Value: Val))
5519 return MaybeAlign(Val);
5520 }
5521 reportFatalUsageError(reason: "Invalid alignment argument");
5522 };
5523 auto GetAlign = [&](Value *Op) {
5524 MaybeAlign Align = GetMaybeAlign(Op);
5525 if (Align)
5526 return *Align;
5527 reportFatalUsageError(reason: "Invalid zero alignment argument");
5528 };
5529
5530 const DataLayout &DL = CI->getDataLayout();
5531 switch (NewFn->getIntrinsicID()) {
5532 case Intrinsic::masked_load:
5533 NewCall = Builder.CreateMaskedLoad(
5534 Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0), Alignment: GetAlign(CI->getArgOperand(i: 1)),
5535 Mask: CI->getArgOperand(i: 2), PassThru: CI->getArgOperand(i: 3));
5536 break;
5537 case Intrinsic::masked_gather:
5538 NewCall = Builder.CreateMaskedGather(
5539 Ty: CI->getType(), Ptrs: CI->getArgOperand(i: 0),
5540 Alignment: DL.getValueOrABITypeAlignment(Alignment: GetMaybeAlign(CI->getArgOperand(i: 1)),
5541 Ty: CI->getType()->getScalarType()),
5542 Mask: CI->getArgOperand(i: 2), PassThru: CI->getArgOperand(i: 3));
5543 break;
5544 case Intrinsic::masked_store:
5545 NewCall = Builder.CreateMaskedStore(
5546 Val: CI->getArgOperand(i: 0), Ptr: CI->getArgOperand(i: 1),
5547 Alignment: GetAlign(CI->getArgOperand(i: 2)), Mask: CI->getArgOperand(i: 3));
5548 break;
5549 case Intrinsic::masked_scatter:
5550 NewCall = Builder.CreateMaskedScatter(
5551 Val: CI->getArgOperand(i: 0), Ptrs: CI->getArgOperand(i: 1),
5552 Alignment: DL.getValueOrABITypeAlignment(
5553 Alignment: GetMaybeAlign(CI->getArgOperand(i: 2)),
5554 Ty: CI->getArgOperand(i: 0)->getType()->getScalarType()),
5555 Mask: CI->getArgOperand(i: 3));
5556 break;
5557 default:
5558 llvm_unreachable("Unexpected intrinsic ID");
5559 }
5560 // Previous metadata is still valid.
5561 NewCall->copyMetadata(SrcInst: *CI);
5562 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
5563 break;
5564 }
5565
5566 case Intrinsic::lifetime_start:
5567 case Intrinsic::lifetime_end: {
5568 if (CI->arg_size() != 2) {
5569 DefaultCase();
5570 return;
5571 }
5572
5573 Value *Ptr = CI->getArgOperand(i: 1);
5574 // Try to strip pointer casts, such that the lifetime works on an alloca.
5575 Ptr = Ptr->stripPointerCasts();
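    // If the marker (possibly looking through casts) refers to an alloca, it
    // is re-emitted via the IRBuilder lifetime helpers on the stripped
    // pointer; otherwise it is dropped, since a lifetime marker on a
    // non-alloca carries no useful information.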
5576 if (isa<AllocaInst>(Val: Ptr)) {
5577 // Don't use NewFn, as we might have looked through an addrspacecast.
5578 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5579 NewCall = Builder.CreateLifetimeStart(Ptr);
5580 else
5581 NewCall = Builder.CreateLifetimeEnd(Ptr);
5582 break;
5583 }
5584
5585 // Otherwise remove the lifetime marker.
5586 CI->eraseFromParent();
5587 return;
5588 }
5589
5590 case Intrinsic::x86_avx512_vpdpbusd_128:
5591 case Intrinsic::x86_avx512_vpdpbusd_256:
5592 case Intrinsic::x86_avx512_vpdpbusd_512:
5593 case Intrinsic::x86_avx512_vpdpbusds_128:
5594 case Intrinsic::x86_avx512_vpdpbusds_256:
5595 case Intrinsic::x86_avx512_vpdpbusds_512:
5596 case Intrinsic::x86_avx2_vpdpbssd_128:
5597 case Intrinsic::x86_avx2_vpdpbssd_256:
5598 case Intrinsic::x86_avx10_vpdpbssd_512:
5599 case Intrinsic::x86_avx2_vpdpbssds_128:
5600 case Intrinsic::x86_avx2_vpdpbssds_256:
5601 case Intrinsic::x86_avx10_vpdpbssds_512:
5602 case Intrinsic::x86_avx2_vpdpbsud_128:
5603 case Intrinsic::x86_avx2_vpdpbsud_256:
5604 case Intrinsic::x86_avx10_vpdpbsud_512:
5605 case Intrinsic::x86_avx2_vpdpbsuds_128:
5606 case Intrinsic::x86_avx2_vpdpbsuds_256:
5607 case Intrinsic::x86_avx10_vpdpbsuds_512:
5608 case Intrinsic::x86_avx2_vpdpbuud_128:
5609 case Intrinsic::x86_avx2_vpdpbuud_256:
5610 case Intrinsic::x86_avx10_vpdpbuud_512:
5611 case Intrinsic::x86_avx2_vpdpbuuds_128:
5612 case Intrinsic::x86_avx2_vpdpbuuds_256:
5613 case Intrinsic::x86_avx10_vpdpbuuds_512: {
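    // Operands 1 and 2 were historically typed as i32 vectors; the current
    // dot-product intrinsics take byte vectors of the same overall width, so
    // bitcasts are all that is needed, e.g. (illustrative, 128-bit case)
    //   <4 x i32> %b  ->  <16 x i8>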
5614 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5615 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5616 CI->getArgOperand(i: 2)};
5617 Type *NewArgType = VectorType::get(ElementType: Builder.getInt8Ty(), NumElements: NumElts, Scalable: false);
5618 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
5619 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
5620
5621 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5622 break;
5623 }
5624 case Intrinsic::x86_avx512_vpdpwssd_128:
5625 case Intrinsic::x86_avx512_vpdpwssd_256:
5626 case Intrinsic::x86_avx512_vpdpwssd_512:
5627 case Intrinsic::x86_avx512_vpdpwssds_128:
5628 case Intrinsic::x86_avx512_vpdpwssds_256:
5629 case Intrinsic::x86_avx512_vpdpwssds_512:
5630 case Intrinsic::x86_avx2_vpdpwsud_128:
5631 case Intrinsic::x86_avx2_vpdpwsud_256:
5632 case Intrinsic::x86_avx10_vpdpwsud_512:
5633 case Intrinsic::x86_avx2_vpdpwsuds_128:
5634 case Intrinsic::x86_avx2_vpdpwsuds_256:
5635 case Intrinsic::x86_avx10_vpdpwsuds_512:
5636 case Intrinsic::x86_avx2_vpdpwusd_128:
5637 case Intrinsic::x86_avx2_vpdpwusd_256:
5638 case Intrinsic::x86_avx10_vpdpwusd_512:
5639 case Intrinsic::x86_avx2_vpdpwusds_128:
5640 case Intrinsic::x86_avx2_vpdpwusds_256:
5641 case Intrinsic::x86_avx10_vpdpwusds_512:
5642 case Intrinsic::x86_avx2_vpdpwuud_128:
5643 case Intrinsic::x86_avx2_vpdpwuud_256:
5644 case Intrinsic::x86_avx10_vpdpwuud_512:
5645 case Intrinsic::x86_avx2_vpdpwuuds_128:
5646 case Intrinsic::x86_avx2_vpdpwuuds_256:
  case Intrinsic::x86_avx10_vpdpwuuds_512: {
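    // Same idea as the byte-granularity cases above, but the word forms take
    // i16 element vectors, e.g. (illustrative, 128-bit case)
    //   <4 x i32> %b  ->  <8 x i16>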
5648 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5649 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5650 CI->getArgOperand(i: 2)};
5651 Type *NewArgType = VectorType::get(ElementType: Builder.getInt16Ty(), NumElements: NumElts, Scalable: false);
5652 Args[1] = Builder.CreateBitCast(V: Args[1], DestTy: NewArgType);
5653 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: NewArgType);
5654
5655 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5656 break;
  }
  }
5658 assert(NewCall && "Should have either set this variable or returned through "
5659 "the default case");
5660 NewCall->takeName(V: CI);
5661 CI->replaceAllUsesWith(V: NewCall);
5662 CI->eraseFromParent();
5663}
5664
5665void llvm::UpgradeCallsToIntrinsic(Function *F) {
5666 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5667
5668 // Check if this function should be upgraded and get the replacement function
5669 // if there is one.
5670 Function *NewFn;
5671 if (UpgradeIntrinsicFunction(F, NewFn)) {
5672 // Replace all users of the old function with the new function or new
5673 // instructions. This is not a range loop because the call is deleted.
5674 for (User *U : make_early_inc_range(Range: F->users()))
5675 if (CallBase *CB = dyn_cast<CallBase>(Val: U))
5676 UpgradeIntrinsicCall(CI: CB, NewFn);
5677
5678 // Remove old function, no longer used, from the module.
5679 if (F != NewFn)
5680 F->eraseFromParent();
5681 }
5682}
5683
5684MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5685 const unsigned NumOperands = MD.getNumOperands();
5686 if (NumOperands == 0)
5687 return &MD; // Invalid, punt to a verifier error.
5688
5689 // Check if the tag uses struct-path aware TBAA format.
5690 if (isa<MDNode>(Val: MD.getOperand(I: 0)) && NumOperands >= 3)
5691 return &MD;
5692
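  // Otherwise this is an old scalar TBAA tag, e.g. (illustrative)
  //   !{!"int", !parent}  or  !{!"int", !parent, i64 1 /* constant */},
  // which gets wrapped into a struct-path access tag of the form
  //   !{!scalar, !scalar, i64 0 [, constant flag]}.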
5693 auto &Context = MD.getContext();
5694 if (NumOperands == 3) {
5695 Metadata *Elts[] = {MD.getOperand(I: 0), MD.getOperand(I: 1)};
5696 MDNode *ScalarType = MDNode::get(Context, MDs: Elts);
5697 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5698 Metadata *Elts2[] = {ScalarType, ScalarType,
5699 ConstantAsMetadata::get(
5700 C: Constant::getNullValue(Ty: Type::getInt64Ty(C&: Context))),
5701 MD.getOperand(I: 2)};
5702 return MDNode::get(Context, MDs: Elts2);
5703 }
5704 // Create a MDNode <MD, MD, offset 0>
5705 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(C: Constant::getNullValue(
5706 Ty: Type::getInt64Ty(C&: Context)))};
5707 return MDNode::get(Context, MDs: Elts);
5708}
5709
5710Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5711 Instruction *&Temp) {
5712 if (Opc != Instruction::BitCast)
5713 return nullptr;
5714
5715 Temp = nullptr;
5716 Type *SrcTy = V->getType();
5717 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5718 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5719 LLVMContext &Context = V->getContext();
5720
5721 // We have no information about target data layout, so we assume that
5722 // the maximum pointer size is 64bit.
5723 Type *MidTy = Type::getInt64Ty(C&: Context);
5724 Temp = CastInst::Create(Instruction::PtrToInt, S: V, Ty: MidTy);
5725
5726 return CastInst::Create(Instruction::IntToPtr, S: Temp, Ty: DestTy);
5727 }
5728
5729 return nullptr;
5730}
5731
5732Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5733 if (Opc != Instruction::BitCast)
5734 return nullptr;
5735
5736 Type *SrcTy = C->getType();
5737 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5738 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5739 LLVMContext &Context = C->getContext();
5740
5741 // We have no information about target data layout, so we assume that
5742 // the maximum pointer size is 64bit.
5743 Type *MidTy = Type::getInt64Ty(C&: Context);
5744
5745 return ConstantExpr::getIntToPtr(C: ConstantExpr::getPtrToInt(C, Ty: MidTy),
5746 Ty: DestTy);
5747 }
5748
5749 return nullptr;
5750}
5751
/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module is modified.
5754bool llvm::UpgradeDebugInfo(Module &M) {
5755 if (DisableAutoUpgradeDebugInfo)
5756 return false;
5757
5758 llvm::TimeTraceScope timeScope("Upgrade debug info");
5759 // We need to get metadata before the module is verified (i.e., getModuleFlag
5760 // makes assumptions that we haven't verified yet). Carefully extract the flag
5761 // from the metadata.
5762 unsigned Version = 0;
5763 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5764 auto OpIt = find_if(Range: ModFlags->operands(), P: [](const MDNode *Flag) {
5765 if (Flag->getNumOperands() < 3)
5766 return false;
5767 if (MDString *K = dyn_cast_or_null<MDString>(Val: Flag->getOperand(I: 1)))
5768 return K->getString() == "Debug Info Version";
5769 return false;
5770 });
5771 if (OpIt != ModFlags->op_end()) {
5772 const MDOperand &ValOp = (*OpIt)->getOperand(I: 2);
5773 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD: ValOp))
5774 Version = CI->getZExtValue();
5775 }
5776 }
5777
5778 if (Version == DEBUG_METADATA_VERSION) {
5779 bool BrokenDebugInfo = false;
5780 if (verifyModule(M, OS: &llvm::errs(), BrokenDebugInfo: &BrokenDebugInfo))
5781 report_fatal_error(reason: "Broken module found, compilation aborted!");
5782 if (!BrokenDebugInfo)
5783 // Everything is ok.
5784 return false;
5785 else {
5786 // Diagnose malformed debug info.
5787 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5788 M.getContext().diagnose(DI: Diag);
5789 }
5790 }
5791 bool Modified = StripDebugInfo(M);
5792 if (Modified && Version != DEBUG_METADATA_VERSION) {
5793 // Diagnose a version mismatch.
5794 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5795 M.getContext().diagnose(DI: DiagVersion);
5796 }
5797 return Modified;
5798}
5799
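// Merge one dimension of an nvvm.annotations launch-bound value into the
// corresponding comma-separated function attribute. For example
// (illustrative), an existing "nvvm.maxntid"="8,4" combined with a metadata
// entry maxntidz = 2 results in "nvvm.maxntid"="8,4,2".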
5800static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5801 GlobalValue *GV, const Metadata *V) {
5802 Function *F = cast<Function>(Val: GV);
5803
5804 constexpr StringLiteral DefaultValue = "1";
5805 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5806 unsigned Length = 0;
5807
5808 if (F->hasFnAttribute(Kind: Attr)) {
    // We expect the existing attribute to have the form "x[,y[,z]]". Parse
    // those elements into Vect3, keeping the default of "1" for any missing
    // dimension.
5811 StringRef S = F->getFnAttribute(Kind: Attr).getValueAsString();
5812 for (; Length < 3 && !S.empty(); Length++) {
5813 auto [Part, Rest] = S.split(Separator: ',');
5814 Vect3[Length] = Part.trim();
5815 S = Rest;
5816 }
5817 }
5818
5819 const unsigned Dim = DimC - 'x';
5820 assert(Dim < 3 && "Unexpected dim char");
5821
5822 const uint64_t VInt = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5823
  // Local variable required for the StringRef in Vect3 to point to.
5825 const std::string VStr = llvm::utostr(X: VInt);
5826 Vect3[Dim] = VStr;
5827 Length = std::max(a: Length, b: Dim + 1);
5828
5829 const std::string NewAttr = llvm::join(R: ArrayRef(Vect3, Length), Separator: ",");
5830 F->addFnAttr(Kind: Attr, Val: NewAttr);
5831}
5832
5833static inline bool isXYZ(StringRef S) {
5834 return S == "x" || S == "y" || S == "z";
5835}
5836
5837bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5838 const Metadata *V) {
5839 if (K == "kernel") {
5840 if (!mdconst::extract<ConstantInt>(MD&: V)->isZero())
5841 cast<Function>(Val: GV)->setCallingConv(CallingConv::PTX_Kernel);
5842 return true;
5843 }
5844 if (K == "align") {
    // V is a bitfield specifying two 16-bit values. The alignment value is
    // specified in the low 16 bits and the index in the high 16 bits. For the
    // index, 0 indicates the return value while higher values correspond to
    // each parameter (idx = param + 1).
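    // For example (illustrative): a value of 0x00020008 encodes index 2, i.e.
    // the second parameter, with a stack alignment of 8.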
5849 const uint64_t AlignIdxValuePair =
5850 mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5851 const unsigned Idx = (AlignIdxValuePair >> 16);
5852 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5853 cast<Function>(Val: GV)->addAttributeAtIndex(
5854 i: Idx, Attr: Attribute::getWithStackAlignment(Context&: GV->getContext(), Alignment: StackAlign));
5855 return true;
5856 }
5857 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5858 const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5859 cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.maxclusterrank", Val: llvm::utostr(X: CV));
5860 return true;
5861 }
5862 if (K == "minctasm") {
5863 const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5864 cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.minctasm", Val: llvm::utostr(X: CV));
5865 return true;
5866 }
5867 if (K == "maxnreg") {
5868 const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5869 cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.maxnreg", Val: llvm::utostr(X: CV));
5870 return true;
5871 }
5872 if (K.consume_front(Prefix: "maxntid") && isXYZ(S: K)) {
5873 upgradeNVVMFnVectorAttr(Attr: "nvvm.maxntid", DimC: K[0], GV, V);
5874 return true;
5875 }
5876 if (K.consume_front(Prefix: "reqntid") && isXYZ(S: K)) {
5877 upgradeNVVMFnVectorAttr(Attr: "nvvm.reqntid", DimC: K[0], GV, V);
5878 return true;
5879 }
5880 if (K.consume_front(Prefix: "cluster_dim_") && isXYZ(S: K)) {
5881 upgradeNVVMFnVectorAttr(Attr: "nvvm.cluster_dim", DimC: K[0], GV, V);
5882 return true;
5883 }
5884 if (K == "grid_constant") {
5885 const auto Attr = Attribute::get(Context&: GV->getContext(), Kind: "nvvm.grid_constant");
5886 for (const auto &Op : cast<MDNode>(Val: V)->operands()) {
5887 // For some reason, the index is 1-based in the metadata. Good thing we're
5888 // able to auto-upgrade it!
5889 const auto Index = mdconst::extract<ConstantInt>(MD: Op)->getZExtValue() - 1;
5890 cast<Function>(Val: GV)->addParamAttr(ArgNo: Index, Attr);
5891 }
5892 return true;
5893 }
5894
5895 return false;
5896}
5897
5898void llvm::UpgradeNVVMAnnotations(Module &M) {
5899 NamedMDNode *NamedMD = M.getNamedMetadata(Name: "nvvm.annotations");
5900 if (!NamedMD)
5901 return;
5902
5903 SmallVector<MDNode *, 8> NewNodes;
5904 SmallPtrSet<const MDNode *, 8> SeenNodes;
5905 for (MDNode *MD : NamedMD->operands()) {
5906 if (!SeenNodes.insert(Ptr: MD).second)
5907 continue;
5908
5909 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD: MD->getOperand(I: 0));
5910 if (!GV)
5911 continue;
5912
5913 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5914
5915 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(I: 0)};
5916 // Each nvvm.annotations metadata entry will be of the following form:
5917 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5918 // start index = 1, to skip the global variable key
    // increment = 2, to skip the value of each property-value pair
5920 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5921 MDString *K = cast<MDString>(Val: MD->getOperand(I: j));
5922 const MDOperand &V = MD->getOperand(I: j + 1);
5923 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K: K->getString(), V);
5924 if (!Upgraded)
5925 NewOperands.append(IL: {K, V});
5926 }
5927
5928 if (NewOperands.size() > 1)
5929 NewNodes.push_back(Elt: MDNode::get(Context&: M.getContext(), MDs: NewOperands));
5930 }
5931
5932 NamedMD->clearOperands();
5933 for (MDNode *N : NewNodes)
5934 NamedMD->addOperand(M: N);
5935}
5936
/// This checks for the objc retain/release marker which should be upgraded.
/// It returns true if the module is modified.
5939static bool upgradeRetainReleaseMarker(Module &M) {
5940 bool Changed = false;
5941 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5942 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(Name: MarkerKey);
5943 if (ModRetainReleaseMarker) {
5944 MDNode *Op = ModRetainReleaseMarker->getOperand(i: 0);
5945 if (Op) {
5946 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 0));
5947 if (ID) {
5948 SmallVector<StringRef, 4> ValueComp;
5949 ID->getString().split(A&: ValueComp, Separator: "#");
5950 if (ValueComp.size() == 2) {
5951 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5952 ID = MDString::get(Context&: M.getContext(), Str: NewValue);
5953 }
5954 M.addModuleFlag(Behavior: Module::Error, Key: MarkerKey, Val: ID);
5955 M.eraseNamedMetadata(NMD: ModRetainReleaseMarker);
5956 Changed = true;
5957 }
5958 }
5959 }
5960 return Changed;
5961}
5962
5963void llvm::UpgradeARCRuntime(Module &M) {
5964 // This lambda converts normal function calls to ARC runtime functions to
5965 // intrinsic calls.
5966 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5967 llvm::Intrinsic::ID IntrinsicFunc) {
5968 Function *Fn = M.getFunction(Name: OldFunc);
5969
5970 if (!Fn)
5971 return;
5972
5973 Function *NewFn =
5974 llvm::Intrinsic::getOrInsertDeclaration(M: &M, id: IntrinsicFunc);
5975
5976 for (User *U : make_early_inc_range(Range: Fn->users())) {
5977 CallInst *CI = dyn_cast<CallInst>(Val: U);
5978 if (!CI || CI->getCalledFunction() != Fn)
5979 continue;
5980
5981 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5982 FunctionType *NewFuncTy = NewFn->getFunctionType();
5983 SmallVector<Value *, 2> Args;
5984
5985 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5986 // value to the return type of the old function.
5987 if (NewFuncTy->getReturnType() != CI->getType() &&
5988 !CastInst::castIsValid(op: Instruction::BitCast, S: CI,
5989 DstTy: NewFuncTy->getReturnType()))
5990 continue;
5991
5992 bool InvalidCast = false;
5993
5994 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5995 Value *Arg = CI->getArgOperand(i: I);
5996
5997 // Bitcast argument to the parameter type of the new function if it's
5998 // not a variadic argument.
5999 if (I < NewFuncTy->getNumParams()) {
6000 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6001 // to the parameter type of the new function.
6002 if (!CastInst::castIsValid(op: Instruction::BitCast, S: Arg,
6003 DstTy: NewFuncTy->getParamType(i: I))) {
6004 InvalidCast = true;
6005 break;
6006 }
6007 Arg = Builder.CreateBitCast(V: Arg, DestTy: NewFuncTy->getParamType(i: I));
6008 }
6009 Args.push_back(Elt: Arg);
6010 }
6011
6012 if (InvalidCast)
6013 continue;
6014
6015 // Create a call instruction that calls the new function.
6016 CallInst *NewCall = Builder.CreateCall(FTy: NewFuncTy, Callee: NewFn, Args);
6017 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
6018 NewCall->takeName(V: CI);
6019
6020 // Bitcast the return value back to the type of the old call.
6021 Value *NewRetVal = Builder.CreateBitCast(V: NewCall, DestTy: CI->getType());
6022
6023 if (!CI->use_empty())
6024 CI->replaceAllUsesWith(V: NewRetVal);
6025 CI->eraseFromParent();
6026 }
6027
6028 if (Fn->use_empty())
6029 Fn->eraseFromParent();
6030 };
6031
6032 // Unconditionally convert a call to "clang.arc.use" to a call to
6033 // "llvm.objc.clang.arc.use".
6034 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6035
6036 // Upgrade the retain release marker. If there is no need to upgrade
6037 // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. In that case there is no need to upgrade
  // the runtime calls.
6039 if (!upgradeRetainReleaseMarker(M))
6040 return;
6041
6042 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6043 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6044 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6045 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6046 {"objc_autoreleaseReturnValue",
6047 llvm::Intrinsic::objc_autoreleaseReturnValue},
6048 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6049 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6050 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6051 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6052 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6053 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6054 {"objc_release", llvm::Intrinsic::objc_release},
6055 {"objc_retain", llvm::Intrinsic::objc_retain},
6056 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6057 {"objc_retainAutoreleaseReturnValue",
6058 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6059 {"objc_retainAutoreleasedReturnValue",
6060 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6061 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6062 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6063 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6064 {"objc_unsafeClaimAutoreleasedReturnValue",
6065 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6066 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6067 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6068 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6069 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6070 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6071 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6072 {"objc_arc_annotation_topdown_bbstart",
6073 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6074 {"objc_arc_annotation_topdown_bbend",
6075 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6076 {"objc_arc_annotation_bottomup_bbstart",
6077 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6078 {"objc_arc_annotation_bottomup_bbend",
6079 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6080
6081 for (auto &I : RuntimeFuncs)
6082 UpgradeToIntrinsic(I.first, I.second);
6083}
6084
6085bool llvm::UpgradeModuleFlags(Module &M) {
6086 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6087 if (!ModFlags)
6088 return false;
6089
6090 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6091 bool HasSwiftVersionFlag = false;
6092 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6093 uint32_t SwiftABIVersion;
6094 auto Int8Ty = Type::getInt8Ty(C&: M.getContext());
6095 auto Int32Ty = Type::getInt32Ty(C&: M.getContext());
6096
6097 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6098 MDNode *Op = ModFlags->getOperand(i: I);
6099 if (Op->getNumOperands() != 3)
6100 continue;
6101 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
6102 if (!ID)
6103 continue;
6104 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6105 Metadata *Ops[3] = {ConstantAsMetadata::get(C: ConstantInt::get(
6106 Ty: Type::getInt32Ty(C&: M.getContext()), V: B)),
6107 MDString::get(Context&: M.getContext(), Str: ID->getString()),
6108 Op->getOperand(I: 2)};
6109 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6110 Changed = true;
6111 };
6112
6113 if (ID->getString() == "Objective-C Image Info Version")
6114 HasObjCFlag = true;
6115 if (ID->getString() == "Objective-C Class Properties")
6116 HasClassProperties = true;
6117 // Upgrade PIC from Error/Max to Min.
6118 if (ID->getString() == "PIC Level") {
6119 if (auto *Behavior =
6120 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
6121 uint64_t V = Behavior->getLimitedValue();
6122 if (V == Module::Error || V == Module::Max)
6123 SetBehavior(Module::Min);
6124 }
6125 }
6126 // Upgrade "PIE Level" from Error to Max.
6127 if (ID->getString() == "PIE Level")
6128 if (auto *Behavior =
6129 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0)))
6130 if (Behavior->getLimitedValue() == Module::Error)
6131 SetBehavior(Module::Max);
6132
    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these flags was Error and is now Min.
6135 if (ID->getString() == "branch-target-enforcement" ||
6136 ID->getString().starts_with(Prefix: "sign-return-address")) {
6137 if (auto *Behavior =
6138 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
6139 if (Behavior->getLimitedValue() == Module::Error) {
6140 Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
6141 Metadata *Ops[3] = {
6142 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Min)),
6143 Op->getOperand(I: 1), Op->getOperand(I: 2)};
6144 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6145 Changed = true;
6146 }
6147 }
6148 }
6149
    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatched
    // module flags that are functionally the same.
6153 if (ID->getString() == "Objective-C Image Info Section") {
6154 if (auto *Value = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 2))) {
6155 SmallVector<StringRef, 4> ValueComp;
6156 Value->getString().split(A&: ValueComp, Separator: " ");
6157 if (ValueComp.size() != 1) {
6158 std::string NewValue;
6159 for (auto &S : ValueComp)
6160 NewValue += S.str();
6161 Metadata *Ops[3] = {Op->getOperand(I: 0), Op->getOperand(I: 1),
6162 MDString::get(Context&: M.getContext(), Str: NewValue)};
6163 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6164 Changed = true;
6165 }
6166 }
6167 }
6168
    // The IR upgrader turns an i32-typed "Objective-C Garbage Collection"
    // module flag into an i8 value. If the higher bits are set, it adds new
    // module flags for the Swift info.
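    // For example (illustrative), a packed value of 0x01060203 yields a GC
    // value of 0x03, Swift ABI version 2, Swift major version 1 and Swift
    // minor version 6.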
6171 if (ID->getString() == "Objective-C Garbage Collection") {
6172 auto Md = dyn_cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2));
6173 if (Md) {
6174 assert(Md->getValue() && "Expected non-empty metadata");
6175 auto Type = Md->getValue()->getType();
6176 if (Type == Int8Ty)
6177 continue;
6178 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6179 if ((Val & 0xff) != Val) {
6180 HasSwiftVersionFlag = true;
6181 SwiftABIVersion = (Val & 0xff00) >> 8;
6182 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6183 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6184 }
6185 Metadata *Ops[3] = {
6186 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty,V: Module::Error)),
6187 Op->getOperand(I: 1),
6188 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int8Ty,V: Val & 0xff))};
6189 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6190 Changed = true;
6191 }
6192 }
6193
6194 if (ID->getString() == "amdgpu_code_object_version") {
6195 Metadata *Ops[3] = {
6196 Op->getOperand(I: 0),
6197 MDString::get(Context&: M.getContext(), Str: "amdhsa_code_object_version"),
6198 Op->getOperand(I: 2)};
6199 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
6200 Changed = true;
6201 }
6202 }
6203
  // "Objective-C Class Properties" is a recent addition for Objective-C. We
  // upgrade ObjC bitcode to contain an "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
6209 if (HasObjCFlag && !HasClassProperties) {
6210 M.addModuleFlag(Behavior: llvm::Module::Override, Key: "Objective-C Class Properties",
6211 Val: (uint32_t)0);
6212 Changed = true;
6213 }
6214
6215 if (HasSwiftVersionFlag) {
6216 M.addModuleFlag(Behavior: Module::Error, Key: "Swift ABI Version",
6217 Val: SwiftABIVersion);
6218 M.addModuleFlag(Behavior: Module::Error, Key: "Swift Major Version",
6219 Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMajorVersion));
6220 M.addModuleFlag(Behavior: Module::Error, Key: "Swift Minor Version",
6221 Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMinorVersion));
6222 Changed = true;
6223 }
6224
6225 return Changed;
6226}
6227
6228void llvm::UpgradeSectionAttributes(Module &M) {
6229 auto TrimSpaces = [](StringRef Section) -> std::string {
6230 SmallVector<StringRef, 5> Components;
6231 Section.split(A&: Components, Separator: ',');
6232
6233 SmallString<32> Buffer;
6234 raw_svector_ostream OS(Buffer);
6235
6236 for (auto Component : Components)
6237 OS << ',' << Component.trim();
6238
6239 return std::string(OS.str().substr(Start: 1));
6240 };
6241
6242 for (auto &GV : M.globals()) {
6243 if (!GV.hasSection())
6244 continue;
6245
6246 StringRef Section = GV.getSection();
6247
6248 if (!Section.starts_with(Prefix: "__DATA, __objc_catlist"))
6249 continue;
6250
6251 // __DATA, __objc_catlist, regular, no_dead_strip
6252 // __DATA,__objc_catlist,regular,no_dead_strip
6253 GV.setSection(TrimSpaces(Section));
6254 }
6255}
6256
6257namespace {
6258// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6259// callsites within a function that did not also have the strictfp attribute.
6260// Since 10.0, if strict FP semantics are needed within a function, the
6261// function must have the strictfp attribute and all calls within the function
6262// must also have the strictfp attribute. This latter restriction is
6263// necessary to prevent unwanted libcall simplification when a function is
6264// being cloned (such as for inlining).
6265//
6266// The "dangling" strictfp attribute usage was only used to prevent constant
6267// folding and other libcall simplification. The nobuiltin attribute on the
6268// callsite has the same effect.
6269struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6270 StrictFPUpgradeVisitor() = default;
6271
6272 void visitCallBase(CallBase &Call) {
6273 if (!Call.isStrictFP())
6274 return;
6275 if (isa<ConstrainedFPIntrinsic>(Val: &Call))
6276 return;
6277 // If we get here, the caller doesn't have the strictfp attribute
6278 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6279 Call.removeFnAttr(Kind: Attribute::StrictFP);
6280 Call.addFnAttr(Kind: Attribute::NoBuiltin);
6281 }
6282};
6283
6284/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6285struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6286 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6287 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6288
6289 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6290 if (!RMW.isFloatingPointOperation())
6291 return;
6292
6293 MDNode *Empty = MDNode::get(Context&: RMW.getContext(), MDs: {});
6294 RMW.setMetadata(Kind: "amdgpu.no.fine.grained.host.memory", Node: Empty);
6295 RMW.setMetadata(Kind: "amdgpu.no.remote.memory.access", Node: Empty);
6296 RMW.setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: Empty);
6297 }
6298};
6299} // namespace
6300
6301void llvm::UpgradeFunctionAttributes(Function &F) {
6302 // If a function definition doesn't have the strictfp attribute,
6303 // convert any callsite strictfp attributes to nobuiltin.
6304 if (!F.isDeclaration() && !F.hasFnAttribute(Kind: Attribute::StrictFP)) {
6305 StrictFPUpgradeVisitor SFPV;
6306 SFPV.visit(F);
6307 }
6308
  // Remove all incompatible attributes from the function.
6310 F.removeRetAttrs(Attrs: AttributeFuncs::typeIncompatible(
6311 Ty: F.getReturnType(), AS: F.getAttributes().getRetAttrs()));
6312 for (auto &Arg : F.args())
6313 Arg.removeAttrs(
6314 AM: AttributeFuncs::typeIncompatible(Ty: Arg.getType(), AS: Arg.getAttributes()));
6315
6316 // Older versions of LLVM treated an "implicit-section-name" attribute
6317 // similarly to directly setting the section on a Function.
6318 if (Attribute A = F.getFnAttribute(Kind: "implicit-section-name");
6319 A.isValid() && A.isStringAttribute()) {
6320 F.setSection(A.getValueAsString());
6321 F.removeFnAttr(Kind: "implicit-section-name");
6322 }
6323
6324 if (!F.empty()) {
6325 // For some reason this is called twice, and the first time is before any
6326 // instructions are loaded into the body.
6327
6328 if (Attribute A = F.getFnAttribute(Kind: "amdgpu-unsafe-fp-atomics");
6329 A.isValid()) {
6330
6331 if (A.getValueAsBool()) {
6332 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6333 Visitor.visit(F);
6334 }
6335
6336 // We will leave behind dead attribute uses on external declarations, but
6337 // clang never added these to declarations anyway.
6338 F.removeFnAttr(Kind: "amdgpu-unsafe-fp-atomics");
6339 }
6340 }
6341}
6342
// Add the function attribute if it is not already present.
6344static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6345 StringRef Value) {
6346 if (!F.hasFnAttribute(Kind: FnAttrName))
6347 F.addFnAttr(Kind: FnAttrName, Val: Value);
6348}
6349
// If the function attribute is not present, add it when requested.
// If the attribute's value is "false", remove it.
// If its value is "true", reset it to a valueless attribute.
6353static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6354 if (!F.hasFnAttribute(Kind: FnAttrName)) {
6355 if (Set)
6356 F.addFnAttr(Kind: FnAttrName);
6357 } else {
6358 auto A = F.getFnAttribute(Kind: FnAttrName);
6359 if ("false" == A.getValueAsString())
6360 F.removeFnAttr(Kind: FnAttrName);
6361 else if ("true" == A.getValueAsString()) {
6362 F.removeFnAttr(Kind: FnAttrName);
6363 F.addFnAttr(Kind: FnAttrName);
6364 }
6365 }
6366}
6367
6368void llvm::copyModuleAttrToFunctions(Module &M) {
6369 Triple T(M.getTargetTriple());
6370 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6371 return;
6372
6373 uint64_t BTEValue = 0;
6374 uint64_t BPPLRValue = 0;
6375 uint64_t GCSValue = 0;
6376 uint64_t SRAValue = 0;
6377 uint64_t SRAALLValue = 0;
6378 uint64_t SRABKeyValue = 0;
6379
6380 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6381 if (ModFlags) {
6382 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6383 MDNode *Op = ModFlags->getOperand(i: I);
6384 if (Op->getNumOperands() != 3)
6385 continue;
6386
6387 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
6388 auto *CI = mdconst::dyn_extract<ConstantInt>(MD: Op->getOperand(I: 2));
6389 if (!ID || !CI)
6390 continue;
6391
6392 StringRef IDStr = ID->getString();
6393 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6394 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6395 : IDStr == "guarded-control-stack" ? &GCSValue
6396 : IDStr == "sign-return-address" ? &SRAValue
6397 : IDStr == "sign-return-address-all" ? &SRAALLValue
6398 : IDStr == "sign-return-address-with-bkey"
6399 ? &SRABKeyValue
6400 : nullptr;
6401 if (!ValPtr)
6402 continue;
6403
6404 *ValPtr = CI->getZExtValue();
6405 if (*ValPtr == 2)
6406 return;
6407 }
6408 }
6409
6410 bool BTE = BTEValue == 1;
6411 bool BPPLR = BPPLRValue == 1;
6412 bool GCS = GCSValue == 1;
6413 bool SRA = SRAValue == 1;
6414
6415 StringRef SignTypeValue = "non-leaf";
6416 if (SRA && SRAALLValue == 1)
6417 SignTypeValue = "all";
6418
6419 StringRef SignKeyValue = "a_key";
6420 if (SRA && SRABKeyValue == 1)
6421 SignKeyValue = "b_key";
6422
6423 for (Function &F : M.getFunctionList()) {
6424 if (F.isDeclaration())
6425 continue;
6426
6427 if (SRA) {
6428 setFunctionAttrIfNotSet(F, FnAttrName: "sign-return-address", Value: SignTypeValue);
6429 setFunctionAttrIfNotSet(F, FnAttrName: "sign-return-address-key", Value: SignKeyValue);
6430 } else {
6431 if (auto A = F.getFnAttribute(Kind: "sign-return-address");
6432 A.isValid() && "none" == A.getValueAsString()) {
6433 F.removeFnAttr(Kind: "sign-return-address");
6434 F.removeFnAttr(Kind: "sign-return-address-key");
6435 }
6436 }
6437 ConvertFunctionAttr(F, Set: BTE, FnAttrName: "branch-target-enforcement");
6438 ConvertFunctionAttr(F, Set: BPPLR, FnAttrName: "branch-protection-pauth-lr");
6439 ConvertFunctionAttr(F, Set: GCS, FnAttrName: "guarded-control-stack");
6440 }
6441
6442 if (BTE)
6443 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "branch-target-enforcement", Val: 2);
6444 if (BPPLR)
6445 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "branch-protection-pauth-lr", Val: 2);
6446 if (GCS)
6447 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "guarded-control-stack", Val: 2);
6448 if (SRA) {
6449 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address", Val: 2);
6450 if (SRAALLValue == 1)
6451 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address-all", Val: 2);
6452 if (SRABKeyValue == 1)
6453 M.setModuleFlag(Behavior: llvm::Module::Min, Key: "sign-return-address-with-bkey", Val: 2);
6454 }
6455}
6456
6457static bool isOldLoopArgument(Metadata *MD) {
6458 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
6459 if (!T)
6460 return false;
6461 if (T->getNumOperands() < 1)
6462 return false;
6463 auto *S = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
6464 if (!S)
6465 return false;
6466 return S->getString().starts_with(Prefix: "llvm.vectorizer.");
6467}
6468
6469static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6470 StringRef OldPrefix = "llvm.vectorizer.";
6471 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6472
6473 if (OldTag == "llvm.vectorizer.unroll")
6474 return MDString::get(Context&: C, Str: "llvm.loop.interleave.count");
6475
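  // Otherwise map the old prefix onto the new one, e.g. (illustrative)
  // "llvm.vectorizer.width" -> "llvm.loop.vectorize.width".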
6476 return MDString::get(
6477 Context&: C, Str: (Twine("llvm.loop.vectorize.") + OldTag.drop_front(N: OldPrefix.size()))
6478 .str());
6479}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().starts_with("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}

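// Rewrite a loop metadata attachment whose arguments still use the legacy
// "llvm.vectorizer.*" hints. For example (illustrative IR), an attachment
//   !0 = !{!0, !{!"llvm.vectorizer.width", i32 4}}
// is returned with the hint upgraded to
//   !{!"llvm.loop.vectorize.width", i32 4}
// while attachments with no old-style arguments are returned unchanged.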
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}

std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // The only data layout upgrade needed for pre-GCN, SPIR or SPIRV targets is
  // setting the address space of globals to 1. This does not apply to SPIRV
  // Logical.
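  // For example (illustrative), a layout of "e-i64:64-v16:16-v96:128" becomes
  // "e-i64:64-v16:16-v96:128-G1", and an empty layout becomes just "G1".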
  if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
      !DL.contains("-G") && !DL.starts_with("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isLoongArch64() || T.isRISCV64()) {
    // Make i32 a native type for 64-bit LoongArch and RISC-V.
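    // The "-n64-" component is rewritten to "-n32:64-", e.g. (illustrative)
    // a layout ending in "-n64-S128" becomes one ending in "-n32:64-S128".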
    auto I = DL.find("-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
    return DL.str();
  }

  // AMDGPU data layout upgrades.
  std::string Res = DL.str();
  if (T.isAMDGPU()) {
    // Define address spaces for constants.
    if (!DL.contains("-G") && !DL.starts_with("G"))
      Res.append(Res.empty() ? "G1" : "-G1");

    // AMDGCN data layout upgrades.
    if (T.isAMDGCN()) {

      // Add missing non-integral declarations.
      // This goes before adding new address spaces to prevent incoherent
      // string values.
      if (!DL.contains("-ni") && !DL.starts_with("ni"))
        Res.append("-ni:7:8:9");
      // Update ni:7 to ni:7:8:9.
      if (DL.ends_with("ni:7"))
        Res.append(":8:9");
      if (DL.ends_with("ni:7:8"))
        Res.append(":9");
      // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
      // resources). An empty data layout has already been upgraded to G1 by
      // now.
      if (!DL.contains("-p7") && !DL.starts_with("p7"))
        Res.append("-p7:160:256:256:32");
      if (!DL.contains("-p8") && !DL.starts_with("p8"))
        Res.append("-p8:128:128:128:48");
      constexpr StringRef OldP8("-p8:128:128-");
      if (DL.contains(OldP8))
        Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
      if (!DL.contains("-p9") && !DL.starts_with("p9"))
        Res.append("-p9:192:256:256:32");
    }

    // Upgrade the ELF mangling mode.
    if (!DL.contains("m:e"))
      Res = Res.empty() ? "m:e" : "m:e-" + Res;

    return Res;
  }

  if (T.isSystemZ() && !DL.empty()) {
    // Make sure the stack alignment is present.
    if (!DL.contains("-S64"))
      return "E-S64" + DL.drop_front(1).str();
    return DL.str();
  }

  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
    // If the datalayout matches the expected format, add pointer size address
    // spaces to the datalayout.
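    // For example (illustrative), "e-m:e-p:32:32-f64:32:64-n8:16:32-S128"
    // becomes
    // "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-n8:16:32-S128".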
    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
    if (!DL.contains(AddrSpaces)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + AddrSpaces + Groups[3]).str();
    }
  };

  // AArch64 data layout upgrades.
  if (T.isAArch64()) {
    // Add "-Fn32".
    if (!DL.empty() && !DL.contains("-Fn32"))
      Res.append("-Fn32");
    AddPtr32Ptr64AddrSpaces();
    return Res;
  }

  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
      T.isWasm()) {
    // Mips64 with the o32 ABI did not add "-i128:128".
    // Add "-i128:128" after "-i64:64" if it is not already present.
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    if (!StringRef(Res).contains(I128)) {
      size_t Pos = Res.find(I64);
      if (Pos != size_t(-1))
        Res.insert(Pos + I64.size(), I128);
    }
  }

  if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
    size_t Pos = Res.find("-S128");
    if (Pos == StringRef::npos)
      Pos = Res.size();
    Res.insert(Pos, "-f64:32:64");
  }

  if (!T.isX86())
    return Res;

  AddPtr32Ptr64AddrSpaces();

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
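  // For example (illustrative), "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  // becomes "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128".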
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}

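// Upgrade legacy string attributes to their modern equivalents, i.e.:
//   "no-frame-pointer-elim"="true"   -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"  -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" -> "frame-pointer"="non-leaf"
//     (unless "no-frame-pointer-elim"="true" is also present, which wins)
//   "null-pointer-is-valid"="true"   -> the NullPointerIsValid enum attribute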
void llvm::UpgradeAttributes(AttrBuilder &B) {
  StringRef FramePointer;
  Attribute A = B.getAttribute("no-frame-pointer-elim");
  if (A.isValid()) {
    // The value can be "true" or "false".
    FramePointer = A.getValueAsString() == "true" ? "all" : "none";
    B.removeAttribute("no-frame-pointer-elim");
  }
  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }
  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);

  A = B.getAttribute("null-pointer-is-valid");
  if (A.isValid()) {
    // The value can be "true" or "false".
    bool NullPointerIsValid = A.getValueAsString() == "true";
    B.removeAttribute("null-pointer-is-valid");
    if (NullPointerIsValid)
      B.addAttribute(Attribute::NullPointerIsValid);
  }
}

void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
  // clang.arc.attachedcall bundles are now required to have an operand.
  // If they don't, it's okay to drop them entirely: when there is an operand,
  // the "attachedcall" is meaningful and required, but without an operand,
  // it's just a marker NOP. Dropping it merely prevents an optimization.
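  // For example (illustrative), a call-site bundle
  //   [ "clang.arc.attachedcall"() ]
  // is dropped, while
  //   [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
  // is kept.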
  erase_if(Bundles, [&](OperandBundleDef &OBD) {
    return OBD.getTag() == "clang.arc.attachedcall" &&
           OBD.inputs().empty();
  });
}
