//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

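// Upgrade the declarations of AVX512 bf16 conversion intrinsics whose return
// type changed from integer vectors to bf16 vectors.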
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

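// Upgrade the declarations of AVX512 bf16 dot-product intrinsics whose second
// operand changed from an integer vector to a bf16 vector.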
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

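// Returns true if the named x86 intrinsic has no direct replacement
// declaration and its calls must instead be rewritten in UpgradeIntrinsicCall.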
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

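// Upgrade the declaration of an x86 intrinsic. Returns true if Name was
// recognized as needing an upgrade; NewFn is left null when the call itself
// must be rewritten later in UpgradeIntrinsicCall.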
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

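// Returns the new intrinsic ID for an 'nvvm.cp.async.bulk.tensor.g2s.*'
// declaration that still uses the old shared address space or the old
// flag-argument signature, or not_intrinsic if no upgrade is needed.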
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    // (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", then
    // it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}

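// Returns the new intrinsic ID for an 'nvvm.*' shared-cluster intrinsic whose
// pointer argument or result still uses the plain shared address space
// instead of shared_cluster, or not_intrinsic if no upgrade is needed.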
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

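// Maps old 'nvvm.{fma,fmax,fmin,neg}.*bf16*' names to their replacement
// intrinsic IDs; the caller checks whether the existing declaration still uses
// integer types for the bf16 values before upgrading.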
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

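// Consume an NVVM address-space qualifier ("local", "shared", "global",
// "constant", or "param") from the front of Name, returning true if one was
// present.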
static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

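// Upgrade the declaration of intrinsic function F if it is deprecated or has
// changed its signature. Returns true if an upgrade is needed; on return,
// NewFn holds the replacement declaration, or is null when the calls
// themselves must be rewritten in UpgradeIntrinsicCall.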
1105static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
1106 bool CanUpgradeDebugIntrinsicsToRecords) {
1107 assert(F && "Illegal to upgrade a non-existent Function.");
1108
1109 StringRef Name = F->getName();
1110
1111 // Quickly eliminate it, if it's not a candidate.
1112 if (!Name.consume_front(Prefix: "llvm.") || Name.empty())
1113 return false;
1114
1115 switch (Name[0]) {
1116 default: break;
1117 case 'a': {
1118 bool IsArm = Name.consume_front(Prefix: "arm.");
1119 if (IsArm || Name.consume_front(Prefix: "aarch64.")) {
1120 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1121 return true;
1122 break;
1123 }
1124
1125 if (Name.consume_front(Prefix: "amdgcn.")) {
1126 if (Name == "alignbit") {
1127 // Target specific intrinsic became redundant
1128 NewFn = Intrinsic::getOrInsertDeclaration(
1129 M: F->getParent(), id: Intrinsic::fshr, Tys: {F->getReturnType()});
1130 return true;
1131 }
1132
1133 if (Name.consume_front(Prefix: "atomic.")) {
1134 if (Name.starts_with(Prefix: "inc") || Name.starts_with(Prefix: "dec")) {
1135 // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1136 // there's no new declaration.
1137 NewFn = nullptr;
1138 return true;
1139 }
1140 break; // No other 'amdgcn.atomic.*'
1141 }
1142
1143 if (Name.consume_front(Prefix: "ds.") || Name.consume_front(Prefix: "global.atomic.") ||
1144 Name.consume_front(Prefix: "flat.atomic.")) {
1145 if (Name.starts_with(Prefix: "fadd") ||
1146 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1147 (Name.starts_with(Prefix: "fmin") && !Name.starts_with(Prefix: "fmin.num")) ||
1148 (Name.starts_with(Prefix: "fmax") && !Name.starts_with(Prefix: "fmax.num"))) {
1149 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1150 // declaration.
1151 NewFn = nullptr;
1152 return true;
1153 }
1154 }
1155
1156 if (Name.starts_with(Prefix: "ldexp.")) {
1157 // Target specific intrinsic became redundant
1158 NewFn = Intrinsic::getOrInsertDeclaration(
1159 M: F->getParent(), id: Intrinsic::ldexp,
1160 Tys: {F->getReturnType(), F->getArg(i: 1)->getType()});
1161 return true;
1162 }
1163 break; // No other 'amdgcn.*'
1164 }
1165
1166 break;
1167 }
1168 case 'c': {
1169 if (F->arg_size() == 1) {
1170 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1171 .StartsWith(S: "ctlz.", Value: Intrinsic::ctlz)
1172 .StartsWith(S: "cttz.", Value: Intrinsic::cttz)
1173 .Default(Value: Intrinsic::not_intrinsic);
1174 if (ID != Intrinsic::not_intrinsic) {
1175 rename(GV: F);
1176 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1177 Tys: F->arg_begin()->getType());
1178 return true;
1179 }
1180 }
1181
1182 if (F->arg_size() == 2 && Name == "coro.end") {
1183 rename(GV: F);
1184 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1185 id: Intrinsic::coro_end);
1186 return true;
1187 }
1188
1189 break;
1190 }
1191 case 'd':
1192 if (Name.consume_front(Prefix: "dbg.")) {
1193 // Mark debug intrinsics for upgrade to new debug format.
1194 if (CanUpgradeDebugIntrinsicsToRecords) {
1195 if (Name == "addr" || Name == "value" || Name == "assign" ||
1196 Name == "declare" || Name == "label") {
1197 // There's no function to replace these with.
1198 NewFn = nullptr;
1199 // But we do want these to get upgraded.
1200 return true;
1201 }
1202 }
1203 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1204 // converted to DbgVariableRecords later.
1205 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1206 rename(GV: F);
1207 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1208 id: Intrinsic::dbg_value);
1209 return true;
1210 }
1211 break; // No other 'dbg.*'.
1212 }
1213 break;
1214 case 'e':
1215 if (Name.consume_front(Prefix: "experimental.vector.")) {
1216 Intrinsic::ID ID =
1217 StringSwitch<Intrinsic::ID>(Name)
1218 // Skip over extract.last.active, otherwise it will be 'upgraded'
1219 // to a regular vector extract which is a different operation.
1220 .StartsWith(S: "extract.last.active.", Value: Intrinsic::not_intrinsic)
1221 .StartsWith(S: "extract.", Value: Intrinsic::vector_extract)
1222 .StartsWith(S: "insert.", Value: Intrinsic::vector_insert)
1223 .StartsWith(S: "splice.", Value: Intrinsic::vector_splice)
1224 .StartsWith(S: "reverse.", Value: Intrinsic::vector_reverse)
1225 .StartsWith(S: "interleave2.", Value: Intrinsic::vector_interleave2)
1226 .StartsWith(S: "deinterleave2.", Value: Intrinsic::vector_deinterleave2)
1227 .Default(Value: Intrinsic::not_intrinsic);
1228 if (ID != Intrinsic::not_intrinsic) {
1229 const auto *FT = F->getFunctionType();
1230 SmallVector<Type *, 2> Tys;
1231 if (ID == Intrinsic::vector_extract ||
1232 ID == Intrinsic::vector_interleave2)
1233 // Extracting overloads the return type.
1234 Tys.push_back(Elt: FT->getReturnType());
1235 if (ID != Intrinsic::vector_interleave2)
1236 Tys.push_back(Elt: FT->getParamType(i: 0));
1237 if (ID == Intrinsic::vector_insert)
1238 // Inserting overloads the inserted type.
1239 Tys.push_back(Elt: FT->getParamType(i: 1));
1240 rename(GV: F);
1241 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys);
1242 return true;
1243 }
1244
1245 if (Name.consume_front(Prefix: "reduce.")) {
1246 SmallVector<StringRef, 2> Groups;
1247 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1248 if (R.match(String: Name, Matches: &Groups))
1249 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1250 .Case(S: "add", Value: Intrinsic::vector_reduce_add)
1251 .Case(S: "mul", Value: Intrinsic::vector_reduce_mul)
1252 .Case(S: "and", Value: Intrinsic::vector_reduce_and)
1253 .Case(S: "or", Value: Intrinsic::vector_reduce_or)
1254 .Case(S: "xor", Value: Intrinsic::vector_reduce_xor)
1255 .Case(S: "smax", Value: Intrinsic::vector_reduce_smax)
1256 .Case(S: "smin", Value: Intrinsic::vector_reduce_smin)
1257 .Case(S: "umax", Value: Intrinsic::vector_reduce_umax)
1258 .Case(S: "umin", Value: Intrinsic::vector_reduce_umin)
1259 .Case(S: "fmax", Value: Intrinsic::vector_reduce_fmax)
1260 .Case(S: "fmin", Value: Intrinsic::vector_reduce_fmin)
1261 .Default(Value: Intrinsic::not_intrinsic);
1262
1263 bool V2 = false;
1264 if (ID == Intrinsic::not_intrinsic) {
1265 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1266 Groups.clear();
1267 V2 = true;
1268 if (R2.match(String: Name, Matches: &Groups))
1269 ID = StringSwitch<Intrinsic::ID>(Groups[1])
1270 .Case(S: "fadd", Value: Intrinsic::vector_reduce_fadd)
1271 .Case(S: "fmul", Value: Intrinsic::vector_reduce_fmul)
1272 .Default(Value: Intrinsic::not_intrinsic);
1273 }
1274 if (ID != Intrinsic::not_intrinsic) {
1275 rename(GV: F);
1276 auto Args = F->getFunctionType()->params();
1277 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1278 Tys: {Args[V2 ? 1 : 0]});
1279 return true;
1280 }
1281 break; // No other 'expermental.vector.reduce.*'.
1282 }
1283 break; // No other 'experimental.vector.*'.
1284 }
1285 if (Name.consume_front(Prefix: "experimental.stepvector.")) {
1286 Intrinsic::ID ID = Intrinsic::stepvector;
1287 rename(GV: F);
1288 NewFn = Intrinsic::getOrInsertDeclaration(
1289 M: F->getParent(), id: ID, Tys: F->getFunctionType()->getReturnType());
1290 return true;
1291 }
1292 break; // No other 'e*'.
1293 case 'f':
1294 if (Name.starts_with(Prefix: "flt.rounds")) {
1295 rename(GV: F);
1296 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1297 id: Intrinsic::get_rounding);
1298 return true;
1299 }
1300 break;
1301 case 'i':
1302 if (Name.starts_with(Prefix: "invariant.group.barrier")) {
1303 // Rename invariant.group.barrier to launder.invariant.group
1304 auto Args = F->getFunctionType()->params();
1305 Type* ObjectPtr[1] = {Args[0]};
1306 rename(GV: F);
1307 NewFn = Intrinsic::getOrInsertDeclaration(
1308 M: F->getParent(), id: Intrinsic::launder_invariant_group, Tys: ObjectPtr);
1309 return true;
1310 }
1311 break;
1312 case 'm': {
1313 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1314 // alignment parameter to embedding the alignment as an attribute of
1315 // the pointer args.
1316 if (unsigned ID = StringSwitch<unsigned>(Name)
1317 .StartsWith(S: "memcpy.", Value: Intrinsic::memcpy)
1318 .StartsWith(S: "memmove.", Value: Intrinsic::memmove)
1319 .Default(Value: 0)) {
1320 if (F->arg_size() == 5) {
1321 rename(GV: F);
1322 // Get the types of dest, src, and len
1323 ArrayRef<Type *> ParamTypes =
1324 F->getFunctionType()->params().slice(N: 0, M: 3);
1325 NewFn =
1326 Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID, Tys: ParamTypes);
1327 return true;
1328 }
1329 }
1330 if (Name.starts_with(Prefix: "memset.") && F->arg_size() == 5) {
1331 rename(GV: F);
1332 // Get the types of dest and len
1333 const auto *FT = F->getFunctionType();
1334 Type *ParamTypes[2] = {
1335 FT->getParamType(i: 0), // Dest
1336 FT->getParamType(i: 2) // len
1337 };
1338 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1339 id: Intrinsic::memset, Tys: ParamTypes);
1340 return true;
1341 }
1342 break;
1343 }
1344 case 'n': {
1345 if (Name.consume_front(Prefix: "nvvm.")) {
1346 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1347 if (F->arg_size() == 1) {
1348 Intrinsic::ID IID =
1349 StringSwitch<Intrinsic::ID>(Name)
1350 .Cases(S0: "brev32", S1: "brev64", Value: Intrinsic::bitreverse)
1351 .Case(S: "clz.i", Value: Intrinsic::ctlz)
1352 .Case(S: "popc.i", Value: Intrinsic::ctpop)
1353 .Default(Value: Intrinsic::not_intrinsic);
1354 if (IID != Intrinsic::not_intrinsic) {
1355 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID,
1356 Tys: {F->getReturnType()});
1357 return true;
1358 }
1359 }
1360
1361 // Check for nvvm intrinsics that need a return type adjustment.
1362 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1363 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1364 if (IID != Intrinsic::not_intrinsic) {
1365 NewFn = nullptr;
1366 return true;
1367 }
1368 }
1369
1370 // Upgrade Distributed Shared Memory Intrinsics
1371 Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
1372 if (IID != Intrinsic::not_intrinsic) {
1373 rename(GV: F);
1374 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1375 return true;
1376 }
1377
1378 // Upgrade TMA copy G2S Intrinsics
1379 IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
1380 if (IID != Intrinsic::not_intrinsic) {
1381 rename(GV: F);
1382 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
1383 return true;
1384 }
1385
1386 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1387 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1388 //
1389 // TODO: We could add lohi.i2d.
1390 bool Expand = false;
1391 if (Name.consume_front(Prefix: "abs."))
1392 // nvvm.abs.{i,ll,bf16,bf16x2}
1393 Expand =
1394 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1395 else if (Name.consume_front(Prefix: "fabs."))
1396 // nvvm.fabs.{f,ftz.f,d}
1397 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1398 else if (Name.consume_front(Prefix: "max.") || Name.consume_front(Prefix: "min."))
1399 // nvvm.{min,max}.{s,i,ll,us,ui,ull}
1400 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1401 Name == "ui" || Name == "ull";
1402 else if (Name.consume_front(Prefix: "atomic.load."))
1403 // nvvm.atomic.load.add.{f32,f64}.p
1404 // nvvm.atomic.load.{inc,dec}.32.p
1405 Expand = StringSwitch<bool>(Name)
1406 .StartsWith(S: "add.f32.p", Value: true)
1407 .StartsWith(S: "add.f64.p", Value: true)
1408 .StartsWith(S: "inc.32.p", Value: true)
1409 .StartsWith(S: "dec.32.p", Value: true)
1410 .Default(Value: false);
1411 else if (Name.consume_front(Prefix: "bitcast."))
1412 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1413 Expand =
1414 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1415 else if (Name.consume_front(Prefix: "rotate."))
1416 // nvvm.rotate.{b32,b64,right.b64}
1417 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1418 else if (Name.consume_front(Prefix: "ptr.gen.to."))
1419 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1420 Expand = consumeNVVMPtrAddrSpace(Name);
1421 else if (Name.consume_front(Prefix: "ptr."))
1422 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1423 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(Prefix: ".to.gen");
1424 else if (Name.consume_front(Prefix: "ldg.global."))
1425 // nvvm.ldg.global.{i,p,f}
1426 Expand = (Name.starts_with(Prefix: "i.") || Name.starts_with(Prefix: "f.") ||
1427 Name.starts_with(Prefix: "p."));
1428 else
1429 Expand = StringSwitch<bool>(Name)
1430 .Case(S: "barrier0", Value: true)
1431 .Case(S: "barrier.n", Value: true)
1432 .Case(S: "barrier.sync.cnt", Value: true)
1433 .Case(S: "barrier.sync", Value: true)
1434 .Case(S: "barrier", Value: true)
1435 .Case(S: "bar.sync", Value: true)
1436 .Case(S: "clz.ll", Value: true)
1437 .Case(S: "popc.ll", Value: true)
1438 .Case(S: "h2f", Value: true)
1439 .Case(S: "swap.lo.hi.b64", Value: true)
1440 .Default(Value: false);
1441
1442 if (Expand) {
1443 NewFn = nullptr;
1444 return true;
1445 }
1446 break; // No other 'nvvm.*'.
1447 }
1448 break;
1449 }
1450 case 'o':
1451 if (Name.starts_with(Prefix: "objectsize.")) {
1452 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1453 if (F->arg_size() == 2 || F->arg_size() == 3) {
1454 rename(GV: F);
1455 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(),
1456 id: Intrinsic::objectsize, Tys);
1457 return true;
1458 }
1459 }
1460 break;
1461
1462 case 'p':
1463 if (Name.starts_with(Prefix: "ptr.annotation.") && F->arg_size() == 4) {
1464 rename(GV: F);
1465 NewFn = Intrinsic::getOrInsertDeclaration(
1466 M: F->getParent(), id: Intrinsic::ptr_annotation,
1467 Tys: {F->arg_begin()->getType(), F->getArg(i: 1)->getType()});
1468 return true;
1469 }
1470 break;
1471
1472 case 'r': {
1473 if (Name.consume_front(Prefix: "riscv.")) {
1474 Intrinsic::ID ID;
1475 ID = StringSwitch<Intrinsic::ID>(Name)
1476 .Case(S: "aes32dsi", Value: Intrinsic::riscv_aes32dsi)
1477 .Case(S: "aes32dsmi", Value: Intrinsic::riscv_aes32dsmi)
1478 .Case(S: "aes32esi", Value: Intrinsic::riscv_aes32esi)
1479 .Case(S: "aes32esmi", Value: Intrinsic::riscv_aes32esmi)
1480 .Default(Value: Intrinsic::not_intrinsic);
1481 if (ID != Intrinsic::not_intrinsic) {
1482 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32)) {
1483 rename(GV: F);
1484 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1485 return true;
1486 }
1487 break; // No other applicable upgrades.
1488 }
1489
1490 ID = StringSwitch<Intrinsic::ID>(Name)
1491 .StartsWith(S: "sm4ks", Value: Intrinsic::riscv_sm4ks)
1492 .StartsWith(S: "sm4ed", Value: Intrinsic::riscv_sm4ed)
1493 .Default(Value: Intrinsic::not_intrinsic);
1494 if (ID != Intrinsic::not_intrinsic) {
1495 if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32) ||
1496 F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) {
1497 rename(GV: F);
1498 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1499 return true;
1500 }
1501 break; // No other applicable upgrades.
1502 }
1503
1504 ID = StringSwitch<Intrinsic::ID>(Name)
1505 .StartsWith(S: "sha256sig0", Value: Intrinsic::riscv_sha256sig0)
1506 .StartsWith(S: "sha256sig1", Value: Intrinsic::riscv_sha256sig1)
1507 .StartsWith(S: "sha256sum0", Value: Intrinsic::riscv_sha256sum0)
1508 .StartsWith(S: "sha256sum1", Value: Intrinsic::riscv_sha256sum1)
1509 .StartsWith(S: "sm3p0", Value: Intrinsic::riscv_sm3p0)
1510 .StartsWith(S: "sm3p1", Value: Intrinsic::riscv_sm3p1)
1511 .Default(Value: Intrinsic::not_intrinsic);
1512 if (ID != Intrinsic::not_intrinsic) {
1513 if (F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) {
1514 rename(GV: F);
1515 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1516 return true;
1517 }
1518 break; // No other applicable upgrades.
1519 }
1520 break; // No other 'riscv.*' intrinsics
1521 }
1522 } break;
1523
1524 case 's':
1525 if (Name == "stackprotectorcheck") {
1526 NewFn = nullptr;
1527 return true;
1528 }
1529 break;
1530
1531 case 't':
1532 if (Name == "thread.pointer") {
1533 NewFn = Intrinsic::getOrInsertDeclaration(
1534 M: F->getParent(), id: Intrinsic::thread_pointer, Tys: F->getReturnType());
1535 return true;
1536 }
1537 break;
1538
1539 case 'v': {
1540 if (Name == "var.annotation" && F->arg_size() == 4) {
1541 rename(GV: F);
1542 NewFn = Intrinsic::getOrInsertDeclaration(
1543 M: F->getParent(), id: Intrinsic::var_annotation,
1544 Tys: {{F->arg_begin()->getType(), F->getArg(i: 1)->getType()}});
1545 return true;
1546 }
1547 break;
1548 }
1549
1550 case 'w':
1551 if (Name.consume_front(Prefix: "wasm.")) {
1552 Intrinsic::ID ID =
1553 StringSwitch<Intrinsic::ID>(Name)
1554 .StartsWith(S: "fma.", Value: Intrinsic::wasm_relaxed_madd)
1555 .StartsWith(S: "fms.", Value: Intrinsic::wasm_relaxed_nmadd)
1556 .StartsWith(S: "laneselect.", Value: Intrinsic::wasm_relaxed_laneselect)
1557 .Default(Value: Intrinsic::not_intrinsic);
1558 if (ID != Intrinsic::not_intrinsic) {
1559 rename(GV: F);
1560 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID,
1561 Tys: F->getReturnType());
1562 return true;
1563 }
1564
1565 if (Name.consume_front(Prefix: "dot.i8x16.i7x16.")) {
1566 ID = StringSwitch<Intrinsic::ID>(Name)
1567 .Case(S: "signed", Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1568 .Case(S: "add.signed",
1569 Value: Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1570 .Default(Value: Intrinsic::not_intrinsic);
1571 if (ID != Intrinsic::not_intrinsic) {
1572 rename(GV: F);
1573 NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: ID);
1574 return true;
1575 }
1576 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1577 }
1578 break; // No other 'wasm.*'.
1579 }
1580 break;
1581
1582 case 'x':
1583 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1584 return true;
1585 }
1586
1587 auto *ST = dyn_cast<StructType>(Val: F->getReturnType());
1588 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1589 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1590 // Replace return type with literal non-packed struct. Only do this for
1591 // intrinsics declared to return a struct, not for intrinsics with
1592 // overloaded return type, in which case the exact struct type will be
1593 // mangled into the name.
1594 SmallVector<Intrinsic::IITDescriptor> Desc;
1595 Intrinsic::getIntrinsicInfoTableEntries(id: F->getIntrinsicID(), T&: Desc);
1596 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1597 auto *FT = F->getFunctionType();
1598 auto *NewST = StructType::get(Context&: ST->getContext(), Elements: ST->elements());
1599 auto *NewFT = FunctionType::get(Result: NewST, Params: FT->params(), isVarArg: FT->isVarArg());
1600 std::string Name = F->getName().str();
1601 rename(GV: F);
1602 NewFn = Function::Create(Ty: NewFT, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(),
1603 N: Name, M: F->getParent());
1604
1605 // The new function may also need remangling.
1606 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F: NewFn))
1607 NewFn = *Result;
1608 return true;
1609 }
1610 }
1611
1612 // Remangle our intrinsic since the mangling rules may have changed.
1613 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1614 if (Result != std::nullopt) {
1615 NewFn = *Result;
1616 return true;
1617 }
1618
1619 // This may not belong here. This function is effectively being overloaded
1620 // to both detect an intrinsic which needs upgrading, and to provide the
1621 // upgraded form of the intrinsic. We should perhaps have two separate
1622 // functions for this.
1623 return false;
1624}
1625
1626bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1627 bool CanUpgradeDebugIntrinsicsToRecords) {
1628 NewFn = nullptr;
1629 bool Upgraded =
1630 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1631 assert(F != NewFn && "Intrinsic function upgraded to the same function");
1632
1633 // Upgrade intrinsic attributes. This does not change the function.
1634 if (NewFn)
1635 F = NewFn;
1636 if (Intrinsic::ID id = F->getIntrinsicID()) {
1637 // Only do this if the intrinsic signature is valid.
1638 SmallVector<Type *> OverloadTys;
1639 if (Intrinsic::getIntrinsicSignature(id, FT: F->getFunctionType(), ArgTys&: OverloadTys))
1640 F->setAttributes(
1641 Intrinsic::getAttributes(C&: F->getContext(), id, FT: F->getFunctionType()));
1642 }
1643 return Upgraded;
1644}
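
// A minimal usage sketch (illustrative only; the real callers are, for
// instance, the bitcode reader and the IR parser):
//
//   Function *NewFn;
//   if (UpgradeIntrinsicFunction(F, NewFn)) {
//     // Rewrite each call site of F with UpgradeIntrinsicCall; when NewFn is
//     // null the old intrinsic is expanded to plain IR there instead.
//   }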
1645
1646GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1647 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1648 GV->getName() == "llvm.global_dtors")) ||
1649 !GV->hasInitializer())
1650 return nullptr;
1651 ArrayType *ATy = dyn_cast<ArrayType>(Val: GV->getValueType());
1652 if (!ATy)
1653 return nullptr;
1654 StructType *STy = dyn_cast<StructType>(Val: ATy->getElementType());
1655 if (!STy || STy->getNumElements() != 2)
1656 return nullptr;
1657
1658 LLVMContext &C = GV->getContext();
1659 IRBuilder<> IRB(C);
1660 auto EltTy = StructType::get(elt1: STy->getElementType(N: 0), elts: STy->getElementType(N: 1),
1661 elts: IRB.getPtrTy());
1662 Constant *Init = GV->getInitializer();
1663 unsigned N = Init->getNumOperands();
1664 std::vector<Constant *> NewCtors(N);
1665 for (unsigned i = 0; i != N; ++i) {
1666 auto Ctor = cast<Constant>(Val: Init->getOperand(i));
1667 NewCtors[i] = ConstantStruct::get(T: EltTy, Vs: Ctor->getAggregateElement(Elt: 0u),
1668 Vs: Ctor->getAggregateElement(Elt: 1),
1669 Vs: ConstantPointerNull::get(T: IRB.getPtrTy()));
1670 }
1671 Constant *NewInit = ConstantArray::get(T: ArrayType::get(ElementType: EltTy, NumElements: N), V: NewCtors);
1672
1673 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1674 NewInit, GV->getName());
1675}
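
// For example (a sketch), an old-style constructor list such as
//   @llvm.global_ctors = appending global [1 x { i32, ptr }]
//       [{ i32, ptr } { i32 65535, ptr @ctor }]
// is rebuilt in the three-field form, with a null associated-data member:
//   @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }]
//       [{ i32, ptr, ptr } { i32 65535, ptr @ctor, ptr null }]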
1676
1677// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1678// to byte shuffles.
1679static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1680 unsigned Shift) {
1681 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1682 unsigned NumElts = ResultTy->getNumElements() * 8;
1683
1684 // Bitcast from a 64-bit element type to a byte element type.
1685 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1686 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1687
1688 // We'll be shuffling in zeroes.
1689 Value *Res = Constant::getNullValue(Ty: VecTy);
1690
1691 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1692 // we'll just return the zero vector.
1693 if (Shift < 16) {
1694 int Idxs[64];
1695 // 256/512-bit version is split into 2/4 16-byte lanes.
1696 for (unsigned l = 0; l != NumElts; l += 16)
1697 for (unsigned i = 0; i != 16; ++i) {
1698 unsigned Idx = NumElts + i - Shift;
1699 if (Idx < NumElts)
1700 Idx -= NumElts - 16; // end of lane, switch operand.
1701 Idxs[l + i] = Idx + l;
1702 }
1703
1704 Res = Builder.CreateShuffleVector(V1: Res, V2: Op, Mask: ArrayRef(Idxs, NumElts));
1705 }
1706
1707 // Bitcast back to a 64-bit element type.
1708 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
1709}
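
// For example (a sketch), a 128-bit PSLLDQ of a <2 x i64> operand by 4 bytes
// becomes a byte shuffle with the mask <12, 13, 14, 15, 16, 17, ..., 27>,
// where indices 0-15 select zero bytes and indices 16-31 select bytes of Op,
// i.e. four zero bytes followed by the low twelve bytes of the input.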
1710
1711// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1712// to byte shuffles.
1713static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1714 unsigned Shift) {
1715 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1716 unsigned NumElts = ResultTy->getNumElements() * 8;
1717
1718 // Bitcast from a 64-bit element type to a byte element type.
1719 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1720 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1721
1722 // We'll be shuffling in zeroes.
1723 Value *Res = Constant::getNullValue(Ty: VecTy);
1724
1725 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1726 // we'll just return the zero vector.
1727 if (Shift < 16) {
1728 int Idxs[64];
1729 // 256/512-bit version is split into 2/4 16-byte lanes.
1730 for (unsigned l = 0; l != NumElts; l += 16)
1731 for (unsigned i = 0; i != 16; ++i) {
1732 unsigned Idx = i + Shift;
1733 if (Idx >= 16)
1734 Idx += NumElts - 16; // end of lane, switch operand.
1735 Idxs[l + i] = Idx + l;
1736 }
1737
1738 Res = Builder.CreateShuffleVector(V1: Op, V2: Res, Mask: ArrayRef(Idxs, NumElts));
1739 }
1740
1741 // Bitcast back to a 64-bit element type.
1742 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
1743}
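
// For example (a sketch), a 128-bit PSRLDQ of a <2 x i64> operand by 4 bytes
// uses the mask <4, 5, ..., 15, 16, 17, 18, 19>: the low twelve result bytes
// come from bytes 4-15 of the input and the top four bytes are zero.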
1744
1745static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1746 unsigned NumElts) {
1747 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1748 llvm::VectorType *MaskTy = FixedVectorType::get(
1749 ElementType: Builder.getInt1Ty(), NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth());
1750 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
1751
1752 // If we have fewer than 8 elements (1, 2 or 4), then the original mask was an
1753 // i8 and we need to extract down to the right number of elements.
1754 if (NumElts <= 4) {
1755 int Indices[4];
1756 for (unsigned i = 0; i != NumElts; ++i)
1757 Indices[i] = i;
1758 Mask = Builder.CreateShuffleVector(V1: Mask, V2: Mask, Mask: ArrayRef(Indices, NumElts),
1759 Name: "extract");
1760 }
1761
1762 return Mask;
1763}
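
// For example (a sketch), an i8 mask used with a 4-element vector is first
// bitcast to <8 x i1> and then shuffled down with the mask <0, 1, 2, 3> to the
// <4 x i1> that the select / masked-load / masked-store forms expect.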
1764
1765static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1766 Value *Op1) {
1767 // If the mask is all ones just emit the first operation.
1768 if (const auto *C = dyn_cast<Constant>(Val: Mask))
1769 if (C->isAllOnesValue())
1770 return Op0;
1771
1772 Mask = getX86MaskVec(Builder, Mask,
1773 NumElts: cast<FixedVectorType>(Val: Op0->getType())->getNumElements());
1774 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
1775}
1776
1777static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1778 Value *Op1) {
1779 // If the mask is all ones just emit the first operation.
1780 if (const auto *C = dyn_cast<Constant>(Val: Mask))
1781 if (C->isAllOnesValue())
1782 return Op0;
1783
1784 auto *MaskTy = FixedVectorType::get(ElementType: Builder.getInt1Ty(),
1785 NumElts: Mask->getType()->getIntegerBitWidth());
1786 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
1787 Mask = Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0);
1788 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
1789}
1790
1791 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1792 // PALIGNR handles large immediates by shifting, while VALIGN masks the
1793 // immediate, so we need to handle both cases. VALIGN also has no 128-bit lanes.
1794static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1795 Value *Op1, Value *Shift,
1796 Value *Passthru, Value *Mask,
1797 bool IsVALIGN) {
1798 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Shift)->getZExtValue();
1799
1800 unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
1801 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1802 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1803 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1804
1805 // Mask the immediate for VALIGN.
1806 if (IsVALIGN)
1807 ShiftVal &= (NumElts - 1);
1808
1809 // If palignr is shifting the pair of vectors more than the size of two
1810 // lanes, emit zero.
1811 if (ShiftVal >= 32)
1812 return llvm::Constant::getNullValue(Ty: Op0->getType());
1813
1814 // If palignr is shifting the pair of input vectors more than one lane,
1815 // but less than two lanes, convert to shifting in zeroes.
1816 if (ShiftVal > 16) {
1817 ShiftVal -= 16;
1818 Op1 = Op0;
1819 Op0 = llvm::Constant::getNullValue(Ty: Op0->getType());
1820 }
1821
1822 int Indices[64];
1823 // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
1824 for (unsigned l = 0; l < NumElts; l += 16) {
1825 for (unsigned i = 0; i != 16; ++i) {
1826 unsigned Idx = ShiftVal + i;
1827 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1828 Idx += NumElts - 16; // End of lane, switch operand.
1829 Indices[l + i] = Idx + l;
1830 }
1831 }
1832
1833 Value *Align = Builder.CreateShuffleVector(
1834 V1: Op1, V2: Op0, Mask: ArrayRef(Indices, NumElts), Name: "palignr");
1835
1836 return emitX86Select(Builder, Mask, Op0: Align, Op1: Passthru);
1837}
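
// For example (a sketch), a 128-bit PALIGNR of <16 x i8> operands with an
// immediate of 4 produces the shuffle mask <4, 5, ..., 19>, i.e. the upper
// twelve bytes of Op1 followed by the low four bytes of Op0, which matches a
// byte-wise right shift of the Op0:Op1 concatenation by four bytes.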
1838
1839static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1840 bool ZeroMask, bool IndexForm) {
1841 Type *Ty = CI.getType();
1842 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1843 unsigned EltWidth = Ty->getScalarSizeInBits();
1844 bool IsFloat = Ty->isFPOrFPVectorTy();
1845 Intrinsic::ID IID;
1846 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1847 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1848 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1849 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1850 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1851 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1852 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1853 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1854 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1855 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1856 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1857 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1858 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1859 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1860 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1861 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1862 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1863 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1864 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1865 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1866 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1867 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1868 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1869 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1870 else if (VecWidth == 128 && EltWidth == 16)
1871 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1872 else if (VecWidth == 256 && EltWidth == 16)
1873 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1874 else if (VecWidth == 512 && EltWidth == 16)
1875 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1876 else if (VecWidth == 128 && EltWidth == 8)
1877 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1878 else if (VecWidth == 256 && EltWidth == 8)
1879 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1880 else if (VecWidth == 512 && EltWidth == 8)
1881 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1882 else
1883 llvm_unreachable("Unexpected intrinsic");
1884
1885 Value *Args[] = { CI.getArgOperand(i: 0) , CI.getArgOperand(i: 1),
1886 CI.getArgOperand(i: 2) };
1887
1888 // If this isn't index form we need to swap operands 0 and 1.
1889 if (!IndexForm)
1890 std::swap(a&: Args[0], b&: Args[1]);
1891
1892 Value *V = Builder.CreateIntrinsic(ID: IID, Args);
1893 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1894 : Builder.CreateBitCast(V: CI.getArgOperand(i: 1),
1895 DestTy: Ty);
1896 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: V, Op1: PassThru);
1897}
1898
1899static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1900 Intrinsic::ID IID) {
1901 Type *Ty = CI.getType();
1902 Value *Op0 = CI.getOperand(i_nocapture: 0);
1903 Value *Op1 = CI.getOperand(i_nocapture: 1);
1904 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1});
1905
1906 if (CI.arg_size() == 4) { // For masked intrinsics.
1907 Value *VecSrc = CI.getOperand(i_nocapture: 2);
1908 Value *Mask = CI.getOperand(i_nocapture: 3);
1909 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
1910 }
1911 return Res;
1912}
1913
1914static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1915 bool IsRotateRight) {
1916 Type *Ty = CI.getType();
1917 Value *Src = CI.getArgOperand(i: 0);
1918 Value *Amt = CI.getArgOperand(i: 1);
1919
1920 // The amount may be a scalar immediate, in which case create a splat vector.
1921 // Funnel shift amounts are treated as modulo, and the types are all power-of-2,
1922 // so we only care about the lowest log2 bits anyway.
1923 if (Amt->getType() != Ty) {
1924 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
1925 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
1926 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
1927 }
1928
1929 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1930 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Src, Src, Amt});
1931
1932 if (CI.arg_size() == 4) { // For masked intrinsics.
1933 Value *VecSrc = CI.getOperand(i_nocapture: 2);
1934 Value *Mask = CI.getOperand(i_nocapture: 3);
1935 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
1936 }
1937 return Res;
1938}
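
// For example (a sketch), a v4i32 rotate-left whose amount operand is the
// scalar immediate 5 is rewritten roughly as
//   call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %src, <4 x i32> %src,
//                                   <4 x i32> splat (i32 5))
// with the usual mask/select applied afterwards for the masked forms.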
1939
1940static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1941 bool IsSigned) {
1942 Type *Ty = CI.getType();
1943 Value *LHS = CI.getArgOperand(i: 0);
1944 Value *RHS = CI.getArgOperand(i: 1);
1945
1946 CmpInst::Predicate Pred;
1947 switch (Imm) {
1948 case 0x0:
1949 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1950 break;
1951 case 0x1:
1952 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1953 break;
1954 case 0x2:
1955 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1956 break;
1957 case 0x3:
1958 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1959 break;
1960 case 0x4:
1961 Pred = ICmpInst::ICMP_EQ;
1962 break;
1963 case 0x5:
1964 Pred = ICmpInst::ICMP_NE;
1965 break;
1966 case 0x6:
1967 return Constant::getNullValue(Ty); // FALSE
1968 case 0x7:
1969 return Constant::getAllOnesValue(Ty); // TRUE
1970 default:
1971 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1972 }
1973
1974 Value *Cmp = Builder.CreateICmp(P: Pred, LHS, RHS);
1975 Value *Ext = Builder.CreateSExt(V: Cmp, DestTy: Ty);
1976 return Ext;
1977}
1978
1979static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1980 bool IsShiftRight, bool ZeroMask) {
1981 Type *Ty = CI.getType();
1982 Value *Op0 = CI.getArgOperand(i: 0);
1983 Value *Op1 = CI.getArgOperand(i: 1);
1984 Value *Amt = CI.getArgOperand(i: 2);
1985
1986 if (IsShiftRight)
1987 std::swap(a&: Op0, b&: Op1);
1988
1989 // The amount may be a scalar immediate, in which case create a splat vector.
1990 // Funnel shift amounts are treated as modulo, and the types are all power-of-2,
1991 // so we only care about the lowest log2 bits anyway.
1992 if (Amt->getType() != Ty) {
1993 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
1994 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
1995 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
1996 }
1997
1998 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1999 Value *Res = Builder.CreateIntrinsic(ID: IID, Types: Ty, Args: {Op0, Op1, Amt});
2000
2001 unsigned NumArgs = CI.arg_size();
2002 if (NumArgs >= 4) { // For masked intrinsics.
2003 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(i: 3) :
2004 ZeroMask ? ConstantAggregateZero::get(Ty: CI.getType()) :
2005 CI.getArgOperand(i: 0);
2006 Value *Mask = CI.getOperand(i_nocapture: NumArgs - 1);
2007 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
2008 }
2009 return Res;
2010}
2011
2012static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2013 Value *Mask, bool Aligned) {
2014 const Align Alignment =
2015 Aligned
2016 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2017 : Align(1);
2018
2019 // If the mask is all ones just emit a regular store.
2020 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2021 if (C->isAllOnesValue())
2022 return Builder.CreateAlignedStore(Val: Data, Ptr, Align: Alignment);
2023
2024 // Convert the mask from an integer type to a vector of i1.
2025 unsigned NumElts = cast<FixedVectorType>(Val: Data->getType())->getNumElements();
2026 Mask = getX86MaskVec(Builder, Mask, NumElts);
2027 return Builder.CreateMaskedStore(Val: Data, Ptr, Alignment, Mask);
2028}
2029
2030static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2031 Value *Passthru, Value *Mask, bool Aligned) {
2032 Type *ValTy = Passthru->getType();
2033 const Align Alignment =
2034 Aligned
2035 ? Align(
2036 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2037 8)
2038 : Align(1);
2039
2040 // If the mask is all ones just emit a regular load.
2041 if (const auto *C = dyn_cast<Constant>(Val: Mask))
2042 if (C->isAllOnesValue())
2043 return Builder.CreateAlignedLoad(Ty: ValTy, Ptr, Align: Alignment);
2044
2045 // Convert the mask from an integer type to a vector of i1.
2046 unsigned NumElts = cast<FixedVectorType>(Val: ValTy)->getNumElements();
2047 Mask = getX86MaskVec(Builder, Mask, NumElts);
2048 return Builder.CreateMaskedLoad(Ty: ValTy, Ptr, Alignment, Mask, PassThru: Passthru);
2049}
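
// For example (a sketch), a masked load of a <4 x i32> value with an i8 mask
// becomes either a plain aligned load (when the mask is known all-ones) or
// roughly
//   call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 <align>,
//                                             <4 x i1> %m, <4 x i32> %passthru)
// where %m is built from the low four bits of the original i8 mask.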
2050
2051static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2052 Type *Ty = CI.getType();
2053 Value *Op0 = CI.getArgOperand(i: 0);
2054 Value *Res = Builder.CreateIntrinsic(ID: Intrinsic::abs, Types: Ty,
2055 Args: {Op0, Builder.getInt1(V: false)});
2056 if (CI.arg_size() == 3)
2057 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 2), Op0: Res, Op1: CI.getArgOperand(i: 1));
2058 return Res;
2059}
2060
2061static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2062 Type *Ty = CI.getType();
2063
2064 // Arguments have a vXi32 type so cast to vXi64.
2065 Value *LHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 0), DestTy: Ty);
2066 Value *RHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 1), DestTy: Ty);
2067
2068 if (IsSigned) {
2069 // Shift left then arithmetic shift right.
2070 Constant *ShiftAmt = ConstantInt::get(Ty, V: 32);
2071 LHS = Builder.CreateShl(LHS, RHS: ShiftAmt);
2072 LHS = Builder.CreateAShr(LHS, RHS: ShiftAmt);
2073 RHS = Builder.CreateShl(LHS: RHS, RHS: ShiftAmt);
2074 RHS = Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt);
2075 } else {
2076 // Clear the upper bits.
2077 Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff);
2078 LHS = Builder.CreateAnd(LHS, RHS: Mask);
2079 RHS = Builder.CreateAnd(LHS: RHS, RHS: Mask);
2080 }
2081
2082 Value *Res = Builder.CreateMul(LHS, RHS);
2083
2084 if (CI.arg_size() == 4)
2085 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Res, Op1: CI.getArgOperand(i: 2));
2086
2087 return Res;
2088}
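
// For example (a sketch), the signed form on <2 x i64> operands computes, per
// 64-bit lane, the product of the sign-extended low 32 bits of each input
// (the shl-by-32 / ashr-by-32 pairs above), while the unsigned form instead
// masks each input with 0xffffffff before the multiply.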
2089
2090 // Apply a mask to a vector of i1's, making sure the result is at least 8 bits wide.
2091static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2092 Value *Mask) {
2093 unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
2094 if (Mask) {
2095 const auto *C = dyn_cast<Constant>(Val: Mask);
2096 if (!C || !C->isAllOnesValue())
2097 Vec = Builder.CreateAnd(LHS: Vec, RHS: getX86MaskVec(Builder, Mask, NumElts));
2098 }
2099
2100 if (NumElts < 8) {
2101 int Indices[8];
2102 for (unsigned i = 0; i != NumElts; ++i)
2103 Indices[i] = i;
2104 for (unsigned i = NumElts; i != 8; ++i)
2105 Indices[i] = NumElts + i % NumElts;
2106 Vec = Builder.CreateShuffleVector(V1: Vec,
2107 V2: Constant::getNullValue(Ty: Vec->getType()),
2108 Mask: Indices);
2109 }
2110 return Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: std::max(a: NumElts, b: 8U)));
2111}
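
// For example (a sketch), a <4 x i1> compare result with a 4-bit mask is ANDed
// with the low four mask bits, widened to <8 x i1> using the shuffle mask
// <0, 1, 2, 3, 4, 5, 6, 7> (the upper lanes read zeros from the null second
// operand), and finally bitcast to the i8 that the old intrinsic returned.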
2112
2113static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2114 unsigned CC, bool Signed) {
2115 Value *Op0 = CI.getArgOperand(i: 0);
2116 unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
2117
2118 Value *Cmp;
2119 if (CC == 3) {
2120 Cmp = Constant::getNullValue(
2121 Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
2122 } else if (CC == 7) {
2123 Cmp = Constant::getAllOnesValue(
2124 Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
2125 } else {
2126 ICmpInst::Predicate Pred;
2127 switch (CC) {
2128 default: llvm_unreachable("Unknown condition code");
2129 case 0: Pred = ICmpInst::ICMP_EQ; break;
2130 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2131 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2132 case 4: Pred = ICmpInst::ICMP_NE; break;
2133 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2134 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2135 }
2136 Cmp = Builder.CreateICmp(P: Pred, LHS: Op0, RHS: CI.getArgOperand(i: 1));
2137 }
2138
2139 Value *Mask = CI.getArgOperand(i: CI.arg_size() - 1);
2140
2141 return applyX86MaskOn1BitsVec(Builder, Vec: Cmp, Mask);
2142}
2143
2144// Replace a masked intrinsic with an older unmasked intrinsic.
2145static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2146 Intrinsic::ID IID) {
2147 Value *Rep =
2148 Builder.CreateIntrinsic(ID: IID, Args: {CI.getArgOperand(i: 0), CI.getArgOperand(i: 1)});
2149 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Rep, Op1: CI.getArgOperand(i: 2));
2150}
2151
2152static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2153 Value* A = CI.getArgOperand(i: 0);
2154 Value* B = CI.getArgOperand(i: 1);
2155 Value* Src = CI.getArgOperand(i: 2);
2156 Value* Mask = CI.getArgOperand(i: 3);
2157
2158 Value* AndNode = Builder.CreateAnd(LHS: Mask, RHS: APInt(8, 1));
2159 Value* Cmp = Builder.CreateIsNotNull(Arg: AndNode);
2160 Value* Extract1 = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
2161 Value* Extract2 = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0);
2162 Value* Select = Builder.CreateSelect(C: Cmp, True: Extract1, False: Extract2);
2163 return Builder.CreateInsertElement(Vec: A, NewElt: Select, Idx: (uint64_t)0);
2164}
2165
2166static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2167 Value* Op = CI.getArgOperand(i: 0);
2168 Type* ReturnOp = CI.getType();
2169 unsigned NumElts = cast<FixedVectorType>(Val: CI.getType())->getNumElements();
2170 Value *Mask = getX86MaskVec(Builder, Mask: Op, NumElts);
2171 return Builder.CreateSExt(V: Mask, DestTy: ReturnOp, Name: "vpmovm2");
2172}
2173
2174// Replace intrinsic with unmasked version and a select.
2175static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2176 CallBase &CI, Value *&Rep) {
2177 Name = Name.substr(Start: 12); // Remove avx512.mask.
2178
2179 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2180 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2181 Intrinsic::ID IID;
2182 if (Name.starts_with(Prefix: "max.p")) {
2183 if (VecWidth == 128 && EltWidth == 32)
2184 IID = Intrinsic::x86_sse_max_ps;
2185 else if (VecWidth == 128 && EltWidth == 64)
2186 IID = Intrinsic::x86_sse2_max_pd;
2187 else if (VecWidth == 256 && EltWidth == 32)
2188 IID = Intrinsic::x86_avx_max_ps_256;
2189 else if (VecWidth == 256 && EltWidth == 64)
2190 IID = Intrinsic::x86_avx_max_pd_256;
2191 else
2192 llvm_unreachable("Unexpected intrinsic");
2193 } else if (Name.starts_with(Prefix: "min.p")) {
2194 if (VecWidth == 128 && EltWidth == 32)
2195 IID = Intrinsic::x86_sse_min_ps;
2196 else if (VecWidth == 128 && EltWidth == 64)
2197 IID = Intrinsic::x86_sse2_min_pd;
2198 else if (VecWidth == 256 && EltWidth == 32)
2199 IID = Intrinsic::x86_avx_min_ps_256;
2200 else if (VecWidth == 256 && EltWidth == 64)
2201 IID = Intrinsic::x86_avx_min_pd_256;
2202 else
2203 llvm_unreachable("Unexpected intrinsic");
2204 } else if (Name.starts_with(Prefix: "pshuf.b.")) {
2205 if (VecWidth == 128)
2206 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2207 else if (VecWidth == 256)
2208 IID = Intrinsic::x86_avx2_pshuf_b;
2209 else if (VecWidth == 512)
2210 IID = Intrinsic::x86_avx512_pshuf_b_512;
2211 else
2212 llvm_unreachable("Unexpected intrinsic");
2213 } else if (Name.starts_with(Prefix: "pmul.hr.sw.")) {
2214 if (VecWidth == 128)
2215 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2216 else if (VecWidth == 256)
2217 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2218 else if (VecWidth == 512)
2219 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2220 else
2221 llvm_unreachable("Unexpected intrinsic");
2222 } else if (Name.starts_with(Prefix: "pmulh.w.")) {
2223 if (VecWidth == 128)
2224 IID = Intrinsic::x86_sse2_pmulh_w;
2225 else if (VecWidth == 256)
2226 IID = Intrinsic::x86_avx2_pmulh_w;
2227 else if (VecWidth == 512)
2228 IID = Intrinsic::x86_avx512_pmulh_w_512;
2229 else
2230 llvm_unreachable("Unexpected intrinsic");
2231 } else if (Name.starts_with(Prefix: "pmulhu.w.")) {
2232 if (VecWidth == 128)
2233 IID = Intrinsic::x86_sse2_pmulhu_w;
2234 else if (VecWidth == 256)
2235 IID = Intrinsic::x86_avx2_pmulhu_w;
2236 else if (VecWidth == 512)
2237 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2238 else
2239 llvm_unreachable("Unexpected intrinsic");
2240 } else if (Name.starts_with(Prefix: "pmaddw.d.")) {
2241 if (VecWidth == 128)
2242 IID = Intrinsic::x86_sse2_pmadd_wd;
2243 else if (VecWidth == 256)
2244 IID = Intrinsic::x86_avx2_pmadd_wd;
2245 else if (VecWidth == 512)
2246 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2247 else
2248 llvm_unreachable("Unexpected intrinsic");
2249 } else if (Name.starts_with(Prefix: "pmaddubs.w.")) {
2250 if (VecWidth == 128)
2251 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2252 else if (VecWidth == 256)
2253 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2254 else if (VecWidth == 512)
2255 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2256 else
2257 llvm_unreachable("Unexpected intrinsic");
2258 } else if (Name.starts_with(Prefix: "packsswb.")) {
2259 if (VecWidth == 128)
2260 IID = Intrinsic::x86_sse2_packsswb_128;
2261 else if (VecWidth == 256)
2262 IID = Intrinsic::x86_avx2_packsswb;
2263 else if (VecWidth == 512)
2264 IID = Intrinsic::x86_avx512_packsswb_512;
2265 else
2266 llvm_unreachable("Unexpected intrinsic");
2267 } else if (Name.starts_with(Prefix: "packssdw.")) {
2268 if (VecWidth == 128)
2269 IID = Intrinsic::x86_sse2_packssdw_128;
2270 else if (VecWidth == 256)
2271 IID = Intrinsic::x86_avx2_packssdw;
2272 else if (VecWidth == 512)
2273 IID = Intrinsic::x86_avx512_packssdw_512;
2274 else
2275 llvm_unreachable("Unexpected intrinsic");
2276 } else if (Name.starts_with(Prefix: "packuswb.")) {
2277 if (VecWidth == 128)
2278 IID = Intrinsic::x86_sse2_packuswb_128;
2279 else if (VecWidth == 256)
2280 IID = Intrinsic::x86_avx2_packuswb;
2281 else if (VecWidth == 512)
2282 IID = Intrinsic::x86_avx512_packuswb_512;
2283 else
2284 llvm_unreachable("Unexpected intrinsic");
2285 } else if (Name.starts_with(Prefix: "packusdw.")) {
2286 if (VecWidth == 128)
2287 IID = Intrinsic::x86_sse41_packusdw;
2288 else if (VecWidth == 256)
2289 IID = Intrinsic::x86_avx2_packusdw;
2290 else if (VecWidth == 512)
2291 IID = Intrinsic::x86_avx512_packusdw_512;
2292 else
2293 llvm_unreachable("Unexpected intrinsic");
2294 } else if (Name.starts_with(Prefix: "vpermilvar.")) {
2295 if (VecWidth == 128 && EltWidth == 32)
2296 IID = Intrinsic::x86_avx_vpermilvar_ps;
2297 else if (VecWidth == 128 && EltWidth == 64)
2298 IID = Intrinsic::x86_avx_vpermilvar_pd;
2299 else if (VecWidth == 256 && EltWidth == 32)
2300 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2301 else if (VecWidth == 256 && EltWidth == 64)
2302 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2303 else if (VecWidth == 512 && EltWidth == 32)
2304 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2305 else if (VecWidth == 512 && EltWidth == 64)
2306 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2307 else
2308 llvm_unreachable("Unexpected intrinsic");
2309 } else if (Name == "cvtpd2dq.256") {
2310 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2311 } else if (Name == "cvtpd2ps.256") {
2312 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2313 } else if (Name == "cvttpd2dq.256") {
2314 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2315 } else if (Name == "cvttps2dq.128") {
2316 IID = Intrinsic::x86_sse2_cvttps2dq;
2317 } else if (Name == "cvttps2dq.256") {
2318 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2319 } else if (Name.starts_with(Prefix: "permvar.")) {
2320 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2321 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2322 IID = Intrinsic::x86_avx2_permps;
2323 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2324 IID = Intrinsic::x86_avx2_permd;
2325 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2326 IID = Intrinsic::x86_avx512_permvar_df_256;
2327 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2328 IID = Intrinsic::x86_avx512_permvar_di_256;
2329 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2330 IID = Intrinsic::x86_avx512_permvar_sf_512;
2331 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2332 IID = Intrinsic::x86_avx512_permvar_si_512;
2333 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2334 IID = Intrinsic::x86_avx512_permvar_df_512;
2335 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2336 IID = Intrinsic::x86_avx512_permvar_di_512;
2337 else if (VecWidth == 128 && EltWidth == 16)
2338 IID = Intrinsic::x86_avx512_permvar_hi_128;
2339 else if (VecWidth == 256 && EltWidth == 16)
2340 IID = Intrinsic::x86_avx512_permvar_hi_256;
2341 else if (VecWidth == 512 && EltWidth == 16)
2342 IID = Intrinsic::x86_avx512_permvar_hi_512;
2343 else if (VecWidth == 128 && EltWidth == 8)
2344 IID = Intrinsic::x86_avx512_permvar_qi_128;
2345 else if (VecWidth == 256 && EltWidth == 8)
2346 IID = Intrinsic::x86_avx512_permvar_qi_256;
2347 else if (VecWidth == 512 && EltWidth == 8)
2348 IID = Intrinsic::x86_avx512_permvar_qi_512;
2349 else
2350 llvm_unreachable("Unexpected intrinsic");
2351 } else if (Name.starts_with(Prefix: "dbpsadbw.")) {
2352 if (VecWidth == 128)
2353 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2354 else if (VecWidth == 256)
2355 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2356 else if (VecWidth == 512)
2357 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2358 else
2359 llvm_unreachable("Unexpected intrinsic");
2360 } else if (Name.starts_with(Prefix: "pmultishift.qb.")) {
2361 if (VecWidth == 128)
2362 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2363 else if (VecWidth == 256)
2364 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2365 else if (VecWidth == 512)
2366 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2367 else
2368 llvm_unreachable("Unexpected intrinsic");
2369 } else if (Name.starts_with(Prefix: "conflict.")) {
2370 if (Name[9] == 'd' && VecWidth == 128)
2371 IID = Intrinsic::x86_avx512_conflict_d_128;
2372 else if (Name[9] == 'd' && VecWidth == 256)
2373 IID = Intrinsic::x86_avx512_conflict_d_256;
2374 else if (Name[9] == 'd' && VecWidth == 512)
2375 IID = Intrinsic::x86_avx512_conflict_d_512;
2376 else if (Name[9] == 'q' && VecWidth == 128)
2377 IID = Intrinsic::x86_avx512_conflict_q_128;
2378 else if (Name[9] == 'q' && VecWidth == 256)
2379 IID = Intrinsic::x86_avx512_conflict_q_256;
2380 else if (Name[9] == 'q' && VecWidth == 512)
2381 IID = Intrinsic::x86_avx512_conflict_q_512;
2382 else
2383 llvm_unreachable("Unexpected intrinsic");
2384 } else if (Name.starts_with(Prefix: "pavg.")) {
2385 if (Name[5] == 'b' && VecWidth == 128)
2386 IID = Intrinsic::x86_sse2_pavg_b;
2387 else if (Name[5] == 'b' && VecWidth == 256)
2388 IID = Intrinsic::x86_avx2_pavg_b;
2389 else if (Name[5] == 'b' && VecWidth == 512)
2390 IID = Intrinsic::x86_avx512_pavg_b_512;
2391 else if (Name[5] == 'w' && VecWidth == 128)
2392 IID = Intrinsic::x86_sse2_pavg_w;
2393 else if (Name[5] == 'w' && VecWidth == 256)
2394 IID = Intrinsic::x86_avx2_pavg_w;
2395 else if (Name[5] == 'w' && VecWidth == 512)
2396 IID = Intrinsic::x86_avx512_pavg_w_512;
2397 else
2398 llvm_unreachable("Unexpected intrinsic");
2399 } else
2400 return false;
2401
2402 SmallVector<Value *, 4> Args(CI.args());
2403 Args.pop_back();
2404 Args.pop_back();
2405 Rep = Builder.CreateIntrinsic(ID: IID, Args);
2406 unsigned NumArgs = CI.arg_size();
2407 Rep = emitX86Select(Builder, Mask: CI.getArgOperand(i: NumArgs - 1), Op0: Rep,
2408 Op1: CI.getArgOperand(i: NumArgs - 2));
2409 return true;
2410}
2411
2412 /// Upgrade the comment in a call to inline asm that represents an ObjC
2413 /// retain/release marker.
2414void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2415 size_t Pos;
2416 if (AsmStr->find(s: "mov\tfp") == 0 &&
2417 AsmStr->find(s: "objc_retainAutoreleaseReturnValue") != std::string::npos &&
2418 (Pos = AsmStr->find(s: "# marker")) != std::string::npos) {
2419 AsmStr->replace(pos: Pos, n1: 1, s: ";");
2420 }
2421}
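
// For example (an illustrative sketch of such a marker string),
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// is rewritten so that the "#" introducing the marker becomes ";":
//   "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"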
2422
2423static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2424 Function *F, IRBuilder<> &Builder) {
2425 Value *Rep = nullptr;
2426
2427 if (Name == "abs.i" || Name == "abs.ll") {
2428 Value *Arg = CI->getArgOperand(i: 0);
2429 Value *Neg = Builder.CreateNeg(V: Arg, Name: "neg");
2430 Value *Cmp = Builder.CreateICmpSGE(
2431 LHS: Arg, RHS: llvm::Constant::getNullValue(Ty: Arg->getType()), Name: "abs.cond");
2432 Rep = Builder.CreateSelect(C: Cmp, True: Arg, False: Neg, Name: "abs");
2433 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2434 Type *Ty = (Name == "abs.bf16")
2435 ? Builder.getBFloatTy()
2436 : FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 2);
2437 Value *Arg = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: Ty);
2438 Value *Abs = Builder.CreateUnaryIntrinsic(ID: Intrinsic::nvvm_fabs, V: Arg);
2439 Rep = Builder.CreateBitCast(V: Abs, DestTy: CI->getType());
2440 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2441 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2442 : Intrinsic::nvvm_fabs;
2443 Rep = Builder.CreateUnaryIntrinsic(ID: IID, V: CI->getArgOperand(i: 0));
2444 } else if (Name.starts_with(Prefix: "atomic.load.add.f32.p") ||
2445 Name.starts_with(Prefix: "atomic.load.add.f64.p")) {
2446 Value *Ptr = CI->getArgOperand(i: 0);
2447 Value *Val = CI->getArgOperand(i: 1);
2448 Rep = Builder.CreateAtomicRMW(Op: AtomicRMWInst::FAdd, Ptr, Val, Align: MaybeAlign(),
2449 Ordering: AtomicOrdering::SequentiallyConsistent);
2450 } else if (Name.starts_with(Prefix: "atomic.load.inc.32.p") ||
2451 Name.starts_with(Prefix: "atomic.load.dec.32.p")) {
2452 Value *Ptr = CI->getArgOperand(i: 0);
2453 Value *Val = CI->getArgOperand(i: 1);
2454 auto Op = Name.starts_with(Prefix: "atomic.load.inc") ? AtomicRMWInst::UIncWrap
2455 : AtomicRMWInst::UDecWrap;
2456 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, Align: MaybeAlign(),
2457 Ordering: AtomicOrdering::SequentiallyConsistent);
2458 } else if (Name.consume_front(Prefix: "max.") &&
2459 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2460 Name == "ui" || Name == "ull")) {
2461 Value *Arg0 = CI->getArgOperand(i: 0);
2462 Value *Arg1 = CI->getArgOperand(i: 1);
2463 Value *Cmp = Name.starts_with(Prefix: "u")
2464 ? Builder.CreateICmpUGE(LHS: Arg0, RHS: Arg1, Name: "max.cond")
2465 : Builder.CreateICmpSGE(LHS: Arg0, RHS: Arg1, Name: "max.cond");
2466 Rep = Builder.CreateSelect(C: Cmp, True: Arg0, False: Arg1, Name: "max");
2467 } else if (Name.consume_front(Prefix: "min.") &&
2468 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2469 Name == "ui" || Name == "ull")) {
2470 Value *Arg0 = CI->getArgOperand(i: 0);
2471 Value *Arg1 = CI->getArgOperand(i: 1);
2472 Value *Cmp = Name.starts_with(Prefix: "u")
2473 ? Builder.CreateICmpULE(LHS: Arg0, RHS: Arg1, Name: "min.cond")
2474 : Builder.CreateICmpSLE(LHS: Arg0, RHS: Arg1, Name: "min.cond");
2475 Rep = Builder.CreateSelect(C: Cmp, True: Arg0, False: Arg1, Name: "min");
2476 } else if (Name == "clz.ll") {
2477 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2478 Value *Arg = CI->getArgOperand(i: 0);
2479 Value *Ctlz = Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: {Arg->getType()},
2480 Args: {Arg, Builder.getFalse()},
2481 /*FMFSource=*/nullptr, Name: "ctlz");
2482 Rep = Builder.CreateTrunc(V: Ctlz, DestTy: Builder.getInt32Ty(), Name: "ctlz.trunc");
2483 } else if (Name == "popc.ll") {
2484 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2485 // i64.
2486 Value *Arg = CI->getArgOperand(i: 0);
2487 Value *Popc = Builder.CreateIntrinsic(ID: Intrinsic::ctpop, Types: {Arg->getType()},
2488 Args: Arg, /*FMFSource=*/nullptr, Name: "ctpop");
2489 Rep = Builder.CreateTrunc(V: Popc, DestTy: Builder.getInt32Ty(), Name: "ctpop.trunc");
2490 } else if (Name == "h2f") {
2491 Rep = Builder.CreateIntrinsic(ID: Intrinsic::convert_from_fp16,
2492 Types: {Builder.getFloatTy()}, Args: CI->getArgOperand(i: 0),
2493 /*FMFSource=*/nullptr, Name: "h2f");
2494 } else if (Name.consume_front(Prefix: "bitcast.") &&
2495 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2496 Name == "d2ll")) {
2497 Rep = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
2498 } else if (Name == "rotate.b32") {
2499 Value *Arg = CI->getOperand(i_nocapture: 0);
2500 Value *ShiftAmt = CI->getOperand(i_nocapture: 1);
2501 Rep = Builder.CreateIntrinsic(RetTy: Builder.getInt32Ty(), ID: Intrinsic::fshl,
2502 Args: {Arg, Arg, ShiftAmt});
2503 } else if (Name == "rotate.b64") {
2504 Type *Int64Ty = Builder.getInt64Ty();
2505 Value *Arg = CI->getOperand(i_nocapture: 0);
2506 Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
2507 Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
2508 Args: {Arg, Arg, ZExtShiftAmt});
2509 } else if (Name == "rotate.right.b64") {
2510 Type *Int64Ty = Builder.getInt64Ty();
2511 Value *Arg = CI->getOperand(i_nocapture: 0);
2512 Value *ZExtShiftAmt = Builder.CreateZExt(V: CI->getOperand(i_nocapture: 1), DestTy: Int64Ty);
2513 Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshr,
2514 Args: {Arg, Arg, ZExtShiftAmt});
2515 } else if (Name == "swap.lo.hi.b64") {
2516 Type *Int64Ty = Builder.getInt64Ty();
2517 Value *Arg = CI->getOperand(i_nocapture: 0);
2518 Rep = Builder.CreateIntrinsic(RetTy: Int64Ty, ID: Intrinsic::fshl,
2519 Args: {Arg, Arg, Builder.getInt64(C: 32)});
2520 } else if ((Name.consume_front(Prefix: "ptr.gen.to.") &&
2521 consumeNVVMPtrAddrSpace(Name)) ||
2522 (Name.consume_front(Prefix: "ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2523 Name.starts_with(Prefix: ".to.gen"))) {
2524 Rep = Builder.CreateAddrSpaceCast(V: CI->getArgOperand(i: 0), DestTy: CI->getType());
2525 } else if (Name.consume_front(Prefix: "ldg.global")) {
2526 Value *Ptr = CI->getArgOperand(i: 0);
2527 Align PtrAlign = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getAlignValue();
2528 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2529 Value *ASC = Builder.CreateAddrSpaceCast(V: Ptr, DestTy: Builder.getPtrTy(AddrSpace: 1));
2530 Instruction *LD = Builder.CreateAlignedLoad(Ty: CI->getType(), Ptr: ASC, Align: PtrAlign);
2531 MDNode *MD = MDNode::get(Context&: Builder.getContext(), MDs: {});
2532 LD->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD);
2533 return LD;
2534 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2535 Value *Arg =
2536 Name.ends_with(Suffix: '0') ? Builder.getInt32(C: 0) : CI->getArgOperand(i: 0);
2537 Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2538 Types: {}, Args: {Arg});
2539 } else if (Name == "barrier") {
2540 Rep = Builder.CreateIntrinsic(
2541 ID: Intrinsic::nvvm_barrier_cta_sync_aligned_count, Types: {},
2542 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
2543 } else if (Name == "barrier.sync") {
2544 Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_all, Types: {},
2545 Args: {CI->getArgOperand(i: 0)});
2546 } else if (Name == "barrier.sync.cnt") {
2547 Rep = Builder.CreateIntrinsic(ID: Intrinsic::nvvm_barrier_cta_sync_count, Types: {},
2548 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
2549 } else {
2550 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2551 if (IID != Intrinsic::not_intrinsic &&
2552 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2553 rename(GV: F);
2554 Function *NewFn = Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: IID);
2555 SmallVector<Value *, 2> Args;
2556 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2557 Value *Arg = CI->getArgOperand(i: I);
2558 Type *OldType = Arg->getType();
2559 Type *NewType = NewFn->getArg(i: I)->getType();
2560 Args.push_back(
2561 Elt: (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2562 ? Builder.CreateBitCast(V: Arg, DestTy: NewType)
2563 : Arg);
2564 }
2565 Rep = Builder.CreateCall(Callee: NewFn, Args);
2566 if (F->getReturnType()->isIntegerTy())
2567 Rep = Builder.CreateBitCast(V: Rep, DestTy: F->getReturnType());
2568 }
2569 }
2570
2571 return Rep;
2572}
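
// For example (a sketch of two of the expansions above), llvm.nvvm.rotate.b32
// is rewritten as a funnel shift with the source used for both inputs,
//   %r = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %n)
// and llvm.nvvm.swap.lo.hi.b64 becomes the same pattern on i64 with a constant
// shift amount of 32.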
2573
2574static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2575 IRBuilder<> &Builder) {
2576 LLVMContext &C = F->getContext();
2577 Value *Rep = nullptr;
2578
2579 if (Name.starts_with(Prefix: "sse4a.movnt.")) {
2580 SmallVector<Metadata *, 1> Elts;
2581 Elts.push_back(
2582 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2583 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2584
2585 Value *Arg0 = CI->getArgOperand(i: 0);
2586 Value *Arg1 = CI->getArgOperand(i: 1);
2587
2588 // Nontemporal (unaligned) store of the 0th element of the float/double
2589 // vector.
2590 Value *Extract =
2591 Builder.CreateExtractElement(Vec: Arg1, Idx: (uint64_t)0, Name: "extractelement");
2592
2593 StoreInst *SI = Builder.CreateAlignedStore(Val: Extract, Ptr: Arg0, Align: Align(1));
2594 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2595 } else if (Name.starts_with(Prefix: "avx.movnt.") ||
2596 Name.starts_with(Prefix: "avx512.storent.")) {
2597 SmallVector<Metadata *, 1> Elts;
2598 Elts.push_back(
2599 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2600 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2601
2602 Value *Arg0 = CI->getArgOperand(i: 0);
2603 Value *Arg1 = CI->getArgOperand(i: 1);
2604
2605 StoreInst *SI = Builder.CreateAlignedStore(
2606 Val: Arg1, Ptr: Arg0,
2607 Align: Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2608 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2609 } else if (Name == "sse2.storel.dq") {
2610 Value *Arg0 = CI->getArgOperand(i: 0);
2611 Value *Arg1 = CI->getArgOperand(i: 1);
2612
2613 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
2614 Value *BC0 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
2615 Value *Elt = Builder.CreateExtractElement(Vec: BC0, Idx: (uint64_t)0);
2616 Builder.CreateAlignedStore(Val: Elt, Ptr: Arg0, Align: Align(1));
2617 } else if (Name.starts_with(Prefix: "sse.storeu.") ||
2618 Name.starts_with(Prefix: "sse2.storeu.") ||
2619 Name.starts_with(Prefix: "avx.storeu.")) {
2620 Value *Arg0 = CI->getArgOperand(i: 0);
2621 Value *Arg1 = CI->getArgOperand(i: 1);
2622 Builder.CreateAlignedStore(Val: Arg1, Ptr: Arg0, Align: Align(1));
2623 } else if (Name == "avx512.mask.store.ss") {
2624 Value *Mask = Builder.CreateAnd(LHS: CI->getArgOperand(i: 2), RHS: Builder.getInt8(C: 1));
2625 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2626 Mask, Aligned: false);
2627 } else if (Name.starts_with(Prefix: "avx512.mask.store")) {
2628 // "avx512.mask.storeu." or "avx512.mask.store."
2629 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2630 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2631 Mask: CI->getArgOperand(i: 2), Aligned);
2632 } else if (Name.starts_with(Prefix: "sse2.pcmp") || Name.starts_with(Prefix: "avx2.pcmp")) {
2633 // Upgrade packed integer vector compare intrinsics to compare instructions.
2634 // "sse2.pcmpeq.", "sse2.pcmpgt.", "avx2.pcmpeq." or "avx2.pcmpgt."
2635 bool CmpEq = Name[9] == 'e';
2636 Rep = Builder.CreateICmp(P: CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2637 LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
2638 Rep = Builder.CreateSExt(V: Rep, DestTy: CI->getType(), Name: "");
2639 } else if (Name.starts_with(Prefix: "avx512.broadcastm")) {
2640 Type *ExtTy = Type::getInt32Ty(C);
2641 if (CI->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 8))
2642 ExtTy = Type::getInt64Ty(C);
2643 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2644 ExtTy->getPrimitiveSizeInBits();
2645 Rep = Builder.CreateZExt(V: CI->getArgOperand(i: 0), DestTy: ExtTy);
2646 Rep = Builder.CreateVectorSplat(NumElts, V: Rep);
2647 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2648 Value *Vec = CI->getArgOperand(i: 0);
2649 Value *Elt0 = Builder.CreateExtractElement(Vec, Idx: (uint64_t)0);
2650 Elt0 = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: Elt0->getType(), Args: Elt0);
2651 Rep = Builder.CreateInsertElement(Vec, NewElt: Elt0, Idx: (uint64_t)0);
2652 } else if (Name.starts_with(Prefix: "avx.sqrt.p") ||
2653 Name.starts_with(Prefix: "sse2.sqrt.p") ||
2654 Name.starts_with(Prefix: "sse.sqrt.p")) {
2655 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(),
2656 Args: {CI->getArgOperand(i: 0)});
2657 } else if (Name.starts_with(Prefix: "avx512.mask.sqrt.p")) {
2658 if (CI->arg_size() == 4 &&
2659 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
2660 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
2661 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2662 : Intrinsic::x86_avx512_sqrt_pd_512;
2663
2664 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 3)};
2665 Rep = Builder.CreateIntrinsic(ID: IID, Args);
2666 } else {
2667 Rep = Builder.CreateIntrinsic(ID: Intrinsic::sqrt, Types: CI->getType(),
2668 Args: {CI->getArgOperand(i: 0)});
2669 }
2670 Rep =
2671 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
2672 } else if (Name.starts_with(Prefix: "avx512.ptestm") ||
2673 Name.starts_with(Prefix: "avx512.ptestnm")) {
2674 Value *Op0 = CI->getArgOperand(i: 0);
2675 Value *Op1 = CI->getArgOperand(i: 1);
2676 Value *Mask = CI->getArgOperand(i: 2);
2677 Rep = Builder.CreateAnd(LHS: Op0, RHS: Op1);
2678 llvm::Type *Ty = Op0->getType();
2679 Value *Zero = llvm::Constant::getNullValue(Ty);
2680 ICmpInst::Predicate Pred = Name.starts_with(Prefix: "avx512.ptestm")
2681 ? ICmpInst::ICMP_NE
2682 : ICmpInst::ICMP_EQ;
2683 Rep = Builder.CreateICmp(P: Pred, LHS: Rep, RHS: Zero);
2684 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask);
2685 } else if (Name.starts_with(Prefix: "avx512.mask.pbroadcast")) {
2686 unsigned NumElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 1)->getType())
2687 ->getNumElements();
2688 Rep = Builder.CreateVectorSplat(NumElts, V: CI->getArgOperand(i: 0));
2689 Rep =
2690 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
2691 } else if (Name.starts_with(Prefix: "avx512.kunpck")) {
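// Roughly: for kunpck.bw the two i16 mask operands are bitcast to <16 x i1>,
// the low 8 bits of each are taken, and the halves are concatenated with the
// operands swapped, so the result's low byte comes from the second argument
// and its high byte from the first (a sketch of the intended semantics).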
2692 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2693 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts);
2694 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts);
2695 int Indices[64];
2696 for (unsigned i = 0; i != NumElts; ++i)
2697 Indices[i] = i;
2698
2699 // First extract half of each vector. This gives better codegen than
2700 // doing it in a single shuffle.
2701 LHS = Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2));
2702 RHS = Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2));
2703 // Concat the vectors.
2704 // NOTE: Operands have to be swapped to match intrinsic definition.
2705 Rep = Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts));
2706 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2707 } else if (Name == "avx512.kand.w") {
2708 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2709 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2710 Rep = Builder.CreateAnd(LHS, RHS);
2711 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2712 } else if (Name == "avx512.kandn.w") {
2713 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2714 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2715 LHS = Builder.CreateNot(V: LHS);
2716 Rep = Builder.CreateAnd(LHS, RHS);
2717 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2718 } else if (Name == "avx512.kor.w") {
2719 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2720 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2721 Rep = Builder.CreateOr(LHS, RHS);
2722 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2723 } else if (Name == "avx512.kxor.w") {
2724 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2725 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2726 Rep = Builder.CreateXor(LHS, RHS);
2727 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2728 } else if (Name == "avx512.kxnor.w") {
2729 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2730 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2731 LHS = Builder.CreateNot(V: LHS);
2732 Rep = Builder.CreateXor(LHS, RHS);
2733 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2734 } else if (Name == "avx512.knot.w") {
2735 Rep = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2736 Rep = Builder.CreateNot(V: Rep);
2737 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2738 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2739 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2740 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2741 Rep = Builder.CreateOr(LHS, RHS);
2742 Rep = Builder.CreateBitCast(V: Rep, DestTy: Builder.getInt16Ty());
2743 Value *C;
2744 if (Name[14] == 'c')
2745 C = ConstantInt::getAllOnesValue(Ty: Builder.getInt16Ty());
2746 else
2747 C = ConstantInt::getNullValue(Ty: Builder.getInt16Ty());
2748 Rep = Builder.CreateICmpEQ(LHS: Rep, RHS: C);
2749 Rep = Builder.CreateZExt(V: Rep, DestTy: Builder.getInt32Ty());
2750 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2751 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2752 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2753 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2754 Type *I32Ty = Type::getInt32Ty(C);
2755 Value *Elt0 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 0),
2756 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
2757 Value *Elt1 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1),
2758 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
2759 Value *EltOp;
2760 if (Name.contains(Other: ".add."))
2761 EltOp = Builder.CreateFAdd(L: Elt0, R: Elt1);
2762 else if (Name.contains(Other: ".sub."))
2763 EltOp = Builder.CreateFSub(L: Elt0, R: Elt1);
2764 else if (Name.contains(Other: ".mul."))
2765 EltOp = Builder.CreateFMul(L: Elt0, R: Elt1);
2766 else
2767 EltOp = Builder.CreateFDiv(L: Elt0, R: Elt1);
2768 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: EltOp,
2769 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
2770 } else if (Name.starts_with(Prefix: "avx512.mask.pcmp")) {
2771 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2772 bool CmpEq = Name[16] == 'e';
2773 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: CmpEq ? 0 : 6, Signed: true);
2774 } else if (Name.starts_with(Prefix: "avx512.mask.vpshufbitqmb.")) {
2775 Type *OpTy = CI->getArgOperand(i: 0)->getType();
2776 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2777 Intrinsic::ID IID;
2778 switch (VecWidth) {
2779 default:
2780 llvm_unreachable("Unexpected intrinsic");
2781 case 128:
2782 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2783 break;
2784 case 256:
2785 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2786 break;
2787 case 512:
2788 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2789 break;
2790 }
2791
2792 Rep =
2793 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
2794 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
2795 } else if (Name.starts_with(Prefix: "avx512.mask.fpclass.p")) {
2796 Type *OpTy = CI->getArgOperand(i: 0)->getType();
2797 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2798 unsigned EltWidth = OpTy->getScalarSizeInBits();
2799 Intrinsic::ID IID;
2800 if (VecWidth == 128 && EltWidth == 32)
2801 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2802 else if (VecWidth == 256 && EltWidth == 32)
2803 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2804 else if (VecWidth == 512 && EltWidth == 32)
2805 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2806 else if (VecWidth == 128 && EltWidth == 64)
2807 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2808 else if (VecWidth == 256 && EltWidth == 64)
2809 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2810 else if (VecWidth == 512 && EltWidth == 64)
2811 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2812 else
2813 llvm_unreachable("Unexpected intrinsic");
2814
2815 Rep =
2816 Builder.CreateIntrinsic(ID: IID, Args: {CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1)});
2817 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
2818 } else if (Name.starts_with(Prefix: "avx512.cmp.p")) {
2819 SmallVector<Value *, 4> Args(CI->args());
2820 Type *OpTy = Args[0]->getType();
2821 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2822 unsigned EltWidth = OpTy->getScalarSizeInBits();
2823 Intrinsic::ID IID;
2824 if (VecWidth == 128 && EltWidth == 32)
2825 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2826 else if (VecWidth == 256 && EltWidth == 32)
2827 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2828 else if (VecWidth == 512 && EltWidth == 32)
2829 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2830 else if (VecWidth == 128 && EltWidth == 64)
2831 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2832 else if (VecWidth == 256 && EltWidth == 64)
2833 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2834 else if (VecWidth == 512 && EltWidth == 64)
2835 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2836 else
2837 llvm_unreachable("Unexpected intrinsic");
2838
2839 Value *Mask = Constant::getAllOnesValue(Ty: CI->getType());
2840 if (VecWidth == 512)
2841 std::swap(a&: Mask, b&: Args.back());
2842 Args.push_back(Elt: Mask);
2843
2844 Rep = Builder.CreateIntrinsic(ID: IID, Args);
2845 } else if (Name.starts_with(Prefix: "avx512.mask.cmp.")) {
2846 // Integer compare intrinsics.
2847 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
2848 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: true);
2849 } else if (Name.starts_with(Prefix: "avx512.mask.ucmp.")) {
2850 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
2851 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: false);
2852 } else if (Name.starts_with(Prefix: "avx512.cvtb2mask.") ||
2853 Name.starts_with(Prefix: "avx512.cvtw2mask.") ||
2854 Name.starts_with(Prefix: "avx512.cvtd2mask.") ||
2855 Name.starts_with(Prefix: "avx512.cvtq2mask.")) {
2856 Value *Op = CI->getArgOperand(i: 0);
2857 Value *Zero = llvm::Constant::getNullValue(Ty: Op->getType());
2858 Rep = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: Op, RHS: Zero);
2859 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: nullptr);
2860 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2861 Name == "ssse3.pabs.d.128" || Name.starts_with(Prefix: "avx2.pabs") ||
2862 Name.starts_with(Prefix: "avx512.mask.pabs")) {
2863 Rep = upgradeAbs(Builder, CI&: *CI);
2864 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2865 Name == "sse41.pmaxsd" || Name.starts_with(Prefix: "avx2.pmaxs") ||
2866 Name.starts_with(Prefix: "avx512.mask.pmaxs")) {
2867 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smax);
2868 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2869 Name == "sse41.pmaxud" || Name.starts_with(Prefix: "avx2.pmaxu") ||
2870 Name.starts_with(Prefix: "avx512.mask.pmaxu")) {
2871 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umax);
2872 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2873 Name == "sse41.pminsd" || Name.starts_with(Prefix: "avx2.pmins") ||
2874 Name.starts_with(Prefix: "avx512.mask.pmins")) {
2875 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::smin);
2876 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2877 Name == "sse41.pminud" || Name.starts_with(Prefix: "avx2.pminu") ||
2878 Name.starts_with(Prefix: "avx512.mask.pminu")) {
2879 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::umin);
2880 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2881 Name == "avx512.pmulu.dq.512" ||
2882 Name.starts_with(Prefix: "avx512.mask.pmulu.dq.")) {
2883 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: false);
2884 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2885 Name == "avx512.pmul.dq.512" ||
2886 Name.starts_with(Prefix: "avx512.mask.pmul.dq.")) {
2887 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/ IsSigned: true);
2888 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2889 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2890 Rep =
2891 Builder.CreateSIToFP(V: CI->getArgOperand(i: 1),
2892 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
2893 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
2894 } else if (Name == "avx512.cvtusi2sd") {
2895 Rep =
2896 Builder.CreateUIToFP(V: CI->getArgOperand(i: 1),
2897 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
2898 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
2899 } else if (Name == "sse2.cvtss2sd") {
2900 Rep = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), Idx: (uint64_t)0);
2901 Rep = Builder.CreateFPExt(
2902 V: Rep, DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
2903 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
2904 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2905 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2906 Name.starts_with(Prefix: "avx512.mask.cvtdq2pd.") ||
2907 Name.starts_with(Prefix: "avx512.mask.cvtudq2pd.") ||
2908 Name.starts_with(Prefix: "avx512.mask.cvtdq2ps.") ||
2909 Name.starts_with(Prefix: "avx512.mask.cvtudq2ps.") ||
2910 Name.starts_with(Prefix: "avx512.mask.cvtqq2pd.") ||
2911 Name.starts_with(Prefix: "avx512.mask.cvtuqq2pd.") ||
2912 Name == "avx512.mask.cvtqq2ps.256" ||
2913 Name == "avx512.mask.cvtqq2ps.512" ||
2914 Name == "avx512.mask.cvtuqq2ps.256" ||
2915 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2916 Name == "avx.cvt.ps2.pd.256" ||
2917 Name == "avx512.mask.cvtps2pd.128" ||
2918 Name == "avx512.mask.cvtps2pd.256") {
2919 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
2920 Rep = CI->getArgOperand(i: 0);
2921 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
2922
2923 unsigned NumDstElts = DstTy->getNumElements();
2924 if (NumDstElts < SrcTy->getNumElements()) {
2925 assert(NumDstElts == 2 && "Unexpected vector size");
2926 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1});
2927 }
2928
2929 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2930 bool IsUnsigned = Name.contains(Other: "cvtu");
2931 if (IsPS2PD)
2932 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtps2pd");
2933 else if (CI->arg_size() == 4 &&
2934 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
2935 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
2936 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2937 : Intrinsic::x86_avx512_sitofp_round;
2938 Rep = Builder.CreateIntrinsic(ID: IID, Types: {DstTy, SrcTy},
2939 Args: {Rep, CI->getArgOperand(i: 3)});
2940 } else {
2941 Rep = IsUnsigned ? Builder.CreateUIToFP(V: Rep, DestTy: DstTy, Name: "cvt")
2942 : Builder.CreateSIToFP(V: Rep, DestTy: DstTy, Name: "cvt");
2943 }
2944
2945 if (CI->arg_size() >= 3)
2946 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
2947 Op1: CI->getArgOperand(i: 1));
2948 } else if (Name.starts_with(Prefix: "avx512.mask.vcvtph2ps.") ||
2949 Name.starts_with(Prefix: "vcvtph2ps.")) {
2950 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
2951 Rep = CI->getArgOperand(i: 0);
2952 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
2953 unsigned NumDstElts = DstTy->getNumElements();
2954 if (NumDstElts != SrcTy->getNumElements()) {
2955 assert(NumDstElts == 4 && "Unexpected vector size");
2956 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1, 2, 3});
2957 }
2958 Rep = Builder.CreateBitCast(
2959 V: Rep, DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C), NumElts: NumDstElts));
2960 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtph2ps");
2961 if (CI->arg_size() >= 3)
2962 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
2963 Op1: CI->getArgOperand(i: 1));
2964 } else if (Name.starts_with(Prefix: "avx512.mask.load")) {
2965 // "avx512.mask.loadu." or "avx512.mask.load."
2966 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2967 Rep = upgradeMaskedLoad(Builder, Ptr: CI->getArgOperand(i: 0), Passthru: CI->getArgOperand(i: 1),
2968 Mask: CI->getArgOperand(i: 2), Aligned);
2969 } else if (Name.starts_with(Prefix: "avx512.mask.expand.load.")) {
2970 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
2971 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
2972 NumElts: ResultTy->getNumElements());
2973
2974 Rep = Builder.CreateIntrinsic(
2975 ID: Intrinsic::masked_expandload, Types: ResultTy,
2976 Args: {CI->getOperand(i_nocapture: 0), MaskVec, CI->getOperand(i_nocapture: 1)});
2977 } else if (Name.starts_with(Prefix: "avx512.mask.compress.store.")) {
2978 auto *ResultTy = cast<VectorType>(Val: CI->getArgOperand(i: 1)->getType());
2979 Value *MaskVec =
2980 getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
2981 NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements());
2982
2983 Rep = Builder.CreateIntrinsic(
2984 ID: Intrinsic::masked_compressstore, Types: ResultTy,
2985 Args: {CI->getArgOperand(i: 1), CI->getArgOperand(i: 0), MaskVec});
2986 } else if (Name.starts_with(Prefix: "avx512.mask.compress.") ||
2987 Name.starts_with(Prefix: "avx512.mask.expand.")) {
2988 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
2989
2990 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
2991 NumElts: ResultTy->getNumElements());
2992
2993 bool IsCompress = Name[12] == 'c';
2994 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2995 : Intrinsic::x86_avx512_mask_expand;
2996 Rep = Builder.CreateIntrinsic(
2997 ID: IID, Types: ResultTy, Args: {CI->getOperand(i_nocapture: 0), CI->getOperand(i_nocapture: 1), MaskVec});
2998 } else if (Name.starts_with(Prefix: "xop.vpcom")) {
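// Roughly: xop.vpcomltb(%a, %b) maps to condition code 0 and becomes a
// signed "icmp slt" sign-extended to the result vector type (a sketch; the
// exact lowering lives in upgradeX86vpcom).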
2999 bool IsSigned;
3000 if (Name.ends_with(Suffix: "ub") || Name.ends_with(Suffix: "uw") || Name.ends_with(Suffix: "ud") ||
3001 Name.ends_with(Suffix: "uq"))
3002 IsSigned = false;
3003 else if (Name.ends_with(Suffix: "b") || Name.ends_with(Suffix: "w") ||
3004 Name.ends_with(Suffix: "d") || Name.ends_with(Suffix: "q"))
3005 IsSigned = true;
3006 else
3007 llvm_unreachable("Unknown suffix");
3008
3009 unsigned Imm;
3010 if (CI->arg_size() == 3) {
3011 Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3012 } else {
3013 Name = Name.substr(Start: 9); // strip off "xop.vpcom"
3014 if (Name.starts_with(Prefix: "lt"))
3015 Imm = 0;
3016 else if (Name.starts_with(Prefix: "le"))
3017 Imm = 1;
3018 else if (Name.starts_with(Prefix: "gt"))
3019 Imm = 2;
3020 else if (Name.starts_with(Prefix: "ge"))
3021 Imm = 3;
3022 else if (Name.starts_with(Prefix: "eq"))
3023 Imm = 4;
3024 else if (Name.starts_with(Prefix: "ne"))
3025 Imm = 5;
3026 else if (Name.starts_with(Prefix: "false"))
3027 Imm = 6;
3028 else if (Name.starts_with(Prefix: "true"))
3029 Imm = 7;
3030 else
3031 llvm_unreachable("Unknown condition");
3032 }
3033
3034 Rep = upgradeX86vpcom(Builder, CI&: *CI, Imm, IsSigned);
3035 } else if (Name.starts_with(Prefix: "xop.vpcmov")) {
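// A bitwise select: roughly Rep = (Op0 & Sel) | (Op1 & ~Sel), i.e. selector
// bits that are set choose from the first operand, clear bits from the
// second.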
3036 Value *Sel = CI->getArgOperand(i: 2);
3037 Value *NotSel = Builder.CreateNot(V: Sel);
3038 Value *Sel0 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 0), RHS: Sel);
3039 Value *Sel1 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 1), RHS: NotSel);
3040 Rep = Builder.CreateOr(LHS: Sel0, RHS: Sel1);
3041 } else if (Name.starts_with(Prefix: "xop.vprot") || Name.starts_with(Prefix: "avx512.prol") ||
3042 Name.starts_with(Prefix: "avx512.mask.prol")) {
3043 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: false);
3044 } else if (Name.starts_with(Prefix: "avx512.pror") ||
3045 Name.starts_with(Prefix: "avx512.mask.pror")) {
3046 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: true);
3047 } else if (Name.starts_with(Prefix: "avx512.vpshld.") ||
3048 Name.starts_with(Prefix: "avx512.mask.vpshld") ||
3049 Name.starts_with(Prefix: "avx512.maskz.vpshld")) {
3050 bool ZeroMask = Name[11] == 'z';
3051 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: false, ZeroMask);
3052 } else if (Name.starts_with(Prefix: "avx512.vpshrd.") ||
3053 Name.starts_with(Prefix: "avx512.mask.vpshrd") ||
3054 Name.starts_with(Prefix: "avx512.maskz.vpshrd")) {
3055 bool ZeroMask = Name[11] == 'z';
3056 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: true, ZeroMask);
3057 } else if (Name == "sse42.crc32.64.8") {
3058 Value *Trunc0 =
3059 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Type::getInt32Ty(C));
3060 Rep = Builder.CreateIntrinsic(ID: Intrinsic::x86_sse42_crc32_32_8,
3061 Args: {Trunc0, CI->getArgOperand(i: 1)});
3062 Rep = Builder.CreateZExt(V: Rep, DestTy: CI->getType(), Name: "");
3063 } else if (Name.starts_with(Prefix: "avx.vbroadcast.s") ||
3064 Name.starts_with(Prefix: "avx512.vbroadcast.s")) {
3065 // Replace broadcasts with a series of insertelements.
3066 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3067 Type *EltTy = VecTy->getElementType();
3068 unsigned EltNum = VecTy->getNumElements();
3069 Value *Load = Builder.CreateLoad(Ty: EltTy, Ptr: CI->getArgOperand(i: 0));
3070 Type *I32Ty = Type::getInt32Ty(C);
3071 Rep = PoisonValue::get(T: VecTy);
3072 for (unsigned I = 0; I < EltNum; ++I)
3073 Rep = Builder.CreateInsertElement(Vec: Rep, NewElt: Load, Idx: ConstantInt::get(Ty: I32Ty, V: I));
3074 } else if (Name.starts_with(Prefix: "sse41.pmovsx") ||
3075 Name.starts_with(Prefix: "sse41.pmovzx") ||
3076 Name.starts_with(Prefix: "avx2.pmovsx") ||
3077 Name.starts_with(Prefix: "avx2.pmovzx") ||
3078 Name.starts_with(Prefix: "avx512.mask.pmovsx") ||
3079 Name.starts_with(Prefix: "avx512.mask.pmovzx")) {
3080 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3081 unsigned NumDstElts = DstTy->getNumElements();
3082
3083 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3084 SmallVector<int, 8> ShuffleMask(NumDstElts);
3085 for (unsigned i = 0; i != NumDstElts; ++i)
3086 ShuffleMask[i] = i;
3087
3088 Value *SV = Builder.CreateShuffleVector(V: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3089
3090 bool DoSext = Name.contains(Other: "pmovsx");
3091 Rep =
3092 DoSext ? Builder.CreateSExt(V: SV, DestTy: DstTy) : Builder.CreateZExt(V: SV, DestTy: DstTy);
3093 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3094 if (CI->arg_size() == 3)
3095 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3096 Op1: CI->getArgOperand(i: 1));
3097 } else if (Name == "avx512.mask.pmov.qd.256" ||
3098 Name == "avx512.mask.pmov.qd.512" ||
3099 Name == "avx512.mask.pmov.wb.256" ||
3100 Name == "avx512.mask.pmov.wb.512") {
3101 Type *Ty = CI->getArgOperand(i: 1)->getType();
3102 Rep = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Ty);
3103 Rep =
3104 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3105 } else if (Name.starts_with(Prefix: "avx.vbroadcastf128") ||
3106 Name == "avx2.vbroadcasti128") {
3107 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
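// Roughly: load a single 128-bit vector (e.g. <4 x float>) from the pointer
// operand with align 1, then shufflevector it so that same 128-bit lane
// fills both halves of the 256-bit result (an illustrative sketch).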
3108 Type *EltTy = cast<VectorType>(Val: CI->getType())->getElementType();
3109 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3110 auto *VT = FixedVectorType::get(ElementType: EltTy, NumElts: NumSrcElts);
3111 Value *Load = Builder.CreateAlignedLoad(Ty: VT, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
3112 if (NumSrcElts == 2)
3113 Rep = Builder.CreateShuffleVector(V: Load, Mask: ArrayRef<int>{0, 1, 0, 1});
3114 else
3115 Rep = Builder.CreateShuffleVector(V: Load,
3116 Mask: ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3117 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.i") ||
3118 Name.starts_with(Prefix: "avx512.mask.shuf.f")) {
3119 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3120 Type *VT = CI->getType();
3121 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3122 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3123 unsigned ControlBitsMask = NumLanes - 1;
3124 unsigned NumControlBits = NumLanes / 2;
3125 SmallVector<int, 8> ShuffleMask(0);
3126
3127 for (unsigned l = 0; l != NumLanes; ++l) {
3128 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3129 // We actually need the other source.
3130 if (l >= NumLanes / 2)
3131 LaneMask += NumLanes;
3132 for (unsigned i = 0; i != NumElementsInLane; ++i)
3133 ShuffleMask.push_back(Elt: LaneMask * NumElementsInLane + i);
3134 }
3135 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3136 V2: CI->getArgOperand(i: 1), Mask: ShuffleMask);
3137 Rep =
3138 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3139 } else if (Name.starts_with(Prefix: "avx512.mask.broadcastf") ||
3140 Name.starts_with(Prefix: "avx512.mask.broadcasti")) {
3141 unsigned NumSrcElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 0)->getType())
3142 ->getNumElements();
3143 unsigned NumDstElts =
3144 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3145
3146 SmallVector<int, 8> ShuffleMask(NumDstElts);
3147 for (unsigned i = 0; i != NumDstElts; ++i)
3148 ShuffleMask[i] = i % NumSrcElts;
3149
3150 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3151 V2: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3152 Rep =
3153 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3154 } else if (Name.starts_with(Prefix: "avx2.pbroadcast") ||
3155 Name.starts_with(Prefix: "avx2.vbroadcast") ||
3156 Name.starts_with(Prefix: "avx512.pbroadcast") ||
3157 Name.starts_with(Prefix: "avx512.mask.broadcast.s")) {
3158 // Replace vp?broadcasts with a vector shuffle.
3159 Value *Op = CI->getArgOperand(i: 0);
3160 ElementCount EC = cast<VectorType>(Val: CI->getType())->getElementCount();
3161 Type *MaskTy = VectorType::get(ElementType: Type::getInt32Ty(C), EC);
3162 SmallVector<int, 8> M;
3163 ShuffleVectorInst::getShuffleMask(Mask: Constant::getNullValue(Ty: MaskTy), Result&: M);
3164 Rep = Builder.CreateShuffleVector(V: Op, Mask: M);
3165
3166 if (CI->arg_size() == 3)
3167 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3168 Op1: CI->getArgOperand(i: 1));
3169 } else if (Name.starts_with(Prefix: "sse2.padds.") ||
3170 Name.starts_with(Prefix: "avx2.padds.") ||
3171 Name.starts_with(Prefix: "avx512.padds.") ||
3172 Name.starts_with(Prefix: "avx512.mask.padds.")) {
3173 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::sadd_sat);
3174 } else if (Name.starts_with(Prefix: "sse2.psubs.") ||
3175 Name.starts_with(Prefix: "avx2.psubs.") ||
3176 Name.starts_with(Prefix: "avx512.psubs.") ||
3177 Name.starts_with(Prefix: "avx512.mask.psubs.")) {
3178 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::ssub_sat);
3179 } else if (Name.starts_with(Prefix: "sse2.paddus.") ||
3180 Name.starts_with(Prefix: "avx2.paddus.") ||
3181 Name.starts_with(Prefix: "avx512.mask.paddus.")) {
3182 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::uadd_sat);
3183 } else if (Name.starts_with(Prefix: "sse2.psubus.") ||
3184 Name.starts_with(Prefix: "avx2.psubus.") ||
3185 Name.starts_with(Prefix: "avx512.mask.psubus.")) {
3186 Rep = upgradeX86BinaryIntrinsics(Builder, CI&: *CI, IID: Intrinsic::usub_sat);
3187 } else if (Name.starts_with(Prefix: "avx512.mask.palignr.")) {
3188 Rep = upgradeX86ALIGNIntrinsics(Builder, Op0: CI->getArgOperand(i: 0),
3189 Op1: CI->getArgOperand(i: 1), Shift: CI->getArgOperand(i: 2),
3190 Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4),
3191 IsVALIGN: false);
3192 } else if (Name.starts_with(Prefix: "avx512.mask.valign.")) {
3193 Rep = upgradeX86ALIGNIntrinsics(
3194 Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1),
3195 Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), IsVALIGN: true);
3196 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3197 // 128/256-bit shift left specified in bits.
3198 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3199 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3200 Shift: Shift / 8); // Shift is in bits.
3201 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3202 // 128/256-bit shift right specified in bits.
3203 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3204 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3205 Shift: Shift / 8); // Shift is in bits.
3206 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3207 Name == "avx512.psll.dq.512") {
3208 // 128/256/512-bit shift left specified in bytes.
3209 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3210 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3211 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3212 Name == "avx512.psrl.dq.512") {
3213 // 128/256/512-bit shift right specified in bytes.
3214 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3215 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3216 } else if (Name == "sse41.pblendw" || Name.starts_with(Prefix: "sse41.blendp") ||
3217 Name.starts_with(Prefix: "avx.blend.p") || Name == "avx2.pblendw" ||
3218 Name.starts_with(Prefix: "avx2.pblendd.")) {
3219 Value *Op0 = CI->getArgOperand(i: 0);
3220 Value *Op1 = CI->getArgOperand(i: 1);
3221 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3222 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3223 unsigned NumElts = VecTy->getNumElements();
3224
3225 SmallVector<int, 16> Idxs(NumElts);
3226 for (unsigned i = 0; i != NumElts; ++i)
3227 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3228
3229 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3230 } else if (Name.starts_with(Prefix: "avx.vinsertf128.") ||
3231 Name == "avx2.vinserti128" ||
3232 Name.starts_with(Prefix: "avx512.mask.insert")) {
3233 Value *Op0 = CI->getArgOperand(i: 0);
3234 Value *Op1 = CI->getArgOperand(i: 1);
3235 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3236 unsigned DstNumElts =
3237 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3238 unsigned SrcNumElts =
3239 cast<FixedVectorType>(Val: Op1->getType())->getNumElements();
3240 unsigned Scale = DstNumElts / SrcNumElts;
3241
3242 // Mask off the high bits of the immediate value; hardware ignores those.
3243 Imm = Imm % Scale;
3244
3245 // Extend the second operand into a vector the size of the destination.
3246 SmallVector<int, 8> Idxs(DstNumElts);
3247 for (unsigned i = 0; i != SrcNumElts; ++i)
3248 Idxs[i] = i;
3249 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3250 Idxs[i] = SrcNumElts;
3251 Rep = Builder.CreateShuffleVector(V: Op1, Mask: Idxs);
3252
3253 // Insert the second operand into the first operand.
3254
3255 // Note that there is no guarantee that instruction lowering will actually
3256 // produce a vinsertf128 instruction for the created shuffles. In
3257 // particular, the 0 immediate case involves no lane changes, so it can
3258 // be handled as a blend.
3259
3260 // Example of shuffle mask for 32-bit elements:
3261 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3262 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3263
3264 // First fill with the identity mask.
3265 for (unsigned i = 0; i != DstNumElts; ++i)
3266 Idxs[i] = i;
3267 // Then replace the elements where we need to insert.
3268 for (unsigned i = 0; i != SrcNumElts; ++i)
3269 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3270 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Rep, Mask: Idxs);
3271
3272 // If the intrinsic has a mask operand, handle that.
3273 if (CI->arg_size() == 5)
3274 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep,
3275 Op1: CI->getArgOperand(i: 3));
3276 } else if (Name.starts_with(Prefix: "avx.vextractf128.") ||
3277 Name == "avx2.vextracti128" ||
3278 Name.starts_with(Prefix: "avx512.mask.vextract")) {
3279 Value *Op0 = CI->getArgOperand(i: 0);
3280 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3281 unsigned DstNumElts =
3282 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3283 unsigned SrcNumElts =
3284 cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
3285 unsigned Scale = SrcNumElts / DstNumElts;
3286
3287 // Mask off the high bits of the immediate value; hardware ignores those.
3288 Imm = Imm % Scale;
3289
3290 // Get indexes for the subvector of the input vector.
3291 SmallVector<int, 8> Idxs(DstNumElts);
3292 for (unsigned i = 0; i != DstNumElts; ++i) {
3293 Idxs[i] = i + (Imm * DstNumElts);
3294 }
3295 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3296
3297 // If the intrinsic has a mask operand, handle that.
3298 if (CI->arg_size() == 4)
3299 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3300 Op1: CI->getArgOperand(i: 2));
3301 } else if (Name.starts_with(Prefix: "avx512.mask.perm.df.") ||
3302 Name.starts_with(Prefix: "avx512.mask.perm.di.")) {
3303 Value *Op0 = CI->getArgOperand(i: 0);
3304 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3305 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3306 unsigned NumElts = VecTy->getNumElements();
3307
3308 SmallVector<int, 8> Idxs(NumElts);
3309 for (unsigned i = 0; i != NumElts; ++i)
3310 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3311
3312 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3313
3314 if (CI->arg_size() == 4)
3315 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3316 Op1: CI->getArgOperand(i: 2));
3317 } else if (Name.starts_with(Prefix: "avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3318 // The immediate permute control byte looks like this:
3319 // [1:0] - select 128 bits from sources for low half of destination
3320 // [2] - ignore
3321 // [3] - zero low half of destination
3322 // [5:4] - select 128 bits from sources for high half of destination
3323 // [6] - ignore
3324 // [7] - zero high half of destination
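// Illustrative example for <8 x float>: Imm = 0x31 selects the high 128-bit
// lane of the first source for the low half and the high lane of the second
// source for the high half, i.e. shuffle mask <4,5,6,7,12,13,14,15>.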
3325
3326 uint8_t Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3327
3328 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3329 unsigned HalfSize = NumElts / 2;
3330 SmallVector<int, 8> ShuffleMask(NumElts);
3331
3332 // Determine which operand(s) are actually in use for this instruction.
3333 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3334 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3335
3336 // If needed, replace operands based on zero mask.
3337 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(Ty: CI->getType()) : V0;
3338 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(Ty: CI->getType()) : V1;
3339
3340 // Permute low half of result.
3341 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3342 for (unsigned i = 0; i < HalfSize; ++i)
3343 ShuffleMask[i] = StartIndex + i;
3344
3345 // Permute high half of result.
3346 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3347 for (unsigned i = 0; i < HalfSize; ++i)
3348 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3349
3350 Rep = Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: ShuffleMask);
3351
3352 } else if (Name.starts_with(Prefix: "avx.vpermil.") || Name == "sse2.pshuf.d" ||
3353 Name.starts_with(Prefix: "avx512.mask.vpermil.p") ||
3354 Name.starts_with(Prefix: "avx512.mask.pshuf.d.")) {
3355 Value *Op0 = CI->getArgOperand(i: 0);
3356 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3357 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3358 unsigned NumElts = VecTy->getNumElements();
3359 // Calculate the size of each index in the immediate.
3360 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3361 unsigned IdxMask = ((1 << IdxSize) - 1);
3362
3363 SmallVector<int, 8> Idxs(NumElts);
3364 // Look up the bits for this element, wrapping around the immediate every
3365 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3366 // to offset by the first index of each group.
3367 for (unsigned i = 0; i != NumElts; ++i)
3368 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3369
3370 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3371
3372 if (CI->arg_size() == 4)
3373 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3374 Op1: CI->getArgOperand(i: 2));
3375 } else if (Name == "sse2.pshufl.w" ||
3376 Name.starts_with(Prefix: "avx512.mask.pshufl.w.")) {
3377 Value *Op0 = CI->getArgOperand(i: 0);
3378 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3379 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3380
3381 SmallVector<int, 16> Idxs(NumElts);
3382 for (unsigned l = 0; l != NumElts; l += 8) {
3383 for (unsigned i = 0; i != 4; ++i)
3384 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3385 for (unsigned i = 4; i != 8; ++i)
3386 Idxs[i + l] = i + l;
3387 }
3388
3389 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3390
3391 if (CI->arg_size() == 4)
3392 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3393 Op1: CI->getArgOperand(i: 2));
3394 } else if (Name == "sse2.pshufh.w" ||
3395 Name.starts_with(Prefix: "avx512.mask.pshufh.w.")) {
3396 Value *Op0 = CI->getArgOperand(i: 0);
3397 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3398 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3399
3400 SmallVector<int, 16> Idxs(NumElts);
3401 for (unsigned l = 0; l != NumElts; l += 8) {
3402 for (unsigned i = 0; i != 4; ++i)
3403 Idxs[i + l] = i + l;
3404 for (unsigned i = 0; i != 4; ++i)
3405 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3406 }
3407
3408 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3409
3410 if (CI->arg_size() == 4)
3411 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3412 Op1: CI->getArgOperand(i: 2));
3413 } else if (Name.starts_with(Prefix: "avx512.mask.shuf.p")) {
3414 Value *Op0 = CI->getArgOperand(i: 0);
3415 Value *Op1 = CI->getArgOperand(i: 1);
3416 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3417 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3418
3419 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3420 unsigned HalfLaneElts = NumLaneElts / 2;
3421
3422 SmallVector<int, 16> Idxs(NumElts);
3423 for (unsigned i = 0; i != NumElts; ++i) {
3424 // Base index is the starting element of the lane.
3425 Idxs[i] = i - (i % NumLaneElts);
3426 // If we are halfway through the lane, switch to the other source.
3427 if ((i % NumLaneElts) >= HalfLaneElts)
3428 Idxs[i] += NumElts;
3429 // Now select the specific element by adding HalfLaneElts bits from
3430 // the immediate, wrapping around the immediate every 8 bits.
3431 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3432 }
3433
3434 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3435
3436 Rep =
3437 emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: CI->getArgOperand(i: 3));
3438 } else if (Name.starts_with(Prefix: "avx512.mask.movddup") ||
3439 Name.starts_with(Prefix: "avx512.mask.movshdup") ||
3440 Name.starts_with(Prefix: "avx512.mask.movsldup")) {
3441 Value *Op0 = CI->getArgOperand(i: 0);
3442 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3443 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3444
3445 unsigned Offset = 0;
3446 if (Name.starts_with(Prefix: "avx512.mask.movshdup."))
3447 Offset = 1;
3448
3449 SmallVector<int, 16> Idxs(NumElts);
3450 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3451 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3452 Idxs[i + l + 0] = i + l + Offset;
3453 Idxs[i + l + 1] = i + l + Offset;
3454 }
3455
3456 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3457
3458 Rep =
3459 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3460 } else if (Name.starts_with(Prefix: "avx512.mask.punpckl") ||
3461 Name.starts_with(Prefix: "avx512.mask.unpckl.")) {
3462 Value *Op0 = CI->getArgOperand(i: 0);
3463 Value *Op1 = CI->getArgOperand(i: 1);
3464 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3465 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3466
3467 SmallVector<int, 64> Idxs(NumElts);
3468 for (int l = 0; l != NumElts; l += NumLaneElts)
3469 for (int i = 0; i != NumLaneElts; ++i)
3470 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3471
3472 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3473
3474 Rep =
3475 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3476 } else if (Name.starts_with(Prefix: "avx512.mask.punpckh") ||
3477 Name.starts_with(Prefix: "avx512.mask.unpckh.")) {
3478 Value *Op0 = CI->getArgOperand(i: 0);
3479 Value *Op1 = CI->getArgOperand(i: 1);
3480 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3481 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3482
3483 SmallVector<int, 64> Idxs(NumElts);
3484 for (int l = 0; l != NumElts; l += NumLaneElts)
3485 for (int i = 0; i != NumLaneElts; ++i)
3486 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3487
3488 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3489
3490 Rep =
3491 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3492 } else if (Name.starts_with(Prefix: "avx512.mask.and.") ||
3493 Name.starts_with(Prefix: "avx512.mask.pand.")) {
3494 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3495 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3496 Rep = Builder.CreateAnd(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3497 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3498 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3499 Rep =
3500 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3501 } else if (Name.starts_with(Prefix: "avx512.mask.andn.") ||
3502 Name.starts_with(Prefix: "avx512.mask.pandn.")) {
3503 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3504 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3505 Rep = Builder.CreateNot(V: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy));
3506 Rep = Builder.CreateAnd(LHS: Rep,
3507 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3508 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3509 Rep =
3510 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3511 } else if (Name.starts_with(Prefix: "avx512.mask.or.") ||
3512 Name.starts_with(Prefix: "avx512.mask.por.")) {
3513 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3514 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3515 Rep = Builder.CreateOr(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3516 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3517 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3518 Rep =
3519 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3520 } else if (Name.starts_with(Prefix: "avx512.mask.xor.") ||
3521 Name.starts_with(Prefix: "avx512.mask.pxor.")) {
3522 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3523 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3524 Rep = Builder.CreateXor(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3525 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3526 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3527 Rep =
3528 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3529 } else if (Name.starts_with(Prefix: "avx512.mask.padd.")) {
3530 Rep = Builder.CreateAdd(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3531 Rep =
3532 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3533 } else if (Name.starts_with(Prefix: "avx512.mask.psub.")) {
3534 Rep = Builder.CreateSub(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3535 Rep =
3536 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3537 } else if (Name.starts_with(Prefix: "avx512.mask.pmull.")) {
3538 Rep = Builder.CreateMul(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3539 Rep =
3540 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3541 } else if (Name.starts_with(Prefix: "avx512.mask.add.p")) {
3542 if (Name.ends_with(Suffix: ".512")) {
3543 Intrinsic::ID IID;
3544 if (Name[17] == 's')
3545 IID = Intrinsic::x86_avx512_add_ps_512;
3546 else
3547 IID = Intrinsic::x86_avx512_add_pd_512;
3548
3549 Rep = Builder.CreateIntrinsic(
3550 ID: IID,
3551 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3552 } else {
3553 Rep = Builder.CreateFAdd(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3554 }
3555 Rep =
3556 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3557 } else if (Name.starts_with(Prefix: "avx512.mask.div.p")) {
3558 if (Name.ends_with(Suffix: ".512")) {
3559 Intrinsic::ID IID;
3560 if (Name[17] == 's')
3561 IID = Intrinsic::x86_avx512_div_ps_512;
3562 else
3563 IID = Intrinsic::x86_avx512_div_pd_512;
3564
3565 Rep = Builder.CreateIntrinsic(
3566 ID: IID,
3567 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3568 } else {
3569 Rep = Builder.CreateFDiv(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3570 }
3571 Rep =
3572 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3573 } else if (Name.starts_with(Prefix: "avx512.mask.mul.p")) {
3574 if (Name.ends_with(Suffix: ".512")) {
3575 Intrinsic::ID IID;
3576 if (Name[17] == 's')
3577 IID = Intrinsic::x86_avx512_mul_ps_512;
3578 else
3579 IID = Intrinsic::x86_avx512_mul_pd_512;
3580
3581 Rep = Builder.CreateIntrinsic(
3582 ID: IID,
3583 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3584 } else {
3585 Rep = Builder.CreateFMul(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3586 }
3587 Rep =
3588 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3589 } else if (Name.starts_with(Prefix: "avx512.mask.sub.p")) {
3590 if (Name.ends_with(Suffix: ".512")) {
3591 Intrinsic::ID IID;
3592 if (Name[17] == 's')
3593 IID = Intrinsic::x86_avx512_sub_ps_512;
3594 else
3595 IID = Intrinsic::x86_avx512_sub_pd_512;
3596
3597 Rep = Builder.CreateIntrinsic(
3598 ID: IID,
3599 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3600 } else {
3601 Rep = Builder.CreateFSub(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3602 }
3603 Rep =
3604 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3605 } else if ((Name.starts_with(Prefix: "avx512.mask.max.p") ||
3606 Name.starts_with(Prefix: "avx512.mask.min.p")) &&
3607 Name.drop_front(N: 18) == ".512") {
3608 bool IsDouble = Name[17] == 'd';
3609 bool IsMin = Name[13] == 'i';
3610 static const Intrinsic::ID MinMaxTbl[2][2] = {
3611 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3612 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3613 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3614
3615 Rep = Builder.CreateIntrinsic(
3616 ID: IID,
3617 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 4)});
3618 Rep =
3619 emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: CI->getArgOperand(i: 2));
3620 } else if (Name.starts_with(Prefix: "avx512.mask.lzcnt.")) {
3621 Rep =
3622 Builder.CreateIntrinsic(ID: Intrinsic::ctlz, Types: CI->getType(),
3623 Args: {CI->getArgOperand(i: 0), Builder.getInt1(V: false)});
3624 Rep =
3625 emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, Op1: CI->getArgOperand(i: 1));
3626 } else if (Name.starts_with(Prefix: "avx512.mask.psll")) {
3627 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3628 bool IsVariable = Name[16] == 'v';
3629 char Size = Name[16] == '.' ? Name[17]
3630 : Name[17] == '.' ? Name[18]
3631 : Name[18] == '.' ? Name[19]
3632 : Name[20];
3633
3634 Intrinsic::ID IID;
3635 if (IsVariable && Name[17] != '.') {
3636 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3637 IID = Intrinsic::x86_avx2_psllv_q;
3638 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3639 IID = Intrinsic::x86_avx2_psllv_q_256;
3640 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3641 IID = Intrinsic::x86_avx2_psllv_d;
3642 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3643 IID = Intrinsic::x86_avx2_psllv_d_256;
3644 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3645 IID = Intrinsic::x86_avx512_psllv_w_128;
3646 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3647 IID = Intrinsic::x86_avx512_psllv_w_256;
3648 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3649 IID = Intrinsic::x86_avx512_psllv_w_512;
3650 else
3651 llvm_unreachable("Unexpected size");
3652 } else if (Name.ends_with(Suffix: ".128")) {
3653 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3654 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3655 : Intrinsic::x86_sse2_psll_d;
3656 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3657 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3658 : Intrinsic::x86_sse2_psll_q;
3659 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3660 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3661 : Intrinsic::x86_sse2_psll_w;
3662 else
3663 llvm_unreachable("Unexpected size");
3664 } else if (Name.ends_with(Suffix: ".256")) {
3665 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3666 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3667 : Intrinsic::x86_avx2_psll_d;
3668 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3669 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3670 : Intrinsic::x86_avx2_psll_q;
3671 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3672 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3673 : Intrinsic::x86_avx2_psll_w;
3674 else
3675 llvm_unreachable("Unexpected size");
3676 } else {
3677 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3678 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3679 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3680 : Intrinsic::x86_avx512_psll_d_512;
3681 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3682 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3683 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3684 : Intrinsic::x86_avx512_psll_q_512;
3685 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3686 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3687 : Intrinsic::x86_avx512_psll_w_512;
3688 else
3689 llvm_unreachable("Unexpected size");
3690 }
3691
3692 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
3693 } else if (Name.starts_with(Prefix: "avx512.mask.psrl")) {
3694 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3695 bool IsVariable = Name[16] == 'v';
3696 char Size = Name[16] == '.' ? Name[17]
3697 : Name[17] == '.' ? Name[18]
3698 : Name[18] == '.' ? Name[19]
3699 : Name[20];
3700
3701 Intrinsic::ID IID;
3702 if (IsVariable && Name[17] != '.') {
3703 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3704 IID = Intrinsic::x86_avx2_psrlv_q;
3705 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3706 IID = Intrinsic::x86_avx2_psrlv_q_256;
3707 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3708 IID = Intrinsic::x86_avx2_psrlv_d;
3709 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3710 IID = Intrinsic::x86_avx2_psrlv_d_256;
3711 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3712 IID = Intrinsic::x86_avx512_psrlv_w_128;
3713 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3714 IID = Intrinsic::x86_avx512_psrlv_w_256;
3715 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3716 IID = Intrinsic::x86_avx512_psrlv_w_512;
3717 else
3718 llvm_unreachable("Unexpected size");
3719 } else if (Name.ends_with(Suffix: ".128")) {
3720 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3721 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3722 : Intrinsic::x86_sse2_psrl_d;
3723 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3724 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3725 : Intrinsic::x86_sse2_psrl_q;
3726 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3727 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3728 : Intrinsic::x86_sse2_psrl_w;
3729 else
3730 llvm_unreachable("Unexpected size");
3731 } else if (Name.ends_with(Suffix: ".256")) {
3732 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3733 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3734 : Intrinsic::x86_avx2_psrl_d;
3735 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3736 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3737 : Intrinsic::x86_avx2_psrl_q;
3738 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3739 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3740 : Intrinsic::x86_avx2_psrl_w;
3741 else
3742 llvm_unreachable("Unexpected size");
3743 } else {
3744 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3745 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3746 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3747 : Intrinsic::x86_avx512_psrl_d_512;
3748 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3749 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3750 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3751 : Intrinsic::x86_avx512_psrl_q_512;
3752 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3753 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3754 : Intrinsic::x86_avx512_psrl_w_512;
3755 else
3756 llvm_unreachable("Unexpected size");
3757 }
3758
3759 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
3760 } else if (Name.starts_with(Prefix: "avx512.mask.psra")) {
3761 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3762 bool IsVariable = Name[16] == 'v';
3763 char Size = Name[16] == '.' ? Name[17]
3764 : Name[17] == '.' ? Name[18]
3765 : Name[18] == '.' ? Name[19]
3766 : Name[20];
3767
3768 Intrinsic::ID IID;
3769 if (IsVariable && Name[17] != '.') {
3770 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3771 IID = Intrinsic::x86_avx2_psrav_d;
3772 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3773 IID = Intrinsic::x86_avx2_psrav_d_256;
3774 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3775 IID = Intrinsic::x86_avx512_psrav_w_128;
3776 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3777 IID = Intrinsic::x86_avx512_psrav_w_256;
3778 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3779 IID = Intrinsic::x86_avx512_psrav_w_512;
3780 else
3781 llvm_unreachable("Unexpected size");
3782 } else if (Name.ends_with(Suffix: ".128")) {
3783 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3784 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3785 : Intrinsic::x86_sse2_psra_d;
3786 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3787 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3788 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3789 : Intrinsic::x86_avx512_psra_q_128;
3790 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3791 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3792 : Intrinsic::x86_sse2_psra_w;
3793 else
3794 llvm_unreachable("Unexpected size");
3795 } else if (Name.ends_with(Suffix: ".256")) {
3796 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3797 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3798 : Intrinsic::x86_avx2_psra_d;
3799 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3800 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3801 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3802 : Intrinsic::x86_avx512_psra_q_256;
3803 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3804 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3805 : Intrinsic::x86_avx2_psra_w;
3806 else
3807 llvm_unreachable("Unexpected size");
3808 } else {
3809 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3810 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3811 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3812 : Intrinsic::x86_avx512_psra_d_512;
3813 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3814 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3815 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3816 : Intrinsic::x86_avx512_psra_q_512;
3817 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3818 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3819 : Intrinsic::x86_avx512_psra_w_512;
3820 else
3821 llvm_unreachable("Unexpected size");
3822 }
3823
3824 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
3825 } else if (Name.starts_with(Prefix: "avx512.mask.move.s")) {
3826 Rep = upgradeMaskedMove(Builder, CI&: *CI);
3827 } else if (Name.starts_with(Prefix: "avx512.cvtmask2")) {
3828 Rep = upgradeMaskToInt(Builder, CI&: *CI);
3829 } else if (Name.ends_with(Suffix: ".movntdqa")) {
3830 MDNode *Node = MDNode::get(
3831 Context&: C, MDs: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
3832
3833 LoadInst *LI = Builder.CreateAlignedLoad(
3834 Ty: CI->getType(), Ptr: CI->getArgOperand(i: 0),
3835 Align: Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3836 LI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
3837 Rep = LI;
3838 } else if (Name.starts_with(Prefix: "fma.vfmadd.") ||
3839 Name.starts_with(Prefix: "fma.vfmsub.") ||
3840 Name.starts_with(Prefix: "fma.vfnmadd.") ||
3841 Name.starts_with(Prefix: "fma.vfnmsub.")) {
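// Decode the variant from the name: "vfnm*" negates the product, "*sub"
// negates the addend, and a ".ss"/".sd" suffix selects the scalar
// (element 0) form rather than the packed one.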
3842 bool NegMul = Name[6] == 'n';
3843 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3844 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3845
3846 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3847 CI->getArgOperand(i: 2)};
3848
3849 if (IsScalar) {
3850 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
3851 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
3852 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
3853 }
3854
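// Fold the requested negations into the llvm.fma operands; negating either
// multiplicand yields the same negated product, so the packed and scalar
// paths simply pick different operands to carry the sign.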
3855 if (NegMul && !IsScalar)
3856 Ops[0] = Builder.CreateFNeg(V: Ops[0]);
3857 if (NegMul && IsScalar)
3858 Ops[1] = Builder.CreateFNeg(V: Ops[1]);
3859 if (NegAcc)
3860 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
3861
3862 Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops);
3863
3864 if (IsScalar)
3865 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
3866 } else if (Name.starts_with(Prefix: "fma4.vfmadd.s")) {
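// The FMA4 scalar instructions zero the upper elements of the destination
// (unlike FMA3), hence the scalar result is inserted into a zero vector
// rather than into operand 0.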
3867 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3868 CI->getArgOperand(i: 2)};
3869
3870 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
3871 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
3872 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
3873
3874 Rep = Builder.CreateIntrinsic(ID: Intrinsic::fma, Types: Ops[0]->getType(), Args: Ops);
3875
3876 Rep = Builder.CreateInsertElement(Vec: Constant::getNullValue(Ty: CI->getType()),
3877 NewElt: Rep, Idx: (uint64_t)0);
3878 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.s") ||
3879 Name.starts_with(Prefix: "avx512.maskz.vfmadd.s") ||
3880 Name.starts_with(Prefix: "avx512.mask3.vfmadd.s") ||
3881 Name.starts_with(Prefix: "avx512.mask3.vfmsub.s") ||
3882 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.s")) {
3883 bool IsMask3 = Name[11] == '3';
3884 bool IsMaskZ = Name[11] == 'z';
3885 // Drop the "avx512.mask." to make it easier.
3886 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
3887 bool NegMul = Name[2] == 'n';
3888 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3889
3890 Value *A = CI->getArgOperand(i: 0);
3891 Value *B = CI->getArgOperand(i: 1);
3892 Value *C = CI->getArgOperand(i: 2);
3893
3894 if (NegMul && (IsMask3 || IsMaskZ))
3895 A = Builder.CreateFNeg(V: A);
3896 if (NegMul && !(IsMask3 || IsMaskZ))
3897 B = Builder.CreateFNeg(V: B);
3898 if (NegAcc)
3899 C = Builder.CreateFNeg(V: C);
3900
3901 A = Builder.CreateExtractElement(Vec: A, Idx: (uint64_t)0);
3902 B = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
3903 C = Builder.CreateExtractElement(Vec: C, Idx: (uint64_t)0);
3904
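// Operand 4 is the rounding mode; only CUR_DIRECTION (4) can be expressed
// with plain llvm.fma, so any other value keeps a rounding-capable
// target intrinsic.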
3905 if (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
3906 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4) {
3907 Value *Ops[] = {A, B, C, CI->getArgOperand(i: 4)};
3908
3909 Intrinsic::ID IID;
3910 if (Name.back() == 'd')
3911 IID = Intrinsic::x86_avx512_vfmadd_f64;
3912 else
3913 IID = Intrinsic::x86_avx512_vfmadd_f32;
3914 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
3915 } else {
3916 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
3917 }
3918
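// The write mask selects between the computed element and a pass-through:
// zero for the maskz forms, element 0 of the addend for mask3, and
// element 0 of operand 0 otherwise.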
3919 Value *PassThru = IsMaskZ ? Constant::getNullValue(Ty: Rep->getType())
3920 : IsMask3 ? C
3921 : A;
3922
3923 // For Mask3 with NegAcc, we need to create a new extractelement that
3924 // avoids the negation above.
3925 if (NegAcc && IsMask3)
3926 PassThru =
3927 Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 2), Idx: (uint64_t)0);
3928
3929 Rep = emitX86ScalarSelect(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
3930 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: IsMask3 ? 2 : 0), NewElt: Rep,
3931 Idx: (uint64_t)0);
3932 } else if (Name.starts_with(Prefix: "avx512.mask.vfmadd.p") ||
3933 Name.starts_with(Prefix: "avx512.mask.vfnmadd.p") ||
3934 Name.starts_with(Prefix: "avx512.mask.vfnmsub.p") ||
3935 Name.starts_with(Prefix: "avx512.mask3.vfmadd.p") ||
3936 Name.starts_with(Prefix: "avx512.mask3.vfmsub.p") ||
3937 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.p") ||
3938 Name.starts_with(Prefix: "avx512.maskz.vfmadd.p")) {
3939 bool IsMask3 = Name[11] == '3';
3940 bool IsMaskZ = Name[11] == 'z';
3941 // Drop the "avx512.mask." to make it easier.
3942 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
3943 bool NegMul = Name[2] == 'n';
3944 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3945
3946 Value *A = CI->getArgOperand(i: 0);
3947 Value *B = CI->getArgOperand(i: 1);
3948 Value *C = CI->getArgOperand(i: 2);
3949
3950 if (NegMul && (IsMask3 || IsMaskZ))
3951 A = Builder.CreateFNeg(V: A);
3952 if (NegMul && !(IsMask3 || IsMaskZ))
3953 B = Builder.CreateFNeg(V: B);
3954 if (NegAcc)
3955 C = Builder.CreateFNeg(V: C);
3956
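// Only the 512-bit forms carry a rounding-mode operand (five arguments). If
// it requests anything other than CUR_DIRECTION (4), keep a rounding-capable
// intrinsic; otherwise plain llvm.fma suffices.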
3957 if (CI->arg_size() == 5 &&
3958 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
3959 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4)) {
3960 Intrinsic::ID IID;
3961 // Check the character before ".512" in string.
3962 if (Name[Name.size() - 5] == 's')
3963 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3964 else
3965 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3966
3967 Rep = Builder.CreateIntrinsic(ID: IID, Args: {A, B, C, CI->getArgOperand(i: 4)});
3968 } else {
3969 Rep = Builder.CreateFMA(Factor1: A, Factor2: B, Summand: C);
3970 }
3971
3972 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
3973 : IsMask3 ? CI->getArgOperand(i: 2)
3974 : CI->getArgOperand(i: 0);
3975
3976 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
3977 } else if (Name.starts_with(Prefix: "fma.vfmsubadd.p")) {
3978 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3979 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3980 Intrinsic::ID IID;
3981 if (VecWidth == 128 && EltWidth == 32)
3982 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3983 else if (VecWidth == 256 && EltWidth == 32)
3984 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3985 else if (VecWidth == 128 && EltWidth == 64)
3986 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3987 else if (VecWidth == 256 && EltWidth == 64)
3988 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3989 else
3990 llvm_unreachable("Unexpected intrinsic");
3991
3992 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3993 CI->getArgOperand(i: 2)};
3994 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
3995 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
3996 } else if (Name.starts_with(Prefix: "avx512.mask.vfmaddsub.p") ||
3997 Name.starts_with(Prefix: "avx512.mask3.vfmaddsub.p") ||
3998 Name.starts_with(Prefix: "avx512.maskz.vfmaddsub.p") ||
3999 Name.starts_with(Prefix: "avx512.mask3.vfmsubadd.p")) {
4000 bool IsMask3 = Name[11] == '3';
4001 bool IsMaskZ = Name[11] == 'z';
4002 // Drop the "avx512.mask." to make it easier.
4003 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
4004 bool IsSubAdd = Name[3] == 's';
4005 if (CI->arg_size() == 5) {
4006 Intrinsic::ID IID;
4007 // Check the character before ".512" in string.
4008 if (Name[Name.size() - 5] == 's')
4009 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4010 else
4011 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4012
4013 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4014 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
4015 if (IsSubAdd)
4016 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4017
4018 Rep = Builder.CreateIntrinsic(ID: IID, Args: Ops);
4019 } else {
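// For the 128/256-bit forms, build the add/sub interleave from generic
// llvm.fma calls: even lanes take a*b-c and odd lanes a*b+c (swapped for
// the subadd flavor), selected by the shuffle mask built below.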
4020 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
4021
4022 Value *Ops[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4023 CI->getArgOperand(i: 2)};
4024
4025 Function *FMA = Intrinsic::getOrInsertDeclaration(
4026 M: CI->getModule(), id: Intrinsic::fma, Tys: Ops[0]->getType());
4027 Value *Odd = Builder.CreateCall(Callee: FMA, Args: Ops);
4028 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4029 Value *Even = Builder.CreateCall(Callee: FMA, Args: Ops);
4030
4031 if (IsSubAdd)
4032 std::swap(a&: Even, b&: Odd);
4033
4034 SmallVector<int, 32> Idxs(NumElts);
4035 for (int i = 0; i != NumElts; ++i)
4036 Idxs[i] = i + (i % 2) * NumElts;
4037
4038 Rep = Builder.CreateShuffleVector(V1: Even, V2: Odd, Mask: Idxs);
4039 }
4040
4041 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType())
4042 : IsMask3 ? CI->getArgOperand(i: 2)
4043 : CI->getArgOperand(i: 0);
4044
4045 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4046 } else if (Name.starts_with(Prefix: "avx512.mask.pternlog.") ||
4047 Name.starts_with(Prefix: "avx512.maskz.pternlog.")) {
4048 bool ZeroMask = Name[11] == 'z';
4049 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4050 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4051 Intrinsic::ID IID;
4052 if (VecWidth == 128 && EltWidth == 32)
4053 IID = Intrinsic::x86_avx512_pternlog_d_128;
4054 else if (VecWidth == 256 && EltWidth == 32)
4055 IID = Intrinsic::x86_avx512_pternlog_d_256;
4056 else if (VecWidth == 512 && EltWidth == 32)
4057 IID = Intrinsic::x86_avx512_pternlog_d_512;
4058 else if (VecWidth == 128 && EltWidth == 64)
4059 IID = Intrinsic::x86_avx512_pternlog_q_128;
4060 else if (VecWidth == 256 && EltWidth == 64)
4061 IID = Intrinsic::x86_avx512_pternlog_q_256;
4062 else if (VecWidth == 512 && EltWidth == 64)
4063 IID = Intrinsic::x86_avx512_pternlog_q_512;
4064 else
4065 llvm_unreachable("Unexpected intrinsic");
4066
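// Operand 3 is the 8-bit ternary truth-table immediate; operand 4 is the
// write mask, applied afterwards as a vector select against the
// pass-through value.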
4067 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4068 CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)};
4069 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4070 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4071 : CI->getArgOperand(i: 0);
4072 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: PassThru);
4073 } else if (Name.starts_with(Prefix: "avx512.mask.vpmadd52") ||
4074 Name.starts_with(Prefix: "avx512.maskz.vpmadd52")) {
4075 bool ZeroMask = Name[11] == 'z';
4076 bool High = Name[20] == 'h' || Name[21] == 'h';
4077 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4078 Intrinsic::ID IID;
4079 if (VecWidth == 128 && !High)
4080 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4081 else if (VecWidth == 256 && !High)
4082 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4083 else if (VecWidth == 512 && !High)
4084 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4085 else if (VecWidth == 128 && High)
4086 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4087 else if (VecWidth == 256 && High)
4088 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4089 else if (VecWidth == 512 && High)
4090 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4091 else
4092 llvm_unreachable("Unexpected intrinsic");
4093
4094 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4095 CI->getArgOperand(i: 2)};
4096 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4097 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4098 : CI->getArgOperand(i: 0);
4099 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4100 } else if (Name.starts_with(Prefix: "avx512.mask.vpermi2var.") ||
4101 Name.starts_with(Prefix: "avx512.mask.vpermt2var.") ||
4102 Name.starts_with(Prefix: "avx512.maskz.vpermt2var.")) {
4103 bool ZeroMask = Name[11] == 'z';
4104 bool IndexForm = Name[17] == 'i';
4105 Rep = upgradeX86VPERMT2Intrinsics(Builder, CI&: *CI, ZeroMask, IndexForm);
4106 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpbusd.") ||
4107 Name.starts_with(Prefix: "avx512.maskz.vpdpbusd.") ||
4108 Name.starts_with(Prefix: "avx512.mask.vpdpbusds.") ||
4109 Name.starts_with(Prefix: "avx512.maskz.vpdpbusds.")) {
4110 bool ZeroMask = Name[11] == 'z';
4111 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4112 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4113 Intrinsic::ID IID;
4114 if (VecWidth == 128 && !IsSaturating)
4115 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4116 else if (VecWidth == 256 && !IsSaturating)
4117 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4118 else if (VecWidth == 512 && !IsSaturating)
4119 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4120 else if (VecWidth == 128 && IsSaturating)
4121 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4122 else if (VecWidth == 256 && IsSaturating)
4123 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4124 else if (VecWidth == 512 && IsSaturating)
4125 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4126 else
4127 llvm_unreachable("Unexpected intrinsic");
4128
4129 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4130 CI->getArgOperand(i: 2)};
4131 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4132 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4133 : CI->getArgOperand(i: 0);
4134 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4135 } else if (Name.starts_with(Prefix: "avx512.mask.vpdpwssd.") ||
4136 Name.starts_with(Prefix: "avx512.maskz.vpdpwssd.") ||
4137 Name.starts_with(Prefix: "avx512.mask.vpdpwssds.") ||
4138 Name.starts_with(Prefix: "avx512.maskz.vpdpwssds.")) {
4139 bool ZeroMask = Name[11] == 'z';
4140 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4141 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4142 Intrinsic::ID IID;
4143 if (VecWidth == 128 && !IsSaturating)
4144 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4145 else if (VecWidth == 256 && !IsSaturating)
4146 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4147 else if (VecWidth == 512 && !IsSaturating)
4148 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4149 else if (VecWidth == 128 && IsSaturating)
4150 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4151 else if (VecWidth == 256 && IsSaturating)
4152 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4153 else if (VecWidth == 512 && IsSaturating)
4154 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4155 else
4156 llvm_unreachable("Unexpected intrinsic");
4157
4158 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4159 CI->getArgOperand(i: 2)};
4160 Rep = Builder.CreateIntrinsic(ID: IID, Args);
4161 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4162 : CI->getArgOperand(i: 0);
4163 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4164 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4165 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4166 Name == "subborrow.u32" || Name == "subborrow.u64") {
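// The old intrinsics returned only the flag and wrote the arithmetic result
// through the pointer in operand 3; the replacements return both values as a
// struct, so recreate the store and hand back element 0 as the flag.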
4167 Intrinsic::ID IID;
4168 if (Name[0] == 'a' && Name.back() == '2')
4169 IID = Intrinsic::x86_addcarry_32;
4170 else if (Name[0] == 'a' && Name.back() == '4')
4171 IID = Intrinsic::x86_addcarry_64;
4172 else if (Name[0] == 's' && Name.back() == '2')
4173 IID = Intrinsic::x86_subborrow_32;
4174 else if (Name[0] == 's' && Name.back() == '4')
4175 IID = Intrinsic::x86_subborrow_64;
4176 else
4177 llvm_unreachable("Unexpected intrinsic");
4178
4179 // Make a call with 3 operands.
4180 Value *Args[] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4181 CI->getArgOperand(i: 2)};
4182 Value *NewCall = Builder.CreateIntrinsic(ID: IID, Args);
4183
4184 // Extract the second result and store it.
4185 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
4186 Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 3), Align: Align(1));
4187 // Replace the original call result with the first result of the new call.
4188 Value *CF = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
4189
4190 CI->replaceAllUsesWith(V: CF);
4191 Rep = nullptr;
4192 } else if (Name.starts_with(Prefix: "avx512.mask.") &&
4193 upgradeAVX512MaskToSelect(Name, Builder, CI&: *CI, Rep)) {
4194 // Rep will be updated by the call in the condition.
4195 }
4196
4197 return Rep;
4198}
4199
4200static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4201 Function *F, IRBuilder<> &Builder) {
4202 if (Name.starts_with(Prefix: "neon.bfcvt")) {
4203 if (Name.starts_with(Prefix: "neon.bfcvtn2")) {
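// bfcvtn2 keeps the low half of operand 0 and narrows operand 1 into the
// high half, so rebuild it with an fptrunc and two shuffles.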
4204 SmallVector<int, 32> LoMask(4);
4205 std::iota(first: LoMask.begin(), last: LoMask.end(), value: 0);
4206 SmallVector<int, 32> ConcatMask(8);
4207 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4208 Value *Inactive = Builder.CreateShuffleVector(V: CI->getOperand(i_nocapture: 0), Mask: LoMask);
4209 Value *Trunc =
4210 Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 1), DestTy: Inactive->getType());
4211 return Builder.CreateShuffleVector(V1: Inactive, V2: Trunc, Mask: ConcatMask);
4212 } else if (Name.starts_with(Prefix: "neon.bfcvtn")) {
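// bfcvtn narrows to bf16 and zeroes the high half of the result.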
4213 SmallVector<int, 32> ConcatMask(8);
4214 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
4215 Type *V4BF16 =
4216 FixedVectorType::get(ElementType: Type::getBFloatTy(C&: F->getContext()), NumElts: 4);
4217 Value *Trunc = Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0), DestTy: V4BF16);
4219 return Builder.CreateShuffleVector(
4220 V1: Trunc, V2: ConstantAggregateZero::get(Ty: V4BF16), Mask: ConcatMask);
4221 } else {
4222 return Builder.CreateFPTrunc(V: CI->getOperand(i_nocapture: 0),
4223 DestTy: Type::getBFloatTy(C&: F->getContext()));
4224 }
4225 } else if (Name.starts_with(Prefix: "sve.fcvt")) {
4226 Intrinsic::ID NewID =
4227 StringSwitch<Intrinsic::ID>(Name)
4228 .Case(S: "sve.fcvt.bf16f32", Value: Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4229 .Case(S: "sve.fcvtnt.bf16f32",
4230 Value: Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4231 .Default(Value: Intrinsic::not_intrinsic);
4232 if (NewID == Intrinsic::not_intrinsic)
4233 llvm_unreachable("Unhandled Intrinsic!");
4234
4235 SmallVector<Value *, 3> Args(CI->args());
4236
4237 // The original intrinsics incorrectly used a predicate based on the
4238 // smallest element type rather than the largest.
4239 Type *BadPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
4240 Type *GoodPredTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
4241
4242 if (Args[1]->getType() != BadPredTy)
4243 llvm_unreachable("Unexpected predicate type!");
4244
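// Reinterpret the <vscale x 8 x i1> predicate as <vscale x 4 x i1> by
// round-tripping through the generic svbool type.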
4245 Args[1] = Builder.CreateIntrinsic(ID: Intrinsic::aarch64_sve_convert_to_svbool,
4246 Types: BadPredTy, Args: Args[1]);
4247 Args[1] = Builder.CreateIntrinsic(
4248 ID: Intrinsic::aarch64_sve_convert_from_svbool, Types: GoodPredTy, Args: Args[1]);
4249
4250 return Builder.CreateIntrinsic(ID: NewID, Args, /*FMFSource=*/nullptr,
4251 Name: CI->getName());
4252 }
4253
4254 llvm_unreachable("Unhandled Intrinsic!");
4255}
4256
4257static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4258 IRBuilder<> &Builder) {
4259 if (Name == "mve.vctp64.old") {
4260 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4261 // correct type.
4262 Value *VCTP = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_vctp64, Types: {},
4263 Args: CI->getArgOperand(i: 0),
4264 /*FMFSource=*/nullptr, Name: CI->getName());
4265 Value *C1 = Builder.CreateIntrinsic(
4266 ID: Intrinsic::arm_mve_pred_v2i,
4267 Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 2, Scalable: false)}, Args: VCTP);
4268 return Builder.CreateIntrinsic(
4269 ID: Intrinsic::arm_mve_pred_i2v,
4270 Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: C1);
4271 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4272 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4273 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4274 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4275 Name ==
4276 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4277 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4278 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4279 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4280 Name ==
4281 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4282 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4283 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4284 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4285 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4286 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4287 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4288 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4289 std::vector<Type *> Tys;
4290 unsigned ID = CI->getIntrinsicID();
4291 Type *V2I1Ty = FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts: 2);
4292 switch (ID) {
4293 case Intrinsic::arm_mve_mull_int_predicated:
4294 case Intrinsic::arm_mve_vqdmull_predicated:
4295 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4296 Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), V2I1Ty};
4297 break;
4298 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4299 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4300 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4301 Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 0)->getType(),
4302 V2I1Ty};
4303 break;
4304 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4305 Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(),
4306 CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
4307 break;
4308 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4309 Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 1)->getType(),
4310 CI->getOperand(i_nocapture: 2)->getType(), V2I1Ty};
4311 break;
4312 case Intrinsic::arm_cde_vcx1q_predicated:
4313 case Intrinsic::arm_cde_vcx1qa_predicated:
4314 case Intrinsic::arm_cde_vcx2q_predicated:
4315 case Intrinsic::arm_cde_vcx2qa_predicated:
4316 case Intrinsic::arm_cde_vcx3q_predicated:
4317 case Intrinsic::arm_cde_vcx3qa_predicated:
4318 Tys = {CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
4319 break;
4320 default:
4321 llvm_unreachable("Unhandled Intrinsic!");
4322 }
4323
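// Predicate operands arrive as v4i1; reinterpret them as v2i1 by converting
// the predicate to an integer and back at the narrower type.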
4324 std::vector<Value *> Ops;
4325 for (Value *Op : CI->args()) {
4326 Type *Ty = Op->getType();
4327 if (Ty->getScalarSizeInBits() == 1) {
4328 Value *C1 = Builder.CreateIntrinsic(
4329 ID: Intrinsic::arm_mve_pred_v2i,
4330 Types: {VectorType::get(ElementType: Builder.getInt1Ty(), NumElements: 4, Scalable: false)}, Args: Op);
4331 Op = Builder.CreateIntrinsic(ID: Intrinsic::arm_mve_pred_i2v, Types: {V2I1Ty}, Args: C1);
4332 }
4333 Ops.push_back(x: Op);
4334 }
4335
4336 return Builder.CreateIntrinsic(ID, Types: Tys, Args: Ops, /*FMFSource=*/nullptr,
4337 Name: CI->getName());
4338 }
4339 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4340}
4341
4342// These are expected to have the arguments:
4343// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4344//
4345// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4346//
4347static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4348 Function *F, IRBuilder<> &Builder) {
4349 AtomicRMWInst::BinOp RMWOp =
4350 StringSwitch<AtomicRMWInst::BinOp>(Name)
4351 .StartsWith(S: "ds.fadd", Value: AtomicRMWInst::FAdd)
4352 .StartsWith(S: "ds.fmin", Value: AtomicRMWInst::FMin)
4353 .StartsWith(S: "ds.fmax", Value: AtomicRMWInst::FMax)
4354 .StartsWith(S: "atomic.inc.", Value: AtomicRMWInst::UIncWrap)
4355 .StartsWith(S: "atomic.dec.", Value: AtomicRMWInst::UDecWrap)
4356 .StartsWith(S: "global.atomic.fadd", Value: AtomicRMWInst::FAdd)
4357 .StartsWith(S: "flat.atomic.fadd", Value: AtomicRMWInst::FAdd)
4358 .StartsWith(S: "global.atomic.fmin", Value: AtomicRMWInst::FMin)
4359 .StartsWith(S: "flat.atomic.fmin", Value: AtomicRMWInst::FMin)
4360 .StartsWith(S: "global.atomic.fmax", Value: AtomicRMWInst::FMax)
4361 .StartsWith(S: "flat.atomic.fmax", Value: AtomicRMWInst::FMax);
4362
4363 unsigned NumOperands = CI->getNumOperands();
4364 if (NumOperands < 3) // Malformed bitcode.
4365 return nullptr;
4366
4367 Value *Ptr = CI->getArgOperand(i: 0);
4368 PointerType *PtrTy = dyn_cast<PointerType>(Val: Ptr->getType());
4369 if (!PtrTy) // Malformed.
4370 return nullptr;
4371
4372 Value *Val = CI->getArgOperand(i: 1);
4373 if (Val->getType() != CI->getType()) // Malformed.
4374 return nullptr;
4375
4376 ConstantInt *OrderArg = nullptr;
4377 bool IsVolatile = false;
4378
4379 // These should have 5 arguments (plus the callee). A separate version of the
4380 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4381 if (NumOperands > 3)
4382 OrderArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
4383
4384 // Ignore scope argument at 3
4385
4386 if (NumOperands > 5) {
4387 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 4));
4388 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4389 }
4390
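// atomicrmw does not allow NotAtomic or Unordered, so fall back to the
// strongest ordering when the operand is missing, invalid, or too weak.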
4391 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4392 if (OrderArg && isValidAtomicOrdering(I: OrderArg->getZExtValue()))
4393 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4394 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4395 Order = AtomicOrdering::SequentiallyConsistent;
4396
4397 LLVMContext &Ctx = F->getContext();
4398
4399 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4400 Type *RetTy = CI->getType();
4401 if (VectorType *VT = dyn_cast<VectorType>(Val: RetTy)) {
4402 if (VT->getElementType()->isIntegerTy(Bitwidth: 16)) {
4403 VectorType *AsBF16 =
4404 VectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), EC: VT->getElementCount());
4405 Val = Builder.CreateBitCast(V: Val, DestTy: AsBF16);
4406 }
4407 }
4408
4409 // The scope argument never really worked correctly. Use agent as the most
4410 // conservative option which should still always produce the instruction.
4411 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID(SSN: "agent");
4412 AtomicRMWInst *RMW =
4413 Builder.CreateAtomicRMW(Op: RMWOp, Ptr, Val, Align: std::nullopt, Ordering: Order, SSID);
4414
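// Outside LDS, attach the metadata the backend uses to keep selecting a
// native atomic: the access is assumed not to touch fine-grained remote
// memory, and f32 fadd may ignore denormal mode.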
4415 unsigned AddrSpace = PtrTy->getAddressSpace();
4416 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4417 MDNode *EmptyMD = MDNode::get(Context&: F->getContext(), MDs: {});
4418 RMW->setMetadata(Kind: "amdgpu.no.fine.grained.memory", Node: EmptyMD);
4419 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4420 RMW->setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: EmptyMD);
4421 }
4422
4423 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4424 MDBuilder MDB(F->getContext());
4425 MDNode *RangeNotPrivate =
4426 MDB.createRange(Lo: APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4427 Hi: APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4428 RMW->setMetadata(KindID: LLVMContext::MD_noalias_addrspace, Node: RangeNotPrivate);
4429 }
4430
4431 if (IsVolatile)
4432 RMW->setVolatile(true);
4433
4434 return Builder.CreateBitCast(V: RMW, DestTy: RetTy);
4435}
4436
4437/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4438/// plain MDNode, as it's the verifier's job to check these are the correct
4439/// types later.
4440static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4441 if (Op < CI->arg_size()) {
4442 if (MetadataAsValue *MAV =
4443 dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op))) {
4444 Metadata *MD = MAV->getMetadata();
4445 return dyn_cast_if_present<MDNode>(Val: MD);
4446 }
4447 }
4448 return nullptr;
4449}
4450
4451/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4452static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4453 if (Op < CI->arg_size())
4454 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op)))
4455 return MAV->getMetadata();
4456 return nullptr;
4457}
4458
4459static MDNode *getDebugLocSafe(const Instruction *I) {
4460 // The MDNode attached to this instruction might not be the correct type,
4461 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4462 return I->getDebugLoc().getAsMDNode();
4463}
4464
4465/// Convert debug intrinsic calls to non-instruction debug records.
4466/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4467/// \p CI - The debug intrinsic call.
4468static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4469 DbgRecord *DR = nullptr;
4470 if (Name == "label") {
4471 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(Label: unwrapMAVOp(CI, Op: 0),
4472 DL: CI->getDebugLoc());
4473 } else if (Name == "assign") {
4474 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4475 Type: DbgVariableRecord::LocationType::Assign, Val: unwrapMAVMetadataOp(CI, Op: 0),
4476 Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: unwrapMAVOp(CI, Op: 3),
4477 Address: unwrapMAVMetadataOp(CI, Op: 4),
4478 /*The address is a Value ref, it will be stored as a Metadata */
4479 AddressExpression: unwrapMAVOp(CI, Op: 5), DI: getDebugLocSafe(I: CI));
4480 } else if (Name == "declare") {
4481 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4482 Type: DbgVariableRecord::LocationType::Declare, Val: unwrapMAVMetadataOp(CI, Op: 0),
4483 Variable: unwrapMAVOp(CI, Op: 1), Expression: unwrapMAVOp(CI, Op: 2), AssignID: nullptr, Address: nullptr, AddressExpression: nullptr,
4484 DI: getDebugLocSafe(I: CI));
4485 } else if (Name == "addr") {
4486 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4487 MDNode *ExprNode = unwrapMAVOp(CI, Op: 2);
4488 // Don't try to add something to the expression if it's not an expression.
4489 // Instead, allow the verifier to fail later.
4490 if (DIExpression *Expr = dyn_cast<DIExpression>(Val: ExprNode)) {
4491 ExprNode = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
4492 }
4493 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4494 Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
4495 Variable: unwrapMAVOp(CI, Op: 1), Expression: ExprNode, AssignID: nullptr, Address: nullptr, AddressExpression: nullptr,
4496 DI: getDebugLocSafe(I: CI));
4497 } else if (Name == "value") {
4498 // An old version of dbg.value had an extra offset argument.
4499 unsigned VarOp = 1;
4500 unsigned ExprOp = 2;
4501 if (CI->arg_size() == 4) {
4502 auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1));
4503 // Nonzero offset dbg.values get dropped without a replacement.
4504 if (!Offset || !Offset->isZeroValue())
4505 return;
4506 VarOp = 2;
4507 ExprOp = 3;
4508 }
4509 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4510 Type: DbgVariableRecord::LocationType::Value, Val: unwrapMAVMetadataOp(CI, Op: 0),
4511 Variable: unwrapMAVOp(CI, Op: VarOp), Expression: unwrapMAVOp(CI, Op: ExprOp), AssignID: nullptr, Address: nullptr,
4512 AddressExpression: nullptr, DI: getDebugLocSafe(I: CI));
4513 }
4514 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4515 CI->getParent()->insertDbgRecordBefore(DR, Here: CI->getIterator());
4516}
4517
4518/// Upgrade a call to an old intrinsic. All argument and return casting must be
4519/// provided to seamlessly integrate with existing context.
4520void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4521 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4522 // checks the callee's function type matches. It's likely we need to handle
4523 // type changes here.
4524 Function *F = dyn_cast<Function>(Val: CI->getCalledOperand());
4525 if (!F)
4526 return;
4527
4528 LLVMContext &C = CI->getContext();
4529 IRBuilder<> Builder(C);
4530 Builder.SetInsertPoint(TheBB: CI->getParent(), IP: CI->getIterator());
4531
4532 if (!NewFn) {
4533 // Get the Function's name.
4534 StringRef Name = F->getName();
4535
4536 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4537 Name = Name.substr(Start: 5);
4538
4539 bool IsX86 = Name.consume_front(Prefix: "x86.");
4540 bool IsNVVM = Name.consume_front(Prefix: "nvvm.");
4541 bool IsAArch64 = Name.consume_front(Prefix: "aarch64.");
4542 bool IsARM = Name.consume_front(Prefix: "arm.");
4543 bool IsAMDGCN = Name.consume_front(Prefix: "amdgcn.");
4544 bool IsDbg = Name.consume_front(Prefix: "dbg.");
4545 Value *Rep = nullptr;
4546
4547 if (!IsX86 && Name == "stackprotectorcheck") {
4548 Rep = nullptr;
4549 } else if (IsNVVM) {
4550 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4551 } else if (IsX86) {
4552 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4553 } else if (IsAArch64) {
4554 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4555 } else if (IsARM) {
4556 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4557 } else if (IsAMDGCN) {
4558 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4559 } else if (IsDbg) {
4560 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4561 } else {
4562 llvm_unreachable("Unknown function for CallBase upgrade.");
4563 }
4564
4565 if (Rep)
4566 CI->replaceAllUsesWith(V: Rep);
4567 CI->eraseFromParent();
4568 return;
4569 }
4570
4571 const auto &DefaultCase = [&]() -> void {
4572 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4573 // Handle generic mangling change.
4574 assert(
4575 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4576 "Unknown function for CallBase upgrade and isn't just a name change");
4577 CI->setCalledFunction(NewFn);
4578 return;
4579 }
4580
4581 // This must be an upgrade from a named to a literal struct.
4582 if (auto *OldST = dyn_cast<StructType>(Val: CI->getType())) {
4583 assert(OldST != NewFn->getReturnType() &&
4584 "Return type must have changed");
4585 assert(OldST->getNumElements() ==
4586 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4587 "Must have same number of elements");
4588
4589 SmallVector<Value *> Args(CI->args());
4590 CallInst *NewCI = Builder.CreateCall(Callee: NewFn, Args);
4591 NewCI->setAttributes(CI->getAttributes());
4592 Value *Res = PoisonValue::get(T: OldST);
4593 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4594 Value *Elem = Builder.CreateExtractValue(Agg: NewCI, Idxs: Idx);
4595 Res = Builder.CreateInsertValue(Agg: Res, Val: Elem, Idxs: Idx);
4596 }
4597 CI->replaceAllUsesWith(V: Res);
4598 CI->eraseFromParent();
4599 return;
4600 }
4601
4602 // We're probably about to produce something invalid. Let the verifier catch
4603 // it instead of dying here.
4604 CI->setCalledOperand(
4605 ConstantExpr::getPointerCast(C: NewFn, Ty: CI->getCalledOperand()->getType()));
4606 return;
4607 };
4608 CallInst *NewCall = nullptr;
4609 switch (NewFn->getIntrinsicID()) {
4610 default: {
4611 DefaultCase();
4612 return;
4613 }
4614 case Intrinsic::arm_neon_vst1:
4615 case Intrinsic::arm_neon_vst2:
4616 case Intrinsic::arm_neon_vst3:
4617 case Intrinsic::arm_neon_vst4:
4618 case Intrinsic::arm_neon_vst2lane:
4619 case Intrinsic::arm_neon_vst3lane:
4620 case Intrinsic::arm_neon_vst4lane: {
4621 SmallVector<Value *, 4> Args(CI->args());
4622 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4623 break;
4624 }
4625 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4626 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4627 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4628 LLVMContext &Ctx = F->getParent()->getContext();
4629 SmallVector<Value *, 4> Args(CI->args());
4630 Args[3] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx),
4631 V: cast<ConstantInt>(Val: Args[3])->getZExtValue());
4632 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4633 break;
4634 }
4635 case Intrinsic::aarch64_sve_ld3_sret:
4636 case Intrinsic::aarch64_sve_ld4_sret:
4637 case Intrinsic::aarch64_sve_ld2_sret: {
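// The old intrinsics returned one wide scalable vector; the sret
// replacements return a struct of N parts, so stitch the parts back
// together with vector inserts.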
4638 StringRef Name = F->getName();
4639 Name = Name.substr(Start: 5);
4640 unsigned N = StringSwitch<unsigned>(Name)
4641 .StartsWith(S: "aarch64.sve.ld2", Value: 2)
4642 .StartsWith(S: "aarch64.sve.ld3", Value: 3)
4643 .StartsWith(S: "aarch64.sve.ld4", Value: 4)
4644 .Default(Value: 0);
4645 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
4646 unsigned MinElts = RetTy->getMinNumElements() / N;
4647 SmallVector<Value *, 2> Args(CI->args());
4648 Value *NewLdCall = Builder.CreateCall(Callee: NewFn, Args);
4649 Value *Ret = llvm::PoisonValue::get(T: RetTy);
4650 for (unsigned I = 0; I < N; I++) {
4651 Value *SRet = Builder.CreateExtractValue(Agg: NewLdCall, Idxs: I);
4652 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: SRet, Idx: I * MinElts);
4653 }
4654 NewCall = dyn_cast<CallInst>(Val: Ret);
4655 break;
4656 }
4657
4658 case Intrinsic::coro_end: {
4659 SmallVector<Value *, 3> Args(CI->args());
4660 Args.push_back(Elt: ConstantTokenNone::get(Context&: CI->getContext()));
4661 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4662 break;
4663 }
4664
4665 case Intrinsic::vector_extract: {
4666 StringRef Name = F->getName();
4667 Name = Name.substr(Start: 5); // Strip llvm
4668 if (!Name.starts_with(Prefix: "aarch64.sve.tuple.get")) {
4669 DefaultCase();
4670 return;
4671 }
4672 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
4673 unsigned MinElts = RetTy->getMinNumElements();
4674 unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
4675 Value *NewIdx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts);
4676 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), NewIdx});
4677 break;
4678 }
4679
4680 case Intrinsic::vector_insert: {
4681 StringRef Name = F->getName();
4682 Name = Name.substr(Start: 5);
4683 if (!Name.starts_with(Prefix: "aarch64.sve.tuple")) {
4684 DefaultCase();
4685 return;
4686 }
4687 if (Name.starts_with(Prefix: "aarch64.sve.tuple.set")) {
4688 unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
4689 auto *Ty = cast<ScalableVectorType>(Val: CI->getArgOperand(i: 2)->getType());
4690 Value *NewIdx =
4691 ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * Ty->getMinNumElements());
4692 NewCall = Builder.CreateCall(
4693 Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), NewIdx});
4694 break;
4695 }
4696 if (Name.starts_with(Prefix: "aarch64.sve.tuple.create")) {
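// tuple.create packed its operands into one wide scalable vector; rebuild
// that by inserting each part at its element offset.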
4697 unsigned N = StringSwitch<unsigned>(Name)
4698 .StartsWith(S: "aarch64.sve.tuple.create2", Value: 2)
4699 .StartsWith(S: "aarch64.sve.tuple.create3", Value: 3)
4700 .StartsWith(S: "aarch64.sve.tuple.create4", Value: 4)
4701 .Default(Value: 0);
4702 assert(N > 1 && "Create is expected to be between 2-4");
4703 auto *RetTy = cast<ScalableVectorType>(Val: F->getReturnType());
4704 Value *Ret = llvm::PoisonValue::get(T: RetTy);
4705 unsigned MinElts = RetTy->getMinNumElements() / N;
4706 for (unsigned I = 0; I < N; I++) {
4707 Value *V = CI->getArgOperand(i: I);
4708 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: V, Idx: I * MinElts);
4709 }
4710 NewCall = dyn_cast<CallInst>(Val: Ret);
4711 }
4712 break;
4713 }
4714
4715 case Intrinsic::arm_neon_bfdot:
4716 case Intrinsic::arm_neon_bfmmla:
4717 case Intrinsic::arm_neon_bfmlalb:
4718 case Intrinsic::arm_neon_bfmlalt:
4719 case Intrinsic::aarch64_neon_bfdot:
4720 case Intrinsic::aarch64_neon_bfmmla:
4721 case Intrinsic::aarch64_neon_bfmlalb:
4722 case Intrinsic::aarch64_neon_bfmlalt: {
4723 SmallVector<Value *, 3> Args;
4724 assert(CI->arg_size() == 3 &&
4725 "Mismatch between function args and call args");
4726 size_t OperandWidth =
4727 CI->getArgOperand(i: 1)->getType()->getPrimitiveSizeInBits();
4728 assert((OperandWidth == 64 || OperandWidth == 128) &&
4729 "Unexpected operand width");
4730 Type *NewTy = FixedVectorType::get(ElementType: Type::getBFloatTy(C), NumElts: OperandWidth / 16);
4731 auto Iter = CI->args().begin();
4732 Args.push_back(Elt: *Iter++);
4733 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
4734 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
4735 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4736 break;
4737 }
4738
4739 case Intrinsic::bitreverse:
4740 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
4741 break;
4742
4743 case Intrinsic::ctlz:
4744 case Intrinsic::cttz:
4745 assert(CI->arg_size() == 1 &&
4746 "Mismatch between function args and call args");
4747 NewCall =
4748 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), Builder.getFalse()});
4749 break;
4750
4751 case Intrinsic::objectsize: {
4752 Value *NullIsUnknownSize =
4753 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(i: 2);
4754 Value *Dynamic =
4755 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(i: 3);
4756 NewCall = Builder.CreateCall(
4757 Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), NullIsUnknownSize, Dynamic});
4758 break;
4759 }
4760
4761 case Intrinsic::ctpop:
4762 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
4763 break;
4764
4765 case Intrinsic::convert_from_fp16:
4766 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
4767 break;
4768
4769 case Intrinsic::dbg_value: {
4770 StringRef Name = F->getName();
4771 Name = Name.substr(Start: 5); // Strip llvm.
4772 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4773 if (Name.starts_with(Prefix: "dbg.addr")) {
4774 DIExpression *Expr = cast<DIExpression>(
4775 Val: cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2))->getMetadata());
4776 Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
4777 NewCall =
4778 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4779 MetadataAsValue::get(Context&: C, MD: Expr)});
4780 break;
4781 }
4782
4783 // Upgrade from the old version that had an extra offset argument.
4784 assert(CI->arg_size() == 4);
4785 // Drop nonzero offsets instead of attempting to upgrade them.
4786 if (auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1)))
4787 if (Offset->isZeroValue()) {
4788 NewCall = Builder.CreateCall(
4789 Callee: NewFn,
4790 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)});
4791 break;
4792 }
4793 CI->eraseFromParent();
4794 return;
4795 }
4796
4797 case Intrinsic::ptr_annotation:
4798 // Upgrade from versions that lacked the annotation attribute argument.
4799 if (CI->arg_size() != 4) {
4800 DefaultCase();
4801 return;
4802 }
4803
4804 // Create a new call with an added null annotation attribute argument.
4805 NewCall = Builder.CreateCall(
4806 Callee: NewFn,
4807 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
4808 CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
4809 NewCall->takeName(V: CI);
4810 CI->replaceAllUsesWith(V: NewCall);
4811 CI->eraseFromParent();
4812 return;
4813
4814 case Intrinsic::var_annotation:
4815 // Upgrade from versions that lacked the annotation attribute argument.
4816 if (CI->arg_size() != 4) {
4817 DefaultCase();
4818 return;
4819 }
4820 // Create a new call with an added null annotation attribute argument.
4821 NewCall = Builder.CreateCall(
4822 Callee: NewFn,
4823 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), CI->getArgOperand(i: 2),
4824 CI->getArgOperand(i: 3), ConstantPointerNull::get(T: Builder.getPtrTy())});
4825 NewCall->takeName(V: CI);
4826 CI->replaceAllUsesWith(V: NewCall);
4827 CI->eraseFromParent();
4828 return;
4829
4830 case Intrinsic::riscv_aes32dsi:
4831 case Intrinsic::riscv_aes32dsmi:
4832 case Intrinsic::riscv_aes32esi:
4833 case Intrinsic::riscv_aes32esmi:
4834 case Intrinsic::riscv_sm4ks:
4835 case Intrinsic::riscv_sm4ed: {
4836 // The last argument to these intrinsics used to be i8 and changed to i32.
4837 // The type overload for sm4ks and sm4ed was removed.
4838 Value *Arg2 = CI->getArgOperand(i: 2);
4839 if (Arg2->getType()->isIntegerTy(Bitwidth: 32) && !CI->getType()->isIntegerTy(Bitwidth: 64))
4840 return;
4841
4842 Value *Arg0 = CI->getArgOperand(i: 0);
4843 Value *Arg1 = CI->getArgOperand(i: 1);
4844 if (CI->getType()->isIntegerTy(Bitwidth: 64)) {
4845 Arg0 = Builder.CreateTrunc(V: Arg0, DestTy: Builder.getInt32Ty());
4846 Arg1 = Builder.CreateTrunc(V: Arg1, DestTy: Builder.getInt32Ty());
4847 }
4848
4849 Arg2 = ConstantInt::get(Ty: Type::getInt32Ty(C),
4850 V: cast<ConstantInt>(Val: Arg2)->getZExtValue());
4851
4852 NewCall = Builder.CreateCall(Callee: NewFn, Args: {Arg0, Arg1, Arg2});
4853 Value *Res = NewCall;
4854 if (Res->getType() != CI->getType())
4855 Res = Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
4856 NewCall->takeName(V: CI);
4857 CI->replaceAllUsesWith(V: Res);
4858 CI->eraseFromParent();
4859 return;
4860 }
4861 case Intrinsic::nvvm_mapa_shared_cluster: {
4862 // Create a new call with the correct address space.
4863 NewCall =
4864 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1)});
4865 Value *Res = NewCall;
4866 Res = Builder.CreateAddrSpaceCast(
4867 V: Res, DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED));
4868 NewCall->takeName(V: CI);
4869 CI->replaceAllUsesWith(V: Res);
4870 CI->eraseFromParent();
4871 return;
4872 }
4873 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
4874 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
4875 // Create a new call with the correct address space.
4876 SmallVector<Value *, 4> Args(CI->args());
4877 Args[0] = Builder.CreateAddrSpaceCast(
4878 V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
4879
4880 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4881 NewCall->takeName(V: CI);
4882 CI->replaceAllUsesWith(V: NewCall);
4883 CI->eraseFromParent();
4884 return;
4885 }
4886 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
4887 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
4888 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
4889 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
4890 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
4891 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
4892 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
4893 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
4894 SmallVector<Value *, 16> Args(CI->args());
4895
4896 // Create AddrSpaceCast to shared_cluster if needed.
4897 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
4898 unsigned AS = CI->getArgOperand(i: 0)->getType()->getPointerAddressSpace();
4899 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
4900 Args[0] = Builder.CreateAddrSpaceCast(
4901 V: Args[0], DestTy: Builder.getPtrTy(AddrSpace: NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
4902
4903 // Attach the flag argument for cta_group, with a
4904 // default value of 0. This handles case (2) in
4905 // shouldUpgradeNVPTXTMAG2SIntrinsics().
4906 size_t NumArgs = CI->arg_size();
4907 Value *FlagArg = CI->getArgOperand(i: NumArgs - 3);
4908 if (!FlagArg->getType()->isIntegerTy(Bitwidth: 1))
4909 Args.push_back(Elt: ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0));
4910
4911 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4912 NewCall->takeName(V: CI);
4913 CI->replaceAllUsesWith(V: NewCall);
4914 CI->eraseFromParent();
4915 return;
4916 }
4917 case Intrinsic::riscv_sha256sig0:
4918 case Intrinsic::riscv_sha256sig1:
4919 case Intrinsic::riscv_sha256sum0:
4920 case Intrinsic::riscv_sha256sum1:
4921 case Intrinsic::riscv_sm3p0:
4922 case Intrinsic::riscv_sm3p1: {
4923 // These intrinsics used to have an i64 type overload alongside i32. Only the
4924 // i32 form remains, so truncate the argument and sign-extend the result.
4925 if (!CI->getType()->isIntegerTy(Bitwidth: 64))
4926 return;
4927
4928 Value *Arg =
4929 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getInt32Ty());
4930
4931 NewCall = Builder.CreateCall(Callee: NewFn, Args: Arg);
4932 Value *Res =
4933 Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
4934 NewCall->takeName(V: CI);
4935 CI->replaceAllUsesWith(V: Res);
4936 CI->eraseFromParent();
4937 return;
4938 }
4939
4940 case Intrinsic::x86_xop_vfrcz_ss:
4941 case Intrinsic::x86_xop_vfrcz_sd:
4942 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 1)});
4943 break;
4944
4945 case Intrinsic::x86_xop_vpermil2pd:
4946 case Intrinsic::x86_xop_vpermil2ps:
4947 case Intrinsic::x86_xop_vpermil2pd_256:
4948 case Intrinsic::x86_xop_vpermil2ps_256: {
4949 SmallVector<Value *, 4> Args(CI->args());
4950 VectorType *FltIdxTy = cast<VectorType>(Val: Args[2]->getType());
4951 VectorType *IntIdxTy = VectorType::getInteger(VTy: FltIdxTy);
4952 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: IntIdxTy);
4953 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4954 break;
4955 }
4956
4957 case Intrinsic::x86_sse41_ptestc:
4958 case Intrinsic::x86_sse41_ptestz:
4959 case Intrinsic::x86_sse41_ptestnzc: {
4960 // The arguments for these intrinsics used to be v4f32, and changed
4961 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4962 // So, the only thing required is a bitcast for both arguments.
4963 // First, check the arguments have the old type.
4964 Value *Arg0 = CI->getArgOperand(i: 0);
4965 if (Arg0->getType() != FixedVectorType::get(ElementType: Type::getFloatTy(C), NumElts: 4))
4966 return;
4967
4968 // Old intrinsic, add bitcasts
4969 Value *Arg1 = CI->getArgOperand(i: 1);
4970
4971 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
4972
4973 Value *BC0 = Builder.CreateBitCast(V: Arg0, DestTy: NewVecTy, Name: "cast");
4974 Value *BC1 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
4975
4976 NewCall = Builder.CreateCall(Callee: NewFn, Args: {BC0, BC1});
4977 break;
4978 }
4979
4980 case Intrinsic::x86_rdtscp: {
4981 // This used to take 1 argument. If we have no arguments, it is already
4982 // upgraded.
4983 if (CI->getNumOperands() == 0)
4984 return;
4985
4986 NewCall = Builder.CreateCall(Callee: NewFn);
4987 // Extract the second result and store it.
4988 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
4989 Builder.CreateAlignedStore(Val: Data, Ptr: CI->getArgOperand(i: 0), Align: Align(1));
4990 // Replace the original call result with the first result of the new call.
4991 Value *TSC = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
4992
4993 NewCall->takeName(V: CI);
4994 CI->replaceAllUsesWith(V: TSC);
4995 CI->eraseFromParent();
4996 return;
4997 }
4998
4999 case Intrinsic::x86_sse41_insertps:
5000 case Intrinsic::x86_sse41_dppd:
5001 case Intrinsic::x86_sse41_dpps:
5002 case Intrinsic::x86_sse41_mpsadbw:
5003 case Intrinsic::x86_avx_dp_ps_256:
5004 case Intrinsic::x86_avx2_mpsadbw: {
5005 // Need to truncate the last argument from i32 to i8 -- this argument models
5006 // an inherently 8-bit immediate operand to these x86 instructions.
5007 SmallVector<Value *, 4> Args(CI->args());
5008
5009 // Replace the last argument with a trunc.
5010 Args.back() = Builder.CreateTrunc(V: Args.back(), DestTy: Type::getInt8Ty(C), Name: "trunc");
5011 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5012 break;
5013 }
5014
5015 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5016 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5017 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5018 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5019 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5020 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5021 SmallVector<Value *, 4> Args(CI->args());
5022 unsigned NumElts =
5023 cast<FixedVectorType>(Val: Args[0]->getType())->getNumElements();
5024 Args[3] = getX86MaskVec(Builder, Mask: Args[3], NumElts);
5025
5026 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5027 Value *Res = applyX86MaskOn1BitsVec(Builder, Vec: NewCall, Mask: nullptr);
5028
5029 NewCall->takeName(V: CI);
5030 CI->replaceAllUsesWith(V: Res);
5031 CI->eraseFromParent();
5032 return;
5033 }
5034
5035 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5036 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5037 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5038 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5039 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5040 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
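// These used <N x i16> in place of bfloat vectors; bitcast the bfloat
// pass-through operand (for the masked 128-bit form) and the result so the
// call site keeps its original integer types.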
5041 SmallVector<Value *, 4> Args(CI->args());
5042 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
5043 if (NewFn->getIntrinsicID() ==
5044 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5045 Args[1] = Builder.CreateBitCast(
5046 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5047
5048 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5049 Value *Res = Builder.CreateBitCast(
5050 V: NewCall, DestTy: FixedVectorType::get(ElementType: Builder.getInt16Ty(), NumElts));
5051
5052 NewCall->takeName(V: CI);
5053 CI->replaceAllUsesWith(V: Res);
5054 CI->eraseFromParent();
5055 return;
5056 }
5057 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5058 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5059 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5060 SmallVector<Value *, 4> Args(CI->args());
5061 unsigned NumElts =
5062 cast<FixedVectorType>(Val: CI->getType())->getNumElements() * 2;
5063 Args[1] = Builder.CreateBitCast(
5064 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5065 Args[2] = Builder.CreateBitCast(
5066 V: Args[2], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
5067
5068 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5069 break;
5070 }
5071
5072 case Intrinsic::thread_pointer: {
5073 NewCall = Builder.CreateCall(Callee: NewFn, Args: {});
5074 break;
5075 }
5076
5077 case Intrinsic::memcpy:
5078 case Intrinsic::memmove:
5079 case Intrinsic::memset: {
5080 // We have to make sure that the call signature is what we're expecting.
5081 // We only want to change the old signatures by removing the alignment arg:
5082 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5083 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5084 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5085 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5086 // Note: i8*'s in the above can be any pointer type
5087 if (CI->arg_size() != 5) {
5088 DefaultCase();
5089 return;
5090 }
5091 // Remove alignment argument (3), and add alignment attributes to the
5092 // dest/src pointers.
5093 Value *Args[4] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
5094 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
5095 NewCall = Builder.CreateCall(Callee: NewFn, Args);
5096 AttributeList OldAttrs = CI->getAttributes();
5097 AttributeList NewAttrs = AttributeList::get(
5098 C, FnAttrs: OldAttrs.getFnAttrs(), RetAttrs: OldAttrs.getRetAttrs(),
5099 ArgAttrs: {OldAttrs.getParamAttrs(ArgNo: 0), OldAttrs.getParamAttrs(ArgNo: 1),
5100 OldAttrs.getParamAttrs(ArgNo: 2), OldAttrs.getParamAttrs(ArgNo: 4)});
5101 NewCall->setAttributes(NewAttrs);
5102 auto *MemCI = cast<MemIntrinsic>(Val: NewCall);
5103 // All mem intrinsics support dest alignment.
5104 const ConstantInt *Align = cast<ConstantInt>(Val: CI->getArgOperand(i: 3));
5105 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5106 // Memcpy/Memmove also support source alignment.
5107 if (auto *MTI = dyn_cast<MemTransferInst>(Val: MemCI))
5108 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5109 break;
5110 }
5111 }
5112 assert(NewCall && "Should have either set this variable or returned through "
5113 "the default case");
5114 NewCall->takeName(V: CI);
5115 CI->replaceAllUsesWith(V: NewCall);
5116 CI->eraseFromParent();
5117}
5118
5119void llvm::UpgradeCallsToIntrinsic(Function *F) {
5120 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5121
5122 // Check if this function should be upgraded and get the replacement function
5123 // if there is one.
5124 Function *NewFn;
5125 if (UpgradeIntrinsicFunction(F, NewFn)) {
5126 // Replace all users of the old function with the new function or new
5127 // instructions. This is not a range loop because the call is deleted.
5128 for (User *U : make_early_inc_range(Range: F->users()))
5129 if (CallBase *CB = dyn_cast<CallBase>(Val: U))
5130 UpgradeIntrinsicCall(CI: CB, NewFn);
5131
5132 // Remove old function, no longer used, from the module.
5133 F->eraseFromParent();
5134 }
5135}
5136
5137MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5138 const unsigned NumOperands = MD.getNumOperands();
5139 if (NumOperands == 0)
5140 return &MD; // Invalid, punt to a verifier error.
5141
5142 // Check if the tag uses struct-path aware TBAA format.
5143 if (isa<MDNode>(Val: MD.getOperand(I: 0)) && NumOperands >= 3)
5144 return &MD;
5145
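// For illustration only (hypothetical metadata): a legacy scalar tag
//   !{!"int", !0, i64 1}
// becomes the struct-path form !{!1, !1, i64 0, i64 1} with !1 = !{!"int", !0},
// while a two-operand tag !{!"int", !0} becomes !{!2, !2, i64 0} where !2 is
// the old node itself.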
5146 auto &Context = MD.getContext();
5147 if (NumOperands == 3) {
5148 Metadata *Elts[] = {MD.getOperand(I: 0), MD.getOperand(I: 1)};
5149 MDNode *ScalarType = MDNode::get(Context, MDs: Elts);
5150 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5151 Metadata *Elts2[] = {ScalarType, ScalarType,
5152 ConstantAsMetadata::get(
5153 C: Constant::getNullValue(Ty: Type::getInt64Ty(C&: Context))),
5154 MD.getOperand(I: 2)};
5155 return MDNode::get(Context, MDs: Elts2);
5156 }
5157 // Create a MDNode <MD, MD, offset 0>
5158 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(C: Constant::getNullValue(
5159 Ty: Type::getInt64Ty(C&: Context)))};
5160 return MDNode::get(Context, MDs: Elts);
5161}
5162
5163Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5164 Instruction *&Temp) {
5165 if (Opc != Instruction::BitCast)
5166 return nullptr;
5167
5168 Temp = nullptr;
5169 Type *SrcTy = V->getType();
5170 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5171 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5172 LLVMContext &Context = V->getContext();
5173
5174 // We have no information about the target data layout, so we assume that
5175 // the maximum pointer size is 64-bit.
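// For illustration only (hypothetical IR): an old address-space-changing
// bitcast such as "bitcast ptr addrspace(1) %p to ptr" is split into
//   %tmp = ptrtoint ptr addrspace(1) %p to i64
//   %res = inttoptr i64 %tmp to ptr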
5176 Type *MidTy = Type::getInt64Ty(C&: Context);
5177 Temp = CastInst::Create(Instruction::PtrToInt, S: V, Ty: MidTy);
5178
5179 return CastInst::Create(Instruction::IntToPtr, S: Temp, Ty: DestTy);
5180 }
5181
5182 return nullptr;
5183}
5184
5185Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5186 if (Opc != Instruction::BitCast)
5187 return nullptr;
5188
5189 Type *SrcTy = C->getType();
5190 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5191 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5192 LLVMContext &Context = C->getContext();
5193
5194 // We have no information about the target data layout, so we assume that
5195 // the maximum pointer size is 64-bit.
5196 Type *MidTy = Type::getInt64Ty(C&: Context);
5197
5198 return ConstantExpr::getIntToPtr(C: ConstantExpr::getPtrToInt(C, Ty: MidTy),
5199 Ty: DestTy);
5200 }
5201
5202 return nullptr;
5203}
5204
5205 /// Check the debug info version number; if it is outdated, drop the debug
5206 /// info. Return true if the module is modified.
5207bool llvm::UpgradeDebugInfo(Module &M) {
5208 if (DisableAutoUpgradeDebugInfo)
5209 return false;
5210
5211 // We need to get metadata before the module is verified (i.e., getModuleFlag
5212 // makes assumptions that we haven't verified yet). Carefully extract the flag
5213 // from the metadata.
5214 unsigned Version = 0;
5215 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5216 auto OpIt = find_if(Range: ModFlags->operands(), P: [](const MDNode *Flag) {
5217 if (Flag->getNumOperands() < 3)
5218 return false;
5219 if (MDString *K = dyn_cast_or_null<MDString>(Val: Flag->getOperand(I: 1)))
5220 return K->getString() == "Debug Info Version";
5221 return false;
5222 });
5223 if (OpIt != ModFlags->op_end()) {
5224 const MDOperand &ValOp = (*OpIt)->getOperand(I: 2);
5225 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD: ValOp))
5226 Version = CI->getZExtValue();
5227 }
5228 }
5229
5230 if (Version == DEBUG_METADATA_VERSION) {
5231 bool BrokenDebugInfo = false;
5232 if (verifyModule(M, OS: &llvm::errs(), BrokenDebugInfo: &BrokenDebugInfo))
5233 report_fatal_error(reason: "Broken module found, compilation aborted!");
5234 if (!BrokenDebugInfo)
5235 // Everything is ok.
5236 return false;
5237 else {
5238 // Diagnose malformed debug info.
5239 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5240 M.getContext().diagnose(DI: Diag);
5241 }
5242 }
5243 bool Modified = StripDebugInfo(M);
5244 if (Modified && Version != DEBUG_METADATA_VERSION) {
5245 // Diagnose a version mismatch.
5246 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5247 M.getContext().diagnose(DI: DiagVersion);
5248 }
5249 return Modified;
5250}
5251
5252static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5253 GlobalValue *GV, const Metadata *V) {
5254 Function *F = cast<Function>(Val: GV);
5255
5256 constexpr StringLiteral DefaultValue = "1";
5257 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5258 unsigned Length = 0;
5259
5260 if (F->hasFnAttribute(Kind: Attr)) {
5261 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5262 // parse these elements, placing them into Vect3.
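// For illustration only (hypothetical values): with an existing attribute
// "nvvm.maxntid"="64,2" and a metadata update for dimension 'z' of value 4,
// the attribute is rewritten to "64,2,4".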
5263 StringRef S = F->getFnAttribute(Kind: Attr).getValueAsString();
5264 for (; Length < 3 && !S.empty(); Length++) {
5265 auto [Part, Rest] = S.split(Separator: ',');
5266 Vect3[Length] = Part.trim();
5267 S = Rest;
5268 }
5269 }
5270
5271 const unsigned Dim = DimC - 'x';
5272 assert(Dim < 3 && "Unexpected dim char");
5273
5274 const uint64_t VInt = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5275
5276 // A local variable is required for the StringRefs in Vect3 to point to.
5277 const std::string VStr = llvm::utostr(X: VInt);
5278 Vect3[Dim] = VStr;
5279 Length = std::max(a: Length, b: Dim + 1);
5280
5281 const std::string NewAttr = llvm::join(R: ArrayRef(Vect3, Length), Separator: ",");
5282 F->addFnAttr(Kind: Attr, Val: NewAttr);
5283}
5284
5285static inline bool isXYZ(StringRef S) {
5286 return S == "x" || S == "y" || S == "z";
5287}
5288
5289 static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5290 const Metadata *V) {
5291 if (K == "kernel") {
5292 if (!mdconst::extract<ConstantInt>(MD&: V)->isZero())
5293 cast<Function>(Val: GV)->setCallingConv(CallingConv::PTX_Kernel);
5294 return true;
5295 }
5296 if (K == "align") {
5297 // V is a bitfield specifying two 16-bit values. The alignment value is
5298 // specified in the low 16 bits, and the index is specified in the high bits.
5299 // For the index, 0 indicates the return value while higher values correspond
5300 // to each parameter (idx = param + 1).
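// For illustration only (a hypothetical encoding): a value of 0x00020008
// encodes Idx = 2 and a stack alignment of 8, i.e. alignstack(8) is attached
// to the second parameter (Idx 0 would be the return value).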
5301 const uint64_t AlignIdxValuePair =
5302 mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5303 const unsigned Idx = (AlignIdxValuePair >> 16);
5304 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5305 cast<Function>(Val: GV)->addAttributeAtIndex(
5306 i: Idx, Attr: Attribute::getWithStackAlignment(Context&: GV->getContext(), Alignment: StackAlign));
5307 return true;
5308 }
5309 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5310 const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5311 cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.maxclusterrank", Val: llvm::utostr(X: CV));
5312 return true;
5313 }
5314 if (K == "minctasm") {
5315 const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5316 cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.minctasm", Val: llvm::utostr(X: CV));
5317 return true;
5318 }
5319 if (K == "maxnreg") {
5320 const auto CV = mdconst::extract<ConstantInt>(MD&: V)->getZExtValue();
5321 cast<Function>(Val: GV)->addFnAttr(Kind: "nvvm.maxnreg", Val: llvm::utostr(X: CV));
5322 return true;
5323 }
5324 if (K.consume_front(Prefix: "maxntid") && isXYZ(S: K)) {
5325 upgradeNVVMFnVectorAttr(Attr: "nvvm.maxntid", DimC: K[0], GV, V);
5326 return true;
5327 }
5328 if (K.consume_front(Prefix: "reqntid") && isXYZ(S: K)) {
5329 upgradeNVVMFnVectorAttr(Attr: "nvvm.reqntid", DimC: K[0], GV, V);
5330 return true;
5331 }
5332 if (K.consume_front(Prefix: "cluster_dim_") && isXYZ(S: K)) {
5333 upgradeNVVMFnVectorAttr(Attr: "nvvm.cluster_dim", DimC: K[0], GV, V);
5334 return true;
5335 }
5336
5337 return false;
5338}
5339
5340void llvm::UpgradeNVVMAnnotations(Module &M) {
5341 NamedMDNode *NamedMD = M.getNamedMetadata(Name: "nvvm.annotations");
5342 if (!NamedMD)
5343 return;
5344
5345 SmallVector<MDNode *, 8> NewNodes;
5346 SmallSet<const MDNode *, 8> SeenNodes;
5347 for (MDNode *MD : NamedMD->operands()) {
5348 if (!SeenNodes.insert(Ptr: MD).second)
5349 continue;
5350
5351 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD: MD->getOperand(I: 0));
5352 if (!GV)
5353 continue;
5354
5355 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5356
5357 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(I: 0)};
5358 // Each nvvm.annotations metadata entry will be of the following form:
5359 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5360 // start index = 1, to skip the global variable key
5361 // increment = 2, to skip over the value of each key-value pair
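// For illustration only (hypothetical metadata): the entry
//   !{ptr @foo, !"kernel", i32 1, !"maxntidx", i32 64}
// marks @foo as a PTX kernel and adds "nvvm.maxntid"="64"; since both pairs
// are upgraded, no replacement annotation node is emitted for @foo.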
5362 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5363 MDString *K = cast<MDString>(Val: MD->getOperand(I: j));
5364 const MDOperand &V = MD->getOperand(I: j + 1);
5365 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K: K->getString(), V);
5366 if (!Upgraded)
5367 NewOperands.append(IL: {K, V});
5368 }
5369
5370 if (NewOperands.size() > 1)
5371 NewNodes.push_back(Elt: MDNode::get(Context&: M.getContext(), MDs: NewOperands));
5372 }
5373
5374 NamedMD->clearOperands();
5375 for (MDNode *N : NewNodes)
5376 NamedMD->addOperand(M: N);
5377}
5378
5379 /// This checks for the ObjC retain/release marker, which should be upgraded.
5380 /// It returns true if the module is modified.
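/// For illustration only (a hypothetical marker string): a value of the form "A#B" in
///   !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}, !0 = !{!"A#B"}
/// is re-emitted as the module flag
///   !{i32 1, !"clang.arc.retainAutoreleasedReturnValueMarker", !"A;B"}
/// (i32 1 being the Error behavior), and the named metadata is erased.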
5381static bool upgradeRetainReleaseMarker(Module &M) {
5382 bool Changed = false;
5383 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5384 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(Name: MarkerKey);
5385 if (ModRetainReleaseMarker) {
5386 MDNode *Op = ModRetainReleaseMarker->getOperand(i: 0);
5387 if (Op) {
5388 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 0));
5389 if (ID) {
5390 SmallVector<StringRef, 4> ValueComp;
5391 ID->getString().split(A&: ValueComp, Separator: "#");
5392 if (ValueComp.size() == 2) {
5393 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5394 ID = MDString::get(Context&: M.getContext(), Str: NewValue);
5395 }
5396 M.addModuleFlag(Behavior: Module::Error, Key: MarkerKey, Val: ID);
5397 M.eraseNamedMetadata(NMD: ModRetainReleaseMarker);
5398 Changed = true;
5399 }
5400 }
5401 }
5402 return Changed;
5403}
5404
5405void llvm::UpgradeARCRuntime(Module &M) {
5406 // This lambda converts calls to ARC runtime functions into calls to the
5407 // corresponding intrinsics.
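// For illustration only (hypothetical IR): a runtime call such as
//   %1 = call ptr @objc_retain(ptr %0)
// is replaced with a call to the intrinsic
//   %1 = call ptr @llvm.objc.retain(ptr %0)
// with bitcasts inserted if the argument or return types differ.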
5408 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5409 llvm::Intrinsic::ID IntrinsicFunc) {
5410 Function *Fn = M.getFunction(Name: OldFunc);
5411
5412 if (!Fn)
5413 return;
5414
5415 Function *NewFn =
5416 llvm::Intrinsic::getOrInsertDeclaration(M: &M, id: IntrinsicFunc);
5417
5418 for (User *U : make_early_inc_range(Range: Fn->users())) {
5419 CallInst *CI = dyn_cast<CallInst>(Val: U);
5420 if (!CI || CI->getCalledFunction() != Fn)
5421 continue;
5422
5423 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5424 FunctionType *NewFuncTy = NewFn->getFunctionType();
5425 SmallVector<Value *, 2> Args;
5426
5427 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5428 // value to the return type of the old function.
5429 if (NewFuncTy->getReturnType() != CI->getType() &&
5430 !CastInst::castIsValid(op: Instruction::BitCast, S: CI,
5431 DstTy: NewFuncTy->getReturnType()))
5432 continue;
5433
5434 bool InvalidCast = false;
5435
5436 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5437 Value *Arg = CI->getArgOperand(i: I);
5438
5439 // Bitcast argument to the parameter type of the new function if it's
5440 // not a variadic argument.
5441 if (I < NewFuncTy->getNumParams()) {
5442 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5443 // to the parameter type of the new function.
5444 if (!CastInst::castIsValid(op: Instruction::BitCast, S: Arg,
5445 DstTy: NewFuncTy->getParamType(i: I))) {
5446 InvalidCast = true;
5447 break;
5448 }
5449 Arg = Builder.CreateBitCast(V: Arg, DestTy: NewFuncTy->getParamType(i: I));
5450 }
5451 Args.push_back(Elt: Arg);
5452 }
5453
5454 if (InvalidCast)
5455 continue;
5456
5457 // Create a call instruction that calls the new function.
5458 CallInst *NewCall = Builder.CreateCall(FTy: NewFuncTy, Callee: NewFn, Args);
5459 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
5460 NewCall->takeName(V: CI);
5461
5462 // Bitcast the return value back to the type of the old call.
5463 Value *NewRetVal = Builder.CreateBitCast(V: NewCall, DestTy: CI->getType());
5464
5465 if (!CI->use_empty())
5466 CI->replaceAllUsesWith(V: NewRetVal);
5467 CI->eraseFromParent();
5468 }
5469
5470 if (Fn->use_empty())
5471 Fn->eraseFromParent();
5472 };
5473
5474 // Unconditionally convert a call to "clang.arc.use" to a call to
5475 // "llvm.objc.clang.arc.use".
5476 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5477
5478 // Upgrade the retain/release marker. If there is no need to upgrade the
5479 // marker, that means either the module is already new enough to contain the
5480 // new intrinsics or it is not ARC; either way there is no need to upgrade the runtime calls.
5481 if (!upgradeRetainReleaseMarker(M))
5482 return;
5483
5484 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5485 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5486 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5487 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5488 {"objc_autoreleaseReturnValue",
5489 llvm::Intrinsic::objc_autoreleaseReturnValue},
5490 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5491 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5492 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5493 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5494 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5495 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5496 {"objc_release", llvm::Intrinsic::objc_release},
5497 {"objc_retain", llvm::Intrinsic::objc_retain},
5498 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5499 {"objc_retainAutoreleaseReturnValue",
5500 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5501 {"objc_retainAutoreleasedReturnValue",
5502 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5503 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5504 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5505 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5506 {"objc_unsafeClaimAutoreleasedReturnValue",
5507 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5508 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5509 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5510 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5511 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5512 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5513 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5514 {"objc_arc_annotation_topdown_bbstart",
5515 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5516 {"objc_arc_annotation_topdown_bbend",
5517 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5518 {"objc_arc_annotation_bottomup_bbstart",
5519 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5520 {"objc_arc_annotation_bottomup_bbend",
5521 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5522
5523 for (auto &I : RuntimeFuncs)
5524 UpgradeToIntrinsic(I.first, I.second);
5525}
5526
5527bool llvm::UpgradeModuleFlags(Module &M) {
5528 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5529 if (!ModFlags)
5530 return false;
5531
5532 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5533 bool HasSwiftVersionFlag = false;
5534 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5535 uint32_t SwiftABIVersion;
5536 auto Int8Ty = Type::getInt8Ty(C&: M.getContext());
5537 auto Int32Ty = Type::getInt32Ty(C&: M.getContext());
5538
5539 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5540 MDNode *Op = ModFlags->getOperand(i: I);
5541 if (Op->getNumOperands() != 3)
5542 continue;
5543 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
5544 if (!ID)
5545 continue;
5546 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5547 Metadata *Ops[3] = {ConstantAsMetadata::get(C: ConstantInt::get(
5548 Ty: Type::getInt32Ty(C&: M.getContext()), V: B)),
5549 MDString::get(Context&: M.getContext(), Str: ID->getString()),
5550 Op->getOperand(I: 2)};
5551 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5552 Changed = true;
5553 };
5554
5555 if (ID->getString() == "Objective-C Image Info Version")
5556 HasObjCFlag = true;
5557 if (ID->getString() == "Objective-C Class Properties")
5558 HasClassProperties = true;
5559 // Upgrade PIC from Error/Max to Min.
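// For illustration only (hypothetical metadata): !{i32 Error, !"PIC Level", i32 2}
// becomes !{i32 Min, !"PIC Level", i32 2}; only the behavior operand changes.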
5560 if (ID->getString() == "PIC Level") {
5561 if (auto *Behavior =
5562 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
5563 uint64_t V = Behavior->getLimitedValue();
5564 if (V == Module::Error || V == Module::Max)
5565 SetBehavior(Module::Min);
5566 }
5567 }
5568 // Upgrade "PIE Level" from Error to Max.
5569 if (ID->getString() == "PIE Level")
5570 if (auto *Behavior =
5571 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0)))
5572 if (Behavior->getLimitedValue() == Module::Error)
5573 SetBehavior(Module::Max);
5574
5575 // Upgrade branch protection and return address signing module flags. The
5576 // module flag behavior for these fields was Error and is now Min.
5577 if (ID->getString() == "branch-target-enforcement" ||
5578 ID->getString().starts_with(Prefix: "sign-return-address")) {
5579 if (auto *Behavior =
5580 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
5581 if (Behavior->getLimitedValue() == Module::Error) {
5582 Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
5583 Metadata *Ops[3] = {
5584 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Min)),
5585 Op->getOperand(I: 1), Op->getOperand(I: 2)};
5586 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5587 Changed = true;
5588 }
5589 }
5590 }
5591
5592 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
5593 // section name so that llvm-lto will not complain about mismatching module
5594 // flags that are functionally the same.
5595 if (ID->getString() == "Objective-C Image Info Section") {
5596 if (auto *Value = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 2))) {
5597 SmallVector<StringRef, 4> ValueComp;
5598 Value->getString().split(A&: ValueComp, Separator: " ");
5599 if (ValueComp.size() != 1) {
5600 std::string NewValue;
5601 for (auto &S : ValueComp)
5602 NewValue += S.str();
5603 Metadata *Ops[3] = {Op->getOperand(I: 0), Op->getOperand(I: 1),
5604 MDString::get(Context&: M.getContext(), Str: NewValue)};
5605 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5606 Changed = true;
5607 }
5608 }
5609 }
5610
5611 // The IR upgrader turns the i32-typed "Objective-C Garbage Collection" flag into
5612 // an i8 value. If the higher bits are set, it adds a new module flag for Swift info.
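// For illustration only (a hypothetical packed value): 0x05010700 yields Swift
// major version 5, minor version 1, ABI version 7, and a GC value of 0, so the
// flag is rewritten to hold i8 0 and three Swift module flags are added below.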
5613 if (ID->getString() == "Objective-C Garbage Collection") {
5614 auto Md = dyn_cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2));
5615 if (Md) {
5616 assert(Md->getValue() && "Expected non-empty metadata");
5617 auto Type = Md->getValue()->getType();
5618 if (Type == Int8Ty)
5619 continue;
5620 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5621 if ((Val & 0xff) != Val) {
5622 HasSwiftVersionFlag = true;
5623 SwiftABIVersion = (Val & 0xff00) >> 8;
5624 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5625 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5626 }
5627 Metadata *Ops[3] = {
5628 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Error)),
5629 Op->getOperand(I: 1),
5630 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int8Ty, V: Val & 0xff))};
5631 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5632 Changed = true;
5633 }
5634 }
5635
5636 if (ID->getString() == "amdgpu_code_object_version") {
5637 Metadata *Ops[3] = {
5638 Op->getOperand(I: 0),
5639 MDString::get(Context&: M.getContext(), Str: "amdhsa_code_object_version"),
5640 Op->getOperand(I: 2)};
5641 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5642 Changed = true;
5643 }
5644 }
5645
5646 // "Objective-C Class Properties" is recently added for Objective-C. We
5647 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5648 // flag of value 0, so we can correclty downgrade this flag when trying to
5649 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5650 // this module flag.
5651 if (HasObjCFlag && !HasClassProperties) {
5652 M.addModuleFlag(Behavior: llvm::Module::Override, Key: "Objective-C Class Properties",
5653 Val: (uint32_t)0);
5654 Changed = true;
5655 }
5656
5657 if (HasSwiftVersionFlag) {
5658 M.addModuleFlag(Behavior: Module::Error, Key: "Swift ABI Version",
5659 Val: SwiftABIVersion);
5660 M.addModuleFlag(Behavior: Module::Error, Key: "Swift Major Version",
5661 Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMajorVersion));
5662 M.addModuleFlag(Behavior: Module::Error, Key: "Swift Minor Version",
5663 Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMinorVersion));
5664 Changed = true;
5665 }
5666
5667 return Changed;
5668}
5669
5670void llvm::UpgradeSectionAttributes(Module &M) {
5671 auto TrimSpaces = [](StringRef Section) -> std::string {
5672 SmallVector<StringRef, 5> Components;
5673 Section.split(A&: Components, Separator: ',');
5674
5675 SmallString<32> Buffer;
5676 raw_svector_ostream OS(Buffer);
5677
5678 for (auto Component : Components)
5679 OS << ',' << Component.trim();
5680
5681 return std::string(OS.str().substr(Start: 1));
5682 };
5683
5684 for (auto &GV : M.globals()) {
5685 if (!GV.hasSection())
5686 continue;
5687
5688 StringRef Section = GV.getSection();
5689
5690 if (!Section.starts_with(Prefix: "__DATA, __objc_catlist"))
5691 continue;
5692
5693 // __DATA, __objc_catlist, regular, no_dead_strip
5694 // __DATA,__objc_catlist,regular,no_dead_strip
5695 GV.setSection(TrimSpaces(Section));
5696 }
5697}
5698
5699namespace {
5700// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5701// callsites within a function that did not also have the strictfp attribute.
5702// Since 10.0, if strict FP semantics are needed within a function, the
5703// function must have the strictfp attribute and all calls within the function
5704// must also have the strictfp attribute. This latter restriction is
5705// necessary to prevent unwanted libcall simplification when a function is
5706// being cloned (such as for inlining).
5707//
5708// The "dangling" strictfp attribute usage was only used to prevent constant
5709// folding and other libcall simplification. The nobuiltin attribute on the
5710// callsite has the same effect.
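// For illustration only (hypothetical IR): inside a function that does not
// itself have the strictfp attribute, a callsite like
//   %r = call double @sin(double %x) strictfp
// is rewritten to
//   %r = call double @sin(double %x) nobuiltin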
5711struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5712 StrictFPUpgradeVisitor() = default;
5713
5714 void visitCallBase(CallBase &Call) {
5715 if (!Call.isStrictFP())
5716 return;
5717 if (isa<ConstrainedFPIntrinsic>(Val: &Call))
5718 return;
5719 // If we get here, the caller doesn't have the strictfp attribute
5720 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5721 Call.removeFnAttr(Kind: Attribute::StrictFP);
5722 Call.addFnAttr(Kind: Attribute::NoBuiltin);
5723 }
5724};
5725
5726/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5727struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5728 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5729 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5730
5731 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5732 if (!RMW.isFloatingPointOperation())
5733 return;
5734
5735 MDNode *Empty = MDNode::get(Context&: RMW.getContext(), MDs: {});
5736 RMW.setMetadata(Kind: "amdgpu.no.fine.grained.host.memory", Node: Empty);
5737 RMW.setMetadata(Kind: "amdgpu.no.remote.memory.access", Node: Empty);
5738 RMW.setMetadata(Kind: "amdgpu.ignore.denormal.mode", Node: Empty);
5739 }
5740};
5741} // namespace
5742
5743void llvm::UpgradeFunctionAttributes(Function &F) {
5744 // If a function definition doesn't have the strictfp attribute,
5745 // convert any callsite strictfp attributes to nobuiltin.
5746 if (!F.isDeclaration() && !F.hasFnAttribute(Kind: Attribute::StrictFP)) {
5747 StrictFPUpgradeVisitor SFPV;
5748 SFPV.visit(F);
5749 }
5750
5751 // Remove all incompatible attributes from the function.
5752 F.removeRetAttrs(Attrs: AttributeFuncs::typeIncompatible(
5753 Ty: F.getReturnType(), AS: F.getAttributes().getRetAttrs()));
5754 for (auto &Arg : F.args())
5755 Arg.removeAttrs(
5756 AM: AttributeFuncs::typeIncompatible(Ty: Arg.getType(), AS: Arg.getAttributes()));
5757
5758 // Older versions of LLVM treated an "implicit-section-name" attribute
5759 // similarly to directly setting the section on a Function.
5760 if (Attribute A = F.getFnAttribute(Kind: "implicit-section-name");
5761 A.isValid() && A.isStringAttribute()) {
5762 F.setSection(A.getValueAsString());
5763 F.removeFnAttr(Kind: "implicit-section-name");
5764 }
5765
5766 if (!F.empty()) {
5767 // For some reason this is called twice, and the first time is before any
5768 // instructions are loaded into the body.
5769
5770 if (Attribute A = F.getFnAttribute(Kind: "amdgpu-unsafe-fp-atomics");
5771 A.isValid()) {
5772
5773 if (A.getValueAsBool()) {
5774 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5775 Visitor.visit(F);
5776 }
5777
5778 // We will leave behind dead attribute uses on external declarations, but
5779 // clang never added these to declarations anyway.
5780 F.removeFnAttr(Kind: "amdgpu-unsafe-fp-atomics");
5781 }
5782 }
5783}
5784
5785static bool isOldLoopArgument(Metadata *MD) {
5786 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
5787 if (!T)
5788 return false;
5789 if (T->getNumOperands() < 1)
5790 return false;
5791 auto *S = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
5792 if (!S)
5793 return false;
5794 return S->getString().starts_with(Prefix: "llvm.vectorizer.");
5795}
5796
5797static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5798 StringRef OldPrefix = "llvm.vectorizer.";
5799 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5800
5801 if (OldTag == "llvm.vectorizer.unroll")
5802 return MDString::get(Context&: C, Str: "llvm.loop.interleave.count");
5803
5804 return MDString::get(
5805 Context&: C, Str: (Twine("llvm.loop.vectorize.") + OldTag.drop_front(N: OldPrefix.size()))
5806 .str());
5807}
5808
5809static Metadata *upgradeLoopArgument(Metadata *MD) {
5810 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
5811 if (!T)
5812 return MD;
5813 if (T->getNumOperands() < 1)
5814 return MD;
5815 auto *OldTag = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
5816 if (!OldTag)
5817 return MD;
5818 if (!OldTag->getString().starts_with(Prefix: "llvm.vectorizer."))
5819 return MD;
5820
5821 // This has an old tag. Upgrade it.
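// For illustration only (hypothetical metadata): !{!"llvm.vectorizer.unroll", i32 4}
// becomes !{!"llvm.loop.interleave.count", i32 4}, and
// !{!"llvm.vectorizer.enable", i1 1} becomes !{!"llvm.loop.vectorize.enable", i1 1}.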
5822 SmallVector<Metadata *, 8> Ops;
5823 Ops.reserve(N: T->getNumOperands());
5824 Ops.push_back(Elt: upgradeLoopTag(C&: T->getContext(), OldTag: OldTag->getString()));
5825 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5826 Ops.push_back(Elt: T->getOperand(I));
5827
5828 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
5829}
5830
5831MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5832 auto *T = dyn_cast<MDTuple>(Val: &N);
5833 if (!T)
5834 return &N;
5835
5836 if (none_of(Range: T->operands(), P: isOldLoopArgument))
5837 return &N;
5838
5839 SmallVector<Metadata *, 8> Ops;
5840 Ops.reserve(N: T->getNumOperands());
5841 for (Metadata *MD : T->operands())
5842 Ops.push_back(Elt: upgradeLoopArgument(MD));
5843
5844 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
5845}
5846
5847std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5848 Triple T(TT);
5849 // The only data layout upgrade needed for pre-GCN AMDGPU, SPIR, or SPIR-V is
5850 // setting the address space of globals to 1. This does not apply to SPIR-V Logical.
5851 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5852 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5853 !DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G")) {
5854 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5855 }
5856
5857 if (T.isLoongArch64() || T.isRISCV64()) {
5858 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5859 auto I = DL.find(Str: "-n64-");
5860 if (I != StringRef::npos)
5861 return (DL.take_front(N: I) + "-n32:64-" + DL.drop_front(N: I + 5)).str();
5862 return DL.str();
5863 }
5864
5865 std::string Res = DL.str();
5866 // AMDGCN data layout upgrades.
5867 if (T.isAMDGCN()) {
5868 // Define address spaces for constants.
5869 if (!DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G"))
5870 Res.append(s: Res.empty() ? "G1" : "-G1");
5871
5872 // Add missing non-integral declarations.
5873 // This goes before adding new address spaces to prevent incoherent string
5874 // values.
5875 if (!DL.contains(Other: "-ni") && !DL.starts_with(Prefix: "ni"))
5876 Res.append(s: "-ni:7:8:9");
5877 // Update ni:7 to ni:7:8:9.
5878 if (DL.ends_with(Suffix: "ni:7"))
5879 Res.append(s: ":8:9");
5880 if (DL.ends_with(Suffix: "ni:7:8"))
5881 Res.append(s: ":9");
5882
5883 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5884 // resources). An empty data layout has already been upgraded to G1 by now.
5885 if (!DL.contains(Other: "-p7") && !DL.starts_with(Prefix: "p7"))
5886 Res.append(s: "-p7:160:256:256:32");
5887 if (!DL.contains(Other: "-p8") && !DL.starts_with(Prefix: "p8"))
5888 Res.append(s: "-p8:128:128:128:48");
5889 constexpr StringRef OldP8("-p8:128:128-");
5890 if (DL.contains(Other: OldP8))
5891 Res.replace(pos: Res.find(svt: OldP8), n1: OldP8.size(), s: "-p8:128:128:128:48-");
5892 if (!DL.contains(Other: "-p9") && !DL.starts_with(Prefix: "p9"))
5893 Res.append(s: "-p9:192:256:256:32");
5894
5895 return Res;
5896 }
5897
5898 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
5899 // If the datalayout matches the expected format, add pointer size address
5900 // spaces to the datalayout.
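// For illustration only (a hypothetical layout string): "e-m:w-p:32:32-i64:64-n8:16:32-S32"
// becomes "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-n8:16:32-S32".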
5901 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
5902 if (!DL.contains(Other: AddrSpaces)) {
5903 SmallVector<StringRef, 4> Groups;
5904 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
5905 if (R.match(String: Res, Matches: &Groups))
5906 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5907 }
5908 };
5909
5910 // AArch64 data layout upgrades.
5911 if (T.isAArch64()) {
5912 // Add "-Fn32"
5913 if (!DL.empty() && !DL.contains(Other: "-Fn32"))
5914 Res.append(s: "-Fn32");
5915 AddPtr32Ptr64AddrSpaces();
5916 return Res;
5917 }
5918
5919 if (T.isSPARC() || (T.isMIPS64() && !DL.contains(Other: "m:m")) || T.isPPC64() ||
5920 T.isWasm()) {
5921 // MIPS64 with the o32 ABI did not add "-i128:128".
5922 // Add "-i128:128".
5923 std::string I64 = "-i64:64";
5924 std::string I128 = "-i128:128";
5925 if (!StringRef(Res).contains(Other: I128)) {
5926 size_t Pos = Res.find(str: I64);
5927 if (Pos != size_t(-1))
5928 Res.insert(pos1: Pos + I64.size(), str: I128);
5929 }
5930 return Res;
5931 }
5932
5933 if (!T.isX86())
5934 return Res;
5935
5936 AddPtr32Ptr64AddrSpaces();
5937
5938 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5939 // for i128 operations prior to this being reflected in the data layout, and
5940 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5941 // boundaries, so although this is a breaking change, the upgrade is expected
5942 // to fix more IR than it breaks.
5943 // Intel MCU is an exception and uses 4-byte alignment.
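// For illustration only (a hypothetical layout string):
// "e-m:e-i64:64-f80:128-n8:16:32:64-S128" is rewritten to
// "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128", i.e. "-i128:128" is
// inserted after the leading m/p/i component group matched by the regex below.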
5944 if (!T.isOSIAMCU()) {
5945 std::string I128 = "-i128:128";
5946 if (StringRef Ref = Res; !Ref.contains(Other: I128)) {
5947 SmallVector<StringRef, 4> Groups;
5948 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5949 if (R.match(String: Res, Matches: &Groups))
5950 Res = (Groups[1] + I128 + Groups[3]).str();
5951 }
5952 }
5953
5954 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5955 // Raising the alignment is safe because Clang did not produce f80 values in
5956 // the MSVC environment before this upgrade was added.
5957 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5958 StringRef Ref = Res;
5959 auto I = Ref.find(Str: "-f80:32-");
5960 if (I != StringRef::npos)
5961 Res = (Ref.take_front(N: I) + "-f80:128-" + Ref.drop_front(N: I + 8)).str();
5962 }
5963
5964 return Res;
5965}
5966
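// For illustration only (not from the original source): UpgradeAttributes
// rewrites "no-frame-pointer-elim"="true" to "frame-pointer"="all", a lone
// "no-frame-pointer-elim-non-leaf" to "frame-pointer"="non-leaf", and
// "null-pointer-is-valid"="true" to the null_pointer_is_valid enum attribute.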
5967void llvm::UpgradeAttributes(AttrBuilder &B) {
5968 StringRef FramePointer;
5969 Attribute A = B.getAttribute(Kind: "no-frame-pointer-elim");
5970 if (A.isValid()) {
5971 // The value can be "true" or "false".
5972 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5973 B.removeAttribute(A: "no-frame-pointer-elim");
5974 }
5975 if (B.contains(A: "no-frame-pointer-elim-non-leaf")) {
5976 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5977 if (FramePointer != "all")
5978 FramePointer = "non-leaf";
5979 B.removeAttribute(A: "no-frame-pointer-elim-non-leaf");
5980 }
5981 if (!FramePointer.empty())
5982 B.addAttribute(A: "frame-pointer", V: FramePointer);
5983
5984 A = B.getAttribute(Kind: "null-pointer-is-valid");
5985 if (A.isValid()) {
5986 // The value can be "true" or "false".
5987 bool NullPointerIsValid = A.getValueAsString() == "true";
5988 B.removeAttribute(A: "null-pointer-is-valid");
5989 if (NullPointerIsValid)
5990 B.addAttribute(Val: Attribute::NullPointerIsValid);
5991 }
5992}
5993
5994void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5995 // clang.arc.attachedcall bundles are now required to have an operand.
5996 // If they don't, it's okay to drop them entirely: when there is an operand,
5997 // the "attachedcall" is meaningful and required, but without an operand,
5998 // it's just a marker NOP. Dropping it merely prevents an optimization.
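// For illustration only (hypothetical IR): a bundle written as
//   call void @foo() [ "clang.arc.attachedcall"() ]
// is dropped by this upgrade, while a bundle that carries its runtime-function
// operand, e.g. [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ],
// is left in place.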
5999 erase_if(C&: Bundles, P: [&](OperandBundleDef &OBD) {
6000 return OBD.getTag() == "clang.arc.attachedcall" &&
6001 OBD.inputs().empty();
6002 });
6003}
6004