1//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGBuiltin.h"
15#include "CGDebugInfo.h"
16#include "TargetInfo.h"
17#include "clang/Basic/AArch64CodeGenUtils.h"
18#include "clang/Basic/TargetBuiltins.h"
19#include "llvm/IR/InlineAsm.h"
20#include "llvm/IR/IntrinsicsAArch64.h"
21#include "llvm/IR/IntrinsicsARM.h"
22#include "llvm/IR/IntrinsicsBPF.h"
23#include "llvm/TargetParser/AArch64TargetParser.h"
24
25#include <numeric>
26
27using namespace clang;
28using namespace CodeGen;
29using namespace llvm;
30using namespace clang::aarch64;
31
32static std::optional<CodeGenFunction::MSVCIntrin>
33translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
34 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
35 switch (BuiltinID) {
36 default:
37 return std::nullopt;
38 case clang::AArch64::BI_BitScanForward:
39 case clang::AArch64::BI_BitScanForward64:
40 return MSVCIntrin::_BitScanForward;
41 case clang::AArch64::BI_BitScanReverse:
42 case clang::AArch64::BI_BitScanReverse64:
43 return MSVCIntrin::_BitScanReverse;
44 case clang::AArch64::BI_InterlockedAnd64:
45 return MSVCIntrin::_InterlockedAnd;
46 case clang::AArch64::BI_InterlockedExchange64:
47 return MSVCIntrin::_InterlockedExchange;
48 case clang::AArch64::BI_InterlockedExchangeAdd64:
49 return MSVCIntrin::_InterlockedExchangeAdd;
50 case clang::AArch64::BI_InterlockedExchangeSub64:
51 return MSVCIntrin::_InterlockedExchangeSub;
52 case clang::AArch64::BI_InterlockedOr64:
53 return MSVCIntrin::_InterlockedOr;
54 case clang::AArch64::BI_InterlockedXor64:
55 return MSVCIntrin::_InterlockedXor;
56 case clang::AArch64::BI_InterlockedDecrement64:
57 return MSVCIntrin::_InterlockedDecrement;
58 case clang::AArch64::BI_InterlockedIncrement64:
59 return MSVCIntrin::_InterlockedIncrement;
60 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
61 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
62 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
63 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
64 return MSVCIntrin::_InterlockedExchangeAdd_acq;
65 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
66 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
67 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
68 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
69 return MSVCIntrin::_InterlockedExchangeAdd_rel;
70 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
71 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
72 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
73 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
74 return MSVCIntrin::_InterlockedExchangeAdd_nf;
75 case clang::AArch64::BI_InterlockedExchange8_acq:
76 case clang::AArch64::BI_InterlockedExchange16_acq:
77 case clang::AArch64::BI_InterlockedExchange_acq:
78 case clang::AArch64::BI_InterlockedExchange64_acq:
79 case clang::AArch64::BI_InterlockedExchangePointer_acq:
80 return MSVCIntrin::_InterlockedExchange_acq;
81 case clang::AArch64::BI_InterlockedExchange8_rel:
82 case clang::AArch64::BI_InterlockedExchange16_rel:
83 case clang::AArch64::BI_InterlockedExchange_rel:
84 case clang::AArch64::BI_InterlockedExchange64_rel:
85 case clang::AArch64::BI_InterlockedExchangePointer_rel:
86 return MSVCIntrin::_InterlockedExchange_rel;
87 case clang::AArch64::BI_InterlockedExchange8_nf:
88 case clang::AArch64::BI_InterlockedExchange16_nf:
89 case clang::AArch64::BI_InterlockedExchange_nf:
90 case clang::AArch64::BI_InterlockedExchange64_nf:
91 case clang::AArch64::BI_InterlockedExchangePointer_nf:
92 return MSVCIntrin::_InterlockedExchange_nf;
93 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
94 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
95 case clang::AArch64::BI_InterlockedCompareExchange_acq:
96 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
97 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
98 return MSVCIntrin::_InterlockedCompareExchange_acq;
99 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
100 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
101 case clang::AArch64::BI_InterlockedCompareExchange_rel:
102 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
103 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
104 return MSVCIntrin::_InterlockedCompareExchange_rel;
105 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
106 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
107 case clang::AArch64::BI_InterlockedCompareExchange_nf:
108 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
109 return MSVCIntrin::_InterlockedCompareExchange_nf;
110 case clang::AArch64::BI_InterlockedCompareExchange128:
111 return MSVCIntrin::_InterlockedCompareExchange128;
112 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
113 return MSVCIntrin::_InterlockedCompareExchange128_acq;
114 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
115 return MSVCIntrin::_InterlockedCompareExchange128_nf;
116 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
117 return MSVCIntrin::_InterlockedCompareExchange128_rel;
118 case clang::AArch64::BI_InterlockedOr8_acq:
119 case clang::AArch64::BI_InterlockedOr16_acq:
120 case clang::AArch64::BI_InterlockedOr_acq:
121 case clang::AArch64::BI_InterlockedOr64_acq:
122 return MSVCIntrin::_InterlockedOr_acq;
123 case clang::AArch64::BI_InterlockedOr8_rel:
124 case clang::AArch64::BI_InterlockedOr16_rel:
125 case clang::AArch64::BI_InterlockedOr_rel:
126 case clang::AArch64::BI_InterlockedOr64_rel:
127 return MSVCIntrin::_InterlockedOr_rel;
128 case clang::AArch64::BI_InterlockedOr8_nf:
129 case clang::AArch64::BI_InterlockedOr16_nf:
130 case clang::AArch64::BI_InterlockedOr_nf:
131 case clang::AArch64::BI_InterlockedOr64_nf:
132 return MSVCIntrin::_InterlockedOr_nf;
133 case clang::AArch64::BI_InterlockedXor8_acq:
134 case clang::AArch64::BI_InterlockedXor16_acq:
135 case clang::AArch64::BI_InterlockedXor_acq:
136 case clang::AArch64::BI_InterlockedXor64_acq:
137 return MSVCIntrin::_InterlockedXor_acq;
138 case clang::AArch64::BI_InterlockedXor8_rel:
139 case clang::AArch64::BI_InterlockedXor16_rel:
140 case clang::AArch64::BI_InterlockedXor_rel:
141 case clang::AArch64::BI_InterlockedXor64_rel:
142 return MSVCIntrin::_InterlockedXor_rel;
143 case clang::AArch64::BI_InterlockedXor8_nf:
144 case clang::AArch64::BI_InterlockedXor16_nf:
145 case clang::AArch64::BI_InterlockedXor_nf:
146 case clang::AArch64::BI_InterlockedXor64_nf:
147 return MSVCIntrin::_InterlockedXor_nf;
148 case clang::AArch64::BI_InterlockedAnd8_acq:
149 case clang::AArch64::BI_InterlockedAnd16_acq:
150 case clang::AArch64::BI_InterlockedAnd_acq:
151 case clang::AArch64::BI_InterlockedAnd64_acq:
152 return MSVCIntrin::_InterlockedAnd_acq;
153 case clang::AArch64::BI_InterlockedAnd8_rel:
154 case clang::AArch64::BI_InterlockedAnd16_rel:
155 case clang::AArch64::BI_InterlockedAnd_rel:
156 case clang::AArch64::BI_InterlockedAnd64_rel:
157 return MSVCIntrin::_InterlockedAnd_rel;
158 case clang::AArch64::BI_InterlockedAnd8_nf:
159 case clang::AArch64::BI_InterlockedAnd16_nf:
160 case clang::AArch64::BI_InterlockedAnd_nf:
161 case clang::AArch64::BI_InterlockedAnd64_nf:
162 return MSVCIntrin::_InterlockedAnd_nf;
163 case clang::AArch64::BI_InterlockedIncrement16_acq:
164 case clang::AArch64::BI_InterlockedIncrement_acq:
165 case clang::AArch64::BI_InterlockedIncrement64_acq:
166 return MSVCIntrin::_InterlockedIncrement_acq;
167 case clang::AArch64::BI_InterlockedIncrement16_rel:
168 case clang::AArch64::BI_InterlockedIncrement_rel:
169 case clang::AArch64::BI_InterlockedIncrement64_rel:
170 return MSVCIntrin::_InterlockedIncrement_rel;
171 case clang::AArch64::BI_InterlockedIncrement16_nf:
172 case clang::AArch64::BI_InterlockedIncrement_nf:
173 case clang::AArch64::BI_InterlockedIncrement64_nf:
174 return MSVCIntrin::_InterlockedIncrement_nf;
175 case clang::AArch64::BI_InterlockedDecrement16_acq:
176 case clang::AArch64::BI_InterlockedDecrement_acq:
177 case clang::AArch64::BI_InterlockedDecrement64_acq:
178 return MSVCIntrin::_InterlockedDecrement_acq;
179 case clang::AArch64::BI_InterlockedDecrement16_rel:
180 case clang::AArch64::BI_InterlockedDecrement_rel:
181 case clang::AArch64::BI_InterlockedDecrement64_rel:
182 return MSVCIntrin::_InterlockedDecrement_rel;
183 case clang::AArch64::BI_InterlockedDecrement16_nf:
184 case clang::AArch64::BI_InterlockedDecrement_nf:
185 case clang::AArch64::BI_InterlockedDecrement64_nf:
186 return MSVCIntrin::_InterlockedDecrement_nf;
187 }
188 llvm_unreachable("must return from switch");
189}
190
191static std::optional<CodeGenFunction::MSVCIntrin>
192translateArmToMsvcIntrin(unsigned BuiltinID) {
193 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
194 switch (BuiltinID) {
195 default:
196 return std::nullopt;
197 case clang::ARM::BI_BitScanForward:
198 case clang::ARM::BI_BitScanForward64:
199 return MSVCIntrin::_BitScanForward;
200 case clang::ARM::BI_BitScanReverse:
201 case clang::ARM::BI_BitScanReverse64:
202 return MSVCIntrin::_BitScanReverse;
203 case clang::ARM::BI_InterlockedAnd64:
204 return MSVCIntrin::_InterlockedAnd;
205 case clang::ARM::BI_InterlockedExchange64:
206 return MSVCIntrin::_InterlockedExchange;
207 case clang::ARM::BI_InterlockedExchangeAdd64:
208 return MSVCIntrin::_InterlockedExchangeAdd;
209 case clang::ARM::BI_InterlockedExchangeSub64:
210 return MSVCIntrin::_InterlockedExchangeSub;
211 case clang::ARM::BI_InterlockedOr64:
212 return MSVCIntrin::_InterlockedOr;
213 case clang::ARM::BI_InterlockedXor64:
214 return MSVCIntrin::_InterlockedXor;
215 case clang::ARM::BI_InterlockedDecrement64:
216 return MSVCIntrin::_InterlockedDecrement;
217 case clang::ARM::BI_InterlockedIncrement64:
218 return MSVCIntrin::_InterlockedIncrement;
219 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
220 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
221 case clang::ARM::BI_InterlockedExchangeAdd_acq:
222 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
223 return MSVCIntrin::_InterlockedExchangeAdd_acq;
224 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
225 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
226 case clang::ARM::BI_InterlockedExchangeAdd_rel:
227 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
228 return MSVCIntrin::_InterlockedExchangeAdd_rel;
229 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
230 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
231 case clang::ARM::BI_InterlockedExchangeAdd_nf:
232 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
233 return MSVCIntrin::_InterlockedExchangeAdd_nf;
234 case clang::ARM::BI_InterlockedExchange8_acq:
235 case clang::ARM::BI_InterlockedExchange16_acq:
236 case clang::ARM::BI_InterlockedExchange_acq:
237 case clang::ARM::BI_InterlockedExchange64_acq:
238 case clang::ARM::BI_InterlockedExchangePointer_acq:
239 return MSVCIntrin::_InterlockedExchange_acq;
240 case clang::ARM::BI_InterlockedExchange8_rel:
241 case clang::ARM::BI_InterlockedExchange16_rel:
242 case clang::ARM::BI_InterlockedExchange_rel:
243 case clang::ARM::BI_InterlockedExchange64_rel:
244 case clang::ARM::BI_InterlockedExchangePointer_rel:
245 return MSVCIntrin::_InterlockedExchange_rel;
246 case clang::ARM::BI_InterlockedExchange8_nf:
247 case clang::ARM::BI_InterlockedExchange16_nf:
248 case clang::ARM::BI_InterlockedExchange_nf:
249 case clang::ARM::BI_InterlockedExchange64_nf:
250 case clang::ARM::BI_InterlockedExchangePointer_nf:
251 return MSVCIntrin::_InterlockedExchange_nf;
252 case clang::ARM::BI_InterlockedCompareExchange8_acq:
253 case clang::ARM::BI_InterlockedCompareExchange16_acq:
254 case clang::ARM::BI_InterlockedCompareExchange_acq:
255 case clang::ARM::BI_InterlockedCompareExchange64_acq:
256 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
257 return MSVCIntrin::_InterlockedCompareExchange_acq;
258 case clang::ARM::BI_InterlockedCompareExchange8_rel:
259 case clang::ARM::BI_InterlockedCompareExchange16_rel:
260 case clang::ARM::BI_InterlockedCompareExchange_rel:
261 case clang::ARM::BI_InterlockedCompareExchange64_rel:
262 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
263 return MSVCIntrin::_InterlockedCompareExchange_rel;
264 case clang::ARM::BI_InterlockedCompareExchange8_nf:
265 case clang::ARM::BI_InterlockedCompareExchange16_nf:
266 case clang::ARM::BI_InterlockedCompareExchange_nf:
267 case clang::ARM::BI_InterlockedCompareExchange64_nf:
268 return MSVCIntrin::_InterlockedCompareExchange_nf;
269 case clang::ARM::BI_InterlockedOr8_acq:
270 case clang::ARM::BI_InterlockedOr16_acq:
271 case clang::ARM::BI_InterlockedOr_acq:
272 case clang::ARM::BI_InterlockedOr64_acq:
273 return MSVCIntrin::_InterlockedOr_acq;
274 case clang::ARM::BI_InterlockedOr8_rel:
275 case clang::ARM::BI_InterlockedOr16_rel:
276 case clang::ARM::BI_InterlockedOr_rel:
277 case clang::ARM::BI_InterlockedOr64_rel:
278 return MSVCIntrin::_InterlockedOr_rel;
279 case clang::ARM::BI_InterlockedOr8_nf:
280 case clang::ARM::BI_InterlockedOr16_nf:
281 case clang::ARM::BI_InterlockedOr_nf:
282 case clang::ARM::BI_InterlockedOr64_nf:
283 return MSVCIntrin::_InterlockedOr_nf;
284 case clang::ARM::BI_InterlockedXor8_acq:
285 case clang::ARM::BI_InterlockedXor16_acq:
286 case clang::ARM::BI_InterlockedXor_acq:
287 case clang::ARM::BI_InterlockedXor64_acq:
288 return MSVCIntrin::_InterlockedXor_acq;
289 case clang::ARM::BI_InterlockedXor8_rel:
290 case clang::ARM::BI_InterlockedXor16_rel:
291 case clang::ARM::BI_InterlockedXor_rel:
292 case clang::ARM::BI_InterlockedXor64_rel:
293 return MSVCIntrin::_InterlockedXor_rel;
294 case clang::ARM::BI_InterlockedXor8_nf:
295 case clang::ARM::BI_InterlockedXor16_nf:
296 case clang::ARM::BI_InterlockedXor_nf:
297 case clang::ARM::BI_InterlockedXor64_nf:
298 return MSVCIntrin::_InterlockedXor_nf;
299 case clang::ARM::BI_InterlockedAnd8_acq:
300 case clang::ARM::BI_InterlockedAnd16_acq:
301 case clang::ARM::BI_InterlockedAnd_acq:
302 case clang::ARM::BI_InterlockedAnd64_acq:
303 return MSVCIntrin::_InterlockedAnd_acq;
304 case clang::ARM::BI_InterlockedAnd8_rel:
305 case clang::ARM::BI_InterlockedAnd16_rel:
306 case clang::ARM::BI_InterlockedAnd_rel:
307 case clang::ARM::BI_InterlockedAnd64_rel:
308 return MSVCIntrin::_InterlockedAnd_rel;
309 case clang::ARM::BI_InterlockedAnd8_nf:
310 case clang::ARM::BI_InterlockedAnd16_nf:
311 case clang::ARM::BI_InterlockedAnd_nf:
312 case clang::ARM::BI_InterlockedAnd64_nf:
313 return MSVCIntrin::_InterlockedAnd_nf;
314 case clang::ARM::BI_InterlockedIncrement16_acq:
315 case clang::ARM::BI_InterlockedIncrement_acq:
316 case clang::ARM::BI_InterlockedIncrement64_acq:
317 return MSVCIntrin::_InterlockedIncrement_acq;
318 case clang::ARM::BI_InterlockedIncrement16_rel:
319 case clang::ARM::BI_InterlockedIncrement_rel:
320 case clang::ARM::BI_InterlockedIncrement64_rel:
321 return MSVCIntrin::_InterlockedIncrement_rel;
322 case clang::ARM::BI_InterlockedIncrement16_nf:
323 case clang::ARM::BI_InterlockedIncrement_nf:
324 case clang::ARM::BI_InterlockedIncrement64_nf:
325 return MSVCIntrin::_InterlockedIncrement_nf;
326 case clang::ARM::BI_InterlockedDecrement16_acq:
327 case clang::ARM::BI_InterlockedDecrement_acq:
328 case clang::ARM::BI_InterlockedDecrement64_acq:
329 return MSVCIntrin::_InterlockedDecrement_acq;
330 case clang::ARM::BI_InterlockedDecrement16_rel:
331 case clang::ARM::BI_InterlockedDecrement_rel:
332 case clang::ARM::BI_InterlockedDecrement64_rel:
333 return MSVCIntrin::_InterlockedDecrement_rel;
334 case clang::ARM::BI_InterlockedDecrement16_nf:
335 case clang::ARM::BI_InterlockedDecrement_nf:
336 case clang::ARM::BI_InterlockedDecrement64_nf:
337 return MSVCIntrin::_InterlockedDecrement_nf;
338 }
339 llvm_unreachable("must return from switch");
340}
341
342// Emit an intrinsic where all operands are of the same type as the result.
343// Depending on mode, this may be a constrained floating-point intrinsic.
344static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
345 unsigned IntrinsicID,
346 unsigned ConstrainedIntrinsicID,
347 llvm::Type *Ty,
348 ArrayRef<Value *> Args) {
349 Function *F;
350 if (CGF.Builder.getIsFPConstrained())
351 F = CGF.CGM.getIntrinsic(IID: ConstrainedIntrinsicID, Tys: Ty);
352 else
353 F = CGF.CGM.getIntrinsic(IID: IntrinsicID, Tys: Ty);
354
355 if (CGF.Builder.getIsFPConstrained())
356 return CGF.Builder.CreateConstrainedFPCall(Callee: F, Args);
357
358 return CGF.Builder.CreateCall(Callee: F, Args);
359}
360
361static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
362 NeonTypeFlags TypeFlags,
363 bool HasFastHalfType = true,
364 bool V1Ty = false,
365 bool AllowBFloatArgsAndRet = true) {
366 int IsQuad = TypeFlags.isQuad();
367 switch (TypeFlags.getEltType()) {
368 case NeonTypeFlags::Int8:
369 case NeonTypeFlags::Poly8:
370 case NeonTypeFlags::MFloat8:
371 return llvm::FixedVectorType::get(ElementType: CGF->Int8Ty, NumElts: V1Ty ? 1 : (8 << IsQuad));
372 case NeonTypeFlags::Int16:
373 case NeonTypeFlags::Poly16:
374 return llvm::FixedVectorType::get(ElementType: CGF->Int16Ty, NumElts: V1Ty ? 1 : (4 << IsQuad));
375 case NeonTypeFlags::BFloat16:
376 if (AllowBFloatArgsAndRet)
377 return llvm::FixedVectorType::get(ElementType: CGF->BFloatTy, NumElts: V1Ty ? 1 : (4 << IsQuad));
378 return llvm::FixedVectorType::get(ElementType: CGF->Int16Ty, NumElts: V1Ty ? 1 : (4 << IsQuad));
379 case NeonTypeFlags::Float16:
380 if (HasFastHalfType)
381 return llvm::FixedVectorType::get(ElementType: CGF->HalfTy, NumElts: V1Ty ? 1 : (4 << IsQuad));
382 return llvm::FixedVectorType::get(ElementType: CGF->Int16Ty, NumElts: V1Ty ? 1 : (4 << IsQuad));
383 case NeonTypeFlags::Int32:
384 return llvm::FixedVectorType::get(ElementType: CGF->Int32Ty, NumElts: V1Ty ? 1 : (2 << IsQuad));
385 case NeonTypeFlags::Int64:
386 case NeonTypeFlags::Poly64:
387 return llvm::FixedVectorType::get(ElementType: CGF->Int64Ty, NumElts: V1Ty ? 1 : (1 << IsQuad));
388 case NeonTypeFlags::Poly128:
389 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
390 // There is a lot of i128 and f128 API missing.
391 // so we use v16i8 to represent poly128 and get pattern matched.
392 return llvm::FixedVectorType::get(ElementType: CGF->Int8Ty, NumElts: 16);
393 case NeonTypeFlags::Float32:
394 return llvm::FixedVectorType::get(ElementType: CGF->FloatTy, NumElts: V1Ty ? 1 : (2 << IsQuad));
395 case NeonTypeFlags::Float64:
396 return llvm::FixedVectorType::get(ElementType: CGF->DoubleTy, NumElts: V1Ty ? 1 : (1 << IsQuad));
397 }
398 llvm_unreachable("Unknown vector element type!");
399}
400
401static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
402 NeonTypeFlags IntTypeFlags) {
403 int IsQuad = IntTypeFlags.isQuad();
404 switch (IntTypeFlags.getEltType()) {
405 case NeonTypeFlags::Int16:
406 return llvm::FixedVectorType::get(ElementType: CGF->HalfTy, NumElts: (4 << IsQuad));
407 case NeonTypeFlags::Int32:
408 return llvm::FixedVectorType::get(ElementType: CGF->FloatTy, NumElts: (2 << IsQuad));
409 case NeonTypeFlags::Int64:
410 return llvm::FixedVectorType::get(ElementType: CGF->DoubleTy, NumElts: (1 << IsQuad));
411 default:
412 llvm_unreachable("Type can't be converted to floating-point!");
413 }
414}
415
416Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
417 const ElementCount &Count) {
418 Value *SV = llvm::ConstantVector::getSplat(EC: Count, Elt: C);
419 return Builder.CreateShuffleVector(V1: V, V2: V, Mask: SV, Name: "lane");
420}
421
422Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
423 ElementCount EC = cast<llvm::VectorType>(Val: V->getType())->getElementCount();
424 return EmitNeonSplat(V, C, Count: EC);
425}
426
427Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
428 const char *name,
429 unsigned shift, bool rightshift) {
430 unsigned j = 0;
431 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
432 ai != ae; ++ai, ++j) {
433 if (F->isConstrainedFPIntrinsic())
434 if (ai->getType()->isMetadataTy())
435 continue;
436 if (shift > 0 && shift == j)
437 Ops[j] = EmitNeonShiftVector(V: Ops[j], Ty: ai->getType(), negateForRightShift: rightshift);
438 else
439 Ops[j] = Builder.CreateBitCast(V: Ops[j], DestTy: ai->getType(), Name: name);
440 }
441
442 if (F->isConstrainedFPIntrinsic())
443 return Builder.CreateConstrainedFPCall(Callee: F, Args: Ops, Name: name);
444 return Builder.CreateCall(Callee: F, Args: Ops, Name: name);
445}
446
447Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
448 ArrayRef<llvm::Type *> Tys,
449 SmallVectorImpl<Value *> &Ops,
450 const CallExpr *E, const char *name) {
451 Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_set_fpmr),
452 Args: Ops.pop_back_val());
453 return EmitNeonCall(F: CGM.getIntrinsic(IID, Tys), Ops, name);
454}
455
456llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
457 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
458 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
459
460 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
461 RetTy->getPrimitiveSizeInBits();
462 llvm::Type *Tys[] = {llvm::FixedVectorType::get(ElementType: RetTy, NumElts: ElemCount),
463 Ops[1]->getType()};
464 if (ExtendLaneArg) {
465 auto *VT = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
466 Ops[2] = Builder.CreateInsertVector(DstType: VT, SrcVec: PoisonValue::get(T: VT), SubVec: Ops[2],
467 Idx: uint64_t(0));
468 }
469 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
470}
471
472llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
473 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
474 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
475
476 if (ExtendLaneArg) {
477 auto *VT = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
478 Ops[2] = Builder.CreateInsertVector(DstType: VT, SrcVec: PoisonValue::get(T: VT), SubVec: Ops[2],
479 Idx: uint64_t(0));
480 }
481 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
482 RetTy->getPrimitiveSizeInBits();
483 return EmitFP8NeonCall(IID, Tys: {llvm::FixedVectorType::get(ElementType: RetTy, NumElts: ElemCount)},
484 Ops, E, name);
485}
486
487Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
488 bool neg) {
489 int SV = cast<ConstantInt>(Val: V)->getSExtValue();
490 return ConstantInt::getSigned(Ty, V: neg ? -SV : SV);
491}
492
493Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
494 llvm::Type *Ty1, bool Extract,
495 SmallVectorImpl<llvm::Value *> &Ops,
496 const CallExpr *E,
497 const char *name) {
498 llvm::Type *Tys[] = {Ty0, Ty1};
499 if (Extract) {
500 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
501 // the vector.
502 Tys[1] = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
503 Ops[0] = Builder.CreateExtractVector(DstType: Tys[1], SrcVec: Ops[0], Idx: uint64_t(0));
504 }
505 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
506}
507
508// Right-shift a vector by a constant.
509Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
510 llvm::Type *Ty, bool usgn,
511 const char *name) {
512 llvm::VectorType *VTy = cast<llvm::VectorType>(Val: Ty);
513
514 int ShiftAmt = cast<ConstantInt>(Val: Shift)->getSExtValue();
515 int EltSize = VTy->getScalarSizeInBits();
516
517 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty);
518
519 // lshr/ashr are undefined when the shift amount is equal to the vector
520 // element size.
521 if (ShiftAmt == EltSize) {
522 if (usgn) {
523 // Right-shifting an unsigned value by its size yields 0.
524 return llvm::ConstantAggregateZero::get(Ty: VTy);
525 } else {
526 // Right-shifting a signed value by its size is equivalent
527 // to a shift of size-1.
528 --ShiftAmt;
529 Shift = ConstantInt::get(Ty: VTy->getElementType(), V: ShiftAmt);
530 }
531 }
532
533 Shift = EmitNeonShiftVector(V: Shift, Ty, neg: false);
534 if (usgn)
535 return Builder.CreateLShr(LHS: Vec, RHS: Shift, Name: name);
536 return Builder.CreateAShr(LHS: Vec, RHS: Shift, Name: name);
537}
538
539// clang-format off
540static const ARMNeonVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
541 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
542 NEONMAP0(splat_lane_v),
543 NEONMAP0(splat_laneq_v),
544 NEONMAP0(splatq_lane_v),
545 NEONMAP0(splatq_laneq_v),
546 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
547 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
548 NEONMAP1(vabs_v, arm_neon_vabs, 0),
549 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
550 NEONMAP0(vadd_v),
551 NEONMAP0(vaddhn_v),
552 NEONMAP0(vaddq_v),
553 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
554 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
555 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
556 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
557 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
558 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
559 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
560 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
561 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
562 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
563 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
564 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
565 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
566 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
567 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
568 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
569 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
570 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
571 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
572 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
573 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
574 NEONMAP1(vcage_v, arm_neon_vacge, 0),
575 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
576 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
577 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
578 NEONMAP1(vcale_v, arm_neon_vacge, 0),
579 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
580 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
581 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
582 NEONMAP0(vceqz_v),
583 NEONMAP0(vceqzq_v),
584 NEONMAP0(vcgez_v),
585 NEONMAP0(vcgezq_v),
586 NEONMAP0(vcgtz_v),
587 NEONMAP0(vcgtzq_v),
588 NEONMAP0(vclez_v),
589 NEONMAP0(vclezq_v),
590 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
591 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
592 NEONMAP0(vcltz_v),
593 NEONMAP0(vcltzq_v),
594 NEONMAP1(vclz_v, ctlz, Add1ArgType),
595 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
596 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
597 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
598 NEONMAP0(vcvt_f16_s16),
599 NEONMAP0(vcvt_f16_u16),
600 NEONMAP0(vcvt_f32_v),
601 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
602 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
603 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
604 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
605 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
606 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
607 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
608 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
609 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
610 NEONMAP0(vcvt_s16_f16),
611 NEONMAP0(vcvt_s32_v),
612 NEONMAP0(vcvt_s64_v),
613 NEONMAP0(vcvt_u16_f16),
614 NEONMAP0(vcvt_u32_v),
615 NEONMAP0(vcvt_u64_v),
616 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
617 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
618 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
619 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
620 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
621 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
622 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
623 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
624 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
625 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
626 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
627 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
628 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
629 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
630 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
631 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
632 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
633 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
634 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
635 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
636 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
637 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
638 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
639 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
640 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
641 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
642 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
643 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
644 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
645 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
646 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
647 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
648 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
649 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
650 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
651 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
652 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
653 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
654 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
655 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
656 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
657 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
658 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
659 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
660 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
661 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
662 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
663 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
664 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
665 NEONMAP0(vcvtq_f16_s16),
666 NEONMAP0(vcvtq_f16_u16),
667 NEONMAP0(vcvtq_f32_v),
668 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
669 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
670 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
671 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
672 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
673 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
674 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
675 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
676 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
677 NEONMAP0(vcvtq_s16_f16),
678 NEONMAP0(vcvtq_s32_v),
679 NEONMAP0(vcvtq_s64_v),
680 NEONMAP0(vcvtq_u16_f16),
681 NEONMAP0(vcvtq_u32_v),
682 NEONMAP0(vcvtq_u64_v),
683 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
684 NEONMAP1(vdot_u32, arm_neon_udot, 0),
685 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
686 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
687 NEONMAP0(vext_v),
688 NEONMAP0(vextq_v),
689 NEONMAP0(vfma_v),
690 NEONMAP0(vfmaq_v),
691 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
692 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
693 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
694 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
695 NEONMAP0(vld1_dup_v),
696 NEONMAP1(vld1_v, arm_neon_vld1, 0),
697 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
698 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
699 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
700 NEONMAP0(vld1q_dup_v),
701 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
702 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
703 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
704 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
705 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
706 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
707 NEONMAP1(vld2_v, arm_neon_vld2, 0),
708 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
709 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
710 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
711 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
712 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
713 NEONMAP1(vld3_v, arm_neon_vld3, 0),
714 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
715 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
716 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
717 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
718 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
719 NEONMAP1(vld4_v, arm_neon_vld4, 0),
720 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
721 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
722 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
723 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
724 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
725 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
726 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
727 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
728 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
729 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
730 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
731 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
732 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
733 NEONMAP0(vmovl_v),
734 NEONMAP0(vmovn_v),
735 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
736 NEONMAP0(vmull_v),
737 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
738 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
739 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
740 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
741 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
742 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
743 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
744 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
745 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
746 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
747 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
748 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
749 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
750 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
751 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
752 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
753 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
754 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
755 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
756 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
757 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
758 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
759 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
760 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
761 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
762 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
763 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
764 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
765 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
766 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
767 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
768 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
769 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
770 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
771 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
772 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
773 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
774 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
775 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
776 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
777 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
778 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
779 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
780 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
781 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
782 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
783 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
784 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
785 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
786 NEONMAP1(vrnd_v, trunc, Add1ArgType),
787 NEONMAP1(vrnda_v, round, Add1ArgType),
788 NEONMAP1(vrndaq_v, round, Add1ArgType),
789 NEONMAP0(vrndi_v),
790 NEONMAP0(vrndiq_v),
791 NEONMAP1(vrndm_v, floor, Add1ArgType),
792 NEONMAP1(vrndmq_v, floor, Add1ArgType),
793 NEONMAP1(vrndn_v, roundeven, Add1ArgType),
794 NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
795 NEONMAP1(vrndp_v, ceil, Add1ArgType),
796 NEONMAP1(vrndpq_v, ceil, Add1ArgType),
797 NEONMAP1(vrndq_v, trunc, Add1ArgType),
798 NEONMAP1(vrndx_v, rint, Add1ArgType),
799 NEONMAP1(vrndxq_v, rint, Add1ArgType),
800 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
801 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
802 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
803 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
804 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
805 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
806 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
807 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
808 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
809 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
810 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
811 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
812 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
813 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
814 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
815 NEONMAP0(vshl_n_v),
816 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
817 NEONMAP0(vshll_n_v),
818 NEONMAP0(vshlq_n_v),
819 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
820 NEONMAP0(vshr_n_v),
821 NEONMAP0(vshrn_n_v),
822 NEONMAP0(vshrq_n_v),
823 NEONMAP1(vst1_v, arm_neon_vst1, 0),
824 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
825 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
826 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
827 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
828 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
829 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
830 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
831 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
832 NEONMAP1(vst2_v, arm_neon_vst2, 0),
833 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
834 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
835 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
836 NEONMAP1(vst3_v, arm_neon_vst3, 0),
837 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
838 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
839 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
840 NEONMAP1(vst4_v, arm_neon_vst4, 0),
841 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
842 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
843 NEONMAP0(vsubhn_v),
844 NEONMAP0(vtrn_v),
845 NEONMAP0(vtrnq_v),
846 NEONMAP0(vtst_v),
847 NEONMAP0(vtstq_v),
848 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
849 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
850 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
851 NEONMAP0(vuzp_v),
852 NEONMAP0(vuzpq_v),
853 NEONMAP0(vzip_v),
854 NEONMAP0(vzipq_v)
855};
856
857// clang-format on
858
859// Some intrinsics are equivalent for codegen.
// Each entry is a pair of NEON builtin IDs. In every entry below the first ID
// is a type-specific variant (e.g. an `_f16` builtin) and the second is the
// variant that stands in for it (e.g. the overloaded `_v` builtin).
// NOTE(review): presumably the first ID is rewritten to the second before
// table lookup; the code that consumes this map is not in this part of the
// file -- confirm against the caller.
860static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
861 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
862 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
863 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
864 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
865 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
866 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
867 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
868 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
869 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
870 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
871 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
872 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
873 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
874 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
875 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
876 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
877 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
878 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
879 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
880 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
881 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
882 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
883 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
884 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
885 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
886 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
887 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
888 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
889 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
890 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
891 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
892 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
893 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
894 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
895 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
896 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
897 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
898 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
899 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
900 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
901 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
902 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
903 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
904 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
905 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
906 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
907 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
908 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
909 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
910 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
911 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
912 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
913 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
914 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
915 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
916 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
917 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
918 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
919 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
920 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
921 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
922 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
923 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
924 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
925 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
926 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
927 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
928 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
929 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
930 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
931 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
932 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
933 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
934 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
935 // arbitrary one (the s64 variant) to be handled as the canonical variation.
936 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
937 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
938 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
939 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
940 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
941 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
942 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
943 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
944 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
945 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
946 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
947 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
948};
949
950#undef NEONMAP0
951#undef NEONMAP1
952#undef NEONMAP2
953
// SVEMAP1 expands to a braced table entry holding the SVE builtin ID for
// `NameBase`, the ID of `Intrinsic::LLVMIntrinsic`, and `TypeModifier`.
954#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
955 {SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier}
956
// SVEMAP2 expands to the same kind of entry but with 0 in the intrinsic slot.
957#define SVEMAP2(NameBase, TypeModifier) \
958 {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier}
// Table of SVE builtin entries. Its rows come from the two included files,
// which are pulled in with GET_SVE_LLVM_INTRINSIC_MAP defined.
// NOTE(review): presumably those files consist of SVEMAP1/SVEMAP2 invocations
// in ascending builtin-ID order -- their contents are not visible here.
959static const AArch64SVEAndSMEVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
960#define GET_SVE_LLVM_INTRINSIC_MAP
961#include "clang/Basic/arm_sve_builtin_cg.inc"
962#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
963#undef GET_SVE_LLVM_INTRINSIC_MAP
964};
965
// The entry-building macros are only needed while the table above is defined.
966#undef SVEMAP1
967#undef SVEMAP2
968
// SMEMAP1 expands to a braced table entry holding the SME builtin ID for
// `NameBase`, the ID of `Intrinsic::LLVMIntrinsic`, and `TypeModifier`.
969#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
970 {SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier}
971
// SMEMAP2 expands to the same kind of entry but with 0 in the intrinsic slot.
972#define SMEMAP2(NameBase, TypeModifier) \
973 {SME::BI__builtin_sme_##NameBase, 0, TypeModifier}
// Table of SME builtin entries. Its rows come from the included file, which is
// pulled in with GET_SME_LLVM_INTRINSIC_MAP defined.
// NOTE(review): presumably that file consists of SMEMAP1/SMEMAP2 invocations
// in ascending builtin-ID order -- its contents are not visible here.
974static const AArch64SVEAndSMEVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
975#define GET_SME_LLVM_INTRINSIC_MAP
976#include "clang/Basic/arm_sme_builtin_cg.inc"
977#undef GET_SME_LLVM_INTRINSIC_MAP
978};
979
// The entry-building macros are only needed while the table above is defined.
980#undef SMEMAP1
981#undef SMEMAP2
982
// File-local "already checked" flags, one per intrinsic table, all starting
// out false.
// NOTE(review): presumably each is passed as the `MapProvenSorted` argument of
// findARMVectorIntrinsicInMap for the matching table, so the debug-only
// sortedness assertion runs once per table; the call sites are not visible in
// this part of the file -- confirm.
983static bool NEONSIMDIntrinsicsProvenSorted = false;
984
985static bool AArch64SIMDIntrinsicsProvenSorted = false;
986static bool AArch64SISDIntrinsicsProvenSorted = false;
987static bool AArch64SVEIntrinsicsProvenSorted = false;
988static bool AArch64SMEIntrinsicsProvenSorted = false;
989
990// Check if Builtin `BuiltinId` is present in `IntrinsicMap`. If yes, returns
991// the corresponding info struct.
992template <typename IntrinsicInfo>
993static const IntrinsicInfo *
994findARMVectorIntrinsicInMap(ArrayRef<IntrinsicInfo> IntrinsicMap,
995 unsigned BuiltinID, bool &MapProvenSorted) {
996
997#ifndef NDEBUG
998 if (!MapProvenSorted) {
999 assert(llvm::is_sorted(IntrinsicMap));
1000 MapProvenSorted = true;
1001 }
1002#endif
1003
1004 const IntrinsicInfo *Builtin = llvm::lower_bound(IntrinsicMap, BuiltinID);
1005
1006 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1007 return Builtin;
1008
1009 return nullptr;
1010}
1011
1012Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
1013 unsigned Modifier,
1014 llvm::Type *ArgType,
1015 const CallExpr *E) {
1016 int VectorSize = 0;
1017 if (Modifier & Use64BitVectors)
1018 VectorSize = 64;
1019 else if (Modifier & Use128BitVectors)
1020 VectorSize = 128;
1021
1022 // Return type.
1023 SmallVector<llvm::Type *, 3> Tys;
1024 if (Modifier & AddRetType) {
1025 llvm::Type *Ty = ConvertType(T: E->getCallReturnType(Ctx: getContext()));
1026 if (Modifier & VectorizeRetType)
1027 Ty = llvm::FixedVectorType::get(
1028 ElementType: Ty, NumElts: VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1029
1030 Tys.push_back(Elt: Ty);
1031 }
1032
1033 // Arguments.
1034 if (Modifier & VectorizeArgTypes) {
1035 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1036 ArgType = llvm::FixedVectorType::get(ElementType: ArgType, NumElts: Elts);
1037 }
1038
1039 if (Modifier & (Add1ArgType | Add2ArgTypes))
1040 Tys.push_back(Elt: ArgType);
1041
1042 if (Modifier & Add2ArgTypes)
1043 Tys.push_back(Elt: ArgType);
1044
1045 if (Modifier & InventFloatType)
1046 Tys.push_back(Elt: FloatTy);
1047
1048 return CGM.getIntrinsic(IID: IntrinsicID, Tys);
1049}
1050
1051//===----------------------------------------------------------------------===//
1052// Emit-helpers
1053//===----------------------------------------------------------------------===//
1054static Value *EmitCommonNeonSISDBuiltinExpr(
1055 CodeGenFunction &CGF, const ARMNeonVectorIntrinsicInfo &SISDInfo,
1056 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1057 assert(SISDInfo.LLVMIntrinsic && "Generic code assumes a valid intrinsic");
1058
1059 switch (SISDInfo.BuiltinID) {
1060 case NEON::BI__builtin_neon_vcled_s64:
1061 case NEON::BI__builtin_neon_vcled_u64:
1062 case NEON::BI__builtin_neon_vcles_f32:
1063 case NEON::BI__builtin_neon_vcled_f64:
1064 case NEON::BI__builtin_neon_vcltd_s64:
1065 case NEON::BI__builtin_neon_vcltd_u64:
1066 case NEON::BI__builtin_neon_vclts_f32:
1067 case NEON::BI__builtin_neon_vcltd_f64:
1068 case NEON::BI__builtin_neon_vcales_f32:
1069 case NEON::BI__builtin_neon_vcaled_f64:
1070 case NEON::BI__builtin_neon_vcalts_f32:
1071 case NEON::BI__builtin_neon_vcaltd_f64:
1072 // Only one direction of comparisons actually exist, cmle is actually a cmge
1073 // with swapped operands. The table gives us the right intrinsic but we
1074 // still need to do the swap.
1075 std::swap(a&: Ops[0], b&: Ops[1]);
1076 break;
1077 }
1078
1079 // Use fptosi.sat/fptoui.sat unless under strict FP.
1080 unsigned LLVMIntrinsic = SISDInfo.LLVMIntrinsic;
1081 if (!CGF.Builder.getIsFPConstrained()) {
1082 if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzs)
1083 LLVMIntrinsic = Intrinsic::fptosi_sat;
1084 else if (LLVMIntrinsic == Intrinsic::aarch64_neon_fcvtzu)
1085 LLVMIntrinsic = Intrinsic::fptoui_sat;
1086 }
1087 llvm::Type *ArgTy = CGF.ConvertType(T: E->getArg(Arg: 0)->getType());
1088 Function *F = CGF.LookupNeonLLVMIntrinsic(IntrinsicID: LLVMIntrinsic,
1089 Modifier: SISDInfo.TypeModifier, ArgType: ArgTy, E);
1090
1091 int j = 0;
1092 ConstantInt *C0 = ConstantInt::get(Ty: CGF.SizeTy, V: 0);
1093 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1094 ai != ae; ++ai, ++j) {
1095 llvm::Type *ArgTy = ai->getType();
1096 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1097 ArgTy->getPrimitiveSizeInBits())
1098 continue;
1099 assert(
1100 ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy() &&
1101 "Expecting vector LLVM intrinsic type and scalar Clang builtin type!");
1102
1103 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1104 // it before inserting.
1105 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1106 V: Ops[j], DestTy: cast<llvm::VectorType>(Val: ArgTy)->getElementType());
1107 Ops[j] =
1108 CGF.Builder.CreateInsertElement(Vec: PoisonValue::get(T: ArgTy), NewElt: Ops[j], Idx: C0);
1109 }
1110
1111 Value *Result = CGF.EmitNeonCall(F, Ops, name: SISDInfo.NameHint);
1112 llvm::Type *ResultType = CGF.ConvertType(T: E->getType());
1113 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1114 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1115 return CGF.Builder.CreateExtractElement(Vec: Result, Idx: C0);
1116
1117 return CGF.Builder.CreateBitCast(V: Result, DestTy: ResultType, Name: SISDInfo.NameHint);
1118}
1119
1120Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
1121 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1122 const char *NameHint, unsigned Modifier, const CallExpr *E,
1123 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1124 llvm::Triple::ArchType Arch) {
1125
1126 // Extract the trailing immediate argument that encodes the type discriminator
1127 // for this overloaded intrinsic.
1128 // TODO: Move to the parent code that takes care of argument processing.
1129 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
1130 std::optional<llvm::APSInt> NeonTypeConst =
1131 Arg->getIntegerConstantExpr(Ctx: getContext());
1132 if (!NeonTypeConst)
1133 return nullptr;
1134
1135 // Determine the type of this overloaded NEON intrinsic.
1136 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1137 const bool Usgn = Type.isUnsigned();
1138 const bool Quad = Type.isQuad();
1139 const bool Floating = Type.isFloatingPoint();
1140 const bool HasFastHalfType = getTarget().hasFastHalfType();
1141 const bool AllowBFloatArgsAndRet =
1142 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1143
1144 llvm::FixedVectorType *VTy =
1145 GetNeonType(CGF: this, TypeFlags: Type, HasFastHalfType, V1Ty: false, AllowBFloatArgsAndRet);
1146 llvm::Type *Ty = VTy;
1147 if (!Ty)
1148 return nullptr;
1149
1150 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1151 return Builder.getInt32(C: addr.getAlignment().getQuantity());
1152 };
1153
1154 unsigned Int = LLVMIntrinsic;
1155 if ((Modifier & UnsignedAlts) && !Usgn)
1156 Int = AltLLVMIntrinsic;
1157
1158 switch (BuiltinID) {
1159 default: break;
1160 case NEON::BI__builtin_neon_splat_lane_v:
1161 case NEON::BI__builtin_neon_splat_laneq_v:
1162 case NEON::BI__builtin_neon_splatq_lane_v:
1163 case NEON::BI__builtin_neon_splatq_laneq_v: {
1164 auto NumElements = VTy->getElementCount();
1165 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1166 NumElements = NumElements * 2;
1167 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1168 NumElements = NumElements.divideCoefficientBy(RHS: 2);
1169
1170 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
1171 return EmitNeonSplat(V: Ops[0], C: cast<ConstantInt>(Val: Ops[1]), Count: NumElements);
1172 }
1173 case NEON::BI__builtin_neon_vpadd_v:
1174 case NEON::BI__builtin_neon_vpaddq_v:
1175 // We don't allow fp/int overloading of intrinsics.
1176 if (VTy->getElementType()->isFloatingPointTy() &&
1177 Int == Intrinsic::aarch64_neon_addp)
1178 Int = Intrinsic::aarch64_neon_faddp;
1179 break;
1180 case NEON::BI__builtin_neon_vabs_v:
1181 case NEON::BI__builtin_neon_vabsq_v:
1182 if (VTy->getElementType()->isFloatingPointTy())
1183 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::fabs, Tys: Ty), Ops, name: "vabs");
1184 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys: Ty), Ops, name: "vabs");
1185 case NEON::BI__builtin_neon_vadd_v:
1186 case NEON::BI__builtin_neon_vaddq_v: {
1187 llvm::Type *VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Quad ? 16 : 8);
1188 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
1189 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
1190 Ops[0] = Builder.CreateXor(LHS: Ops[0], RHS: Ops[1]);
1191 return Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1192 }
1193 case NEON::BI__builtin_neon_vaddhn_v: {
1194 llvm::FixedVectorType *SrcTy =
1195 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1196
1197 // %sum = add <4 x i32> %lhs, %rhs
1198 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
1199 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SrcTy);
1200 Ops[0] = Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1], Name: "vaddhn");
1201
1202 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1203 Constant *ShiftAmt =
1204 ConstantInt::get(Ty: SrcTy, V: SrcTy->getScalarSizeInBits() / 2);
1205 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: ShiftAmt, Name: "vaddhn");
1206
1207 // %res = trunc <4 x i32> %high to <4 x i16>
1208 return Builder.CreateTrunc(V: Ops[0], DestTy: VTy, Name: "vaddhn");
1209 }
1210 case NEON::BI__builtin_neon_vcale_v:
1211 case NEON::BI__builtin_neon_vcaleq_v:
1212 case NEON::BI__builtin_neon_vcalt_v:
1213 case NEON::BI__builtin_neon_vcaltq_v:
1214 std::swap(a&: Ops[0], b&: Ops[1]);
1215 [[fallthrough]];
1216 case NEON::BI__builtin_neon_vcage_v:
1217 case NEON::BI__builtin_neon_vcageq_v:
1218 case NEON::BI__builtin_neon_vcagt_v:
1219 case NEON::BI__builtin_neon_vcagtq_v: {
1220 llvm::Type *Ty;
1221 switch (VTy->getScalarSizeInBits()) {
1222 default: llvm_unreachable("unexpected type");
1223 case 32:
1224 Ty = FloatTy;
1225 break;
1226 case 64:
1227 Ty = DoubleTy;
1228 break;
1229 case 16:
1230 Ty = HalfTy;
1231 break;
1232 }
1233 auto *VecFlt = llvm::FixedVectorType::get(ElementType: Ty, NumElts: VTy->getNumElements());
1234 llvm::Type *Tys[] = { VTy, VecFlt };
1235 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
1236 return EmitNeonCall(F, Ops, name: NameHint);
1237 }
1238 case NEON::BI__builtin_neon_vceqz_v:
1239 case NEON::BI__builtin_neon_vceqzq_v:
1240 return EmitAArch64CompareBuiltinExpr(
1241 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, Name: "vceqz");
1242 case NEON::BI__builtin_neon_vcgez_v:
1243 case NEON::BI__builtin_neon_vcgezq_v:
1244 return EmitAArch64CompareBuiltinExpr(
1245 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1246 Name: "vcgez");
1247 case NEON::BI__builtin_neon_vclez_v:
1248 case NEON::BI__builtin_neon_vclezq_v:
1249 return EmitAArch64CompareBuiltinExpr(
1250 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1251 Name: "vclez");
1252 case NEON::BI__builtin_neon_vcgtz_v:
1253 case NEON::BI__builtin_neon_vcgtzq_v:
1254 return EmitAArch64CompareBuiltinExpr(
1255 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1256 Name: "vcgtz");
1257 case NEON::BI__builtin_neon_vcltz_v:
1258 case NEON::BI__builtin_neon_vcltzq_v:
1259 return EmitAArch64CompareBuiltinExpr(
1260 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1261 Name: "vcltz");
1262 case NEON::BI__builtin_neon_vclz_v:
1263 case NEON::BI__builtin_neon_vclzq_v:
1264 // We generate target-independent intrinsic, which needs a second argument
1265 // for whether or not clz of zero is undefined; on ARM it isn't.
1266 Ops.push_back(Elt: Builder.getInt1(V: getTarget().isCLZForZeroUndef()));
1267 break;
1268 case NEON::BI__builtin_neon_vcvt_f32_v:
1269 case NEON::BI__builtin_neon_vcvtq_f32_v:
1270 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1271 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1272 HasFastHalfType);
1273 return Usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
1274 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
1275 case NEON::BI__builtin_neon_vcvt_f16_s16:
1276 case NEON::BI__builtin_neon_vcvt_f16_u16:
1277 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1278 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1279 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1280 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1281 HasFastHalfType);
1282 return Usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
1283 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
1284 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1285 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1286 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1287 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1288 llvm::Type *Tys[2] = { GetFloatNeonType(CGF: this, IntTypeFlags: Type), Ty };
1289 Function *F = CGM.getIntrinsic(IID: Int, Tys);
1290 return EmitNeonCall(F, Ops, name: "vcvt_n");
1291 }
1292 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1293 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1294 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1295 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1296 llvm::Type *Tys[2] = { GetFloatNeonType(CGF: this, IntTypeFlags: Type), Ty };
1297 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1298 Function *F = CGM.getIntrinsic(IID: Int, Tys);
1299 return EmitNeonCall(F, Ops, name: "vcvt_n");
1300 }
1301 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1302 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1303 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1304 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1305 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1306 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1307 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1308 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1309 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1310 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1311 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1312 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1313 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
1314 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
1315 return EmitNeonCall(F, Ops, name: "vcvt_n");
1316 }
1317 case NEON::BI__builtin_neon_vcvt_s32_v:
1318 case NEON::BI__builtin_neon_vcvt_u32_v:
1319 case NEON::BI__builtin_neon_vcvt_s64_v:
1320 case NEON::BI__builtin_neon_vcvt_u64_v:
1321 case NEON::BI__builtin_neon_vcvt_s16_f16:
1322 case NEON::BI__builtin_neon_vcvt_u16_f16:
1323 case NEON::BI__builtin_neon_vcvtq_s32_v:
1324 case NEON::BI__builtin_neon_vcvtq_u32_v:
1325 case NEON::BI__builtin_neon_vcvtq_s64_v:
1326 case NEON::BI__builtin_neon_vcvtq_u64_v:
1327 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1328 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1329 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetFloatNeonType(CGF: this, IntTypeFlags: Type));
1330 if (Int) {
1331 // AArch64: use fptosi.sat/fptoui.sat unless under strict FP.
1332 if (!Builder.getIsFPConstrained())
1333 Int = Usgn ? Intrinsic::fptoui_sat : Intrinsic::fptosi_sat;
1334 llvm::Type *Tys[2] = {Ty, Ops[0]->getType()};
1335 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtz");
1336 }
1337 // FIXME: ARM uses plain fptoui/fptosi which have UB on out-of-range
1338 // values. These should also use saturating intrinsics.
1339 return Usgn ? Builder.CreateFPToUI(V: Ops[0], DestTy: Ty, Name: "vcvt")
1340 : Builder.CreateFPToSI(V: Ops[0], DestTy: Ty, Name: "vcvt");
1341 }
1342 case NEON::BI__builtin_neon_vcvta_s16_f16:
1343 case NEON::BI__builtin_neon_vcvta_s32_v:
1344 case NEON::BI__builtin_neon_vcvta_s64_v:
1345 case NEON::BI__builtin_neon_vcvta_u16_f16:
1346 case NEON::BI__builtin_neon_vcvta_u32_v:
1347 case NEON::BI__builtin_neon_vcvta_u64_v:
1348 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1349 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1350 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1351 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1352 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1353 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1354 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1355 case NEON::BI__builtin_neon_vcvtn_s32_v:
1356 case NEON::BI__builtin_neon_vcvtn_s64_v:
1357 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1358 case NEON::BI__builtin_neon_vcvtn_u32_v:
1359 case NEON::BI__builtin_neon_vcvtn_u64_v:
1360 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1361 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1362 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1363 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1364 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1365 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1366 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1367 case NEON::BI__builtin_neon_vcvtp_s32_v:
1368 case NEON::BI__builtin_neon_vcvtp_s64_v:
1369 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1370 case NEON::BI__builtin_neon_vcvtp_u32_v:
1371 case NEON::BI__builtin_neon_vcvtp_u64_v:
1372 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1373 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1374 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1375 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1376 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1377 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1378 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1379 case NEON::BI__builtin_neon_vcvtm_s32_v:
1380 case NEON::BI__builtin_neon_vcvtm_s64_v:
1381 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1382 case NEON::BI__builtin_neon_vcvtm_u32_v:
1383 case NEON::BI__builtin_neon_vcvtm_u64_v:
1384 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1385 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1386 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1387 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1388 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1389 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1390 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
1391 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: NameHint);
1392 }
1393 case NEON::BI__builtin_neon_vcvtx_f32_v: {
1394 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
1395 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: NameHint);
1396
1397 }
1398 case NEON::BI__builtin_neon_vext_v:
1399 case NEON::BI__builtin_neon_vextq_v: {
1400 int CV = cast<ConstantInt>(Val: Ops[2])->getSExtValue();
1401 SmallVector<int, 16> Indices;
1402 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1403 Indices.push_back(Elt: i+CV);
1404
1405 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1406 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
1407 return Builder.CreateShuffleVector(V1: Ops[0], V2: Ops[1], Mask: Indices, Name: "vext");
1408 }
1409 case NEON::BI__builtin_neon_vfma_v:
1410 case NEON::BI__builtin_neon_vfmaq_v: {
1411 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1412 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
1413 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
1414
1415 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
1416 return emitCallMaybeConstrainedFPBuiltin(
1417 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty,
1418 Args: {Ops[1], Ops[2], Ops[0]});
1419 }
1420 case NEON::BI__builtin_neon_vld1_v:
1421 case NEON::BI__builtin_neon_vld1q_v: {
1422 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1423 Ops.push_back(Elt: getAlignmentValue32(PtrOp0));
1424 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "vld1");
1425 }
1426 case NEON::BI__builtin_neon_vld1_x2_v:
1427 case NEON::BI__builtin_neon_vld1q_x2_v:
1428 case NEON::BI__builtin_neon_vld1_x3_v:
1429 case NEON::BI__builtin_neon_vld1q_x3_v:
1430 case NEON::BI__builtin_neon_vld1_x4_v:
1431 case NEON::BI__builtin_neon_vld1q_x4_v: {
1432 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1433 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
1434 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld1xN");
1435 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
1436 }
1437 case NEON::BI__builtin_neon_vld2_v:
1438 case NEON::BI__builtin_neon_vld2q_v:
1439 case NEON::BI__builtin_neon_vld3_v:
1440 case NEON::BI__builtin_neon_vld3q_v:
1441 case NEON::BI__builtin_neon_vld4_v:
1442 case NEON::BI__builtin_neon_vld4q_v:
1443 case NEON::BI__builtin_neon_vld2_dup_v:
1444 case NEON::BI__builtin_neon_vld2q_dup_v:
1445 case NEON::BI__builtin_neon_vld3_dup_v:
1446 case NEON::BI__builtin_neon_vld3q_dup_v:
1447 case NEON::BI__builtin_neon_vld4_dup_v:
1448 case NEON::BI__builtin_neon_vld4q_dup_v: {
1449 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1450 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
1451 Value *Align = getAlignmentValue32(PtrOp1);
1452 Ops[1] = Builder.CreateCall(Callee: F, Args: {Ops[1], Align}, Name: NameHint);
1453 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
1454 }
1455 case NEON::BI__builtin_neon_vld1_dup_v:
1456 case NEON::BI__builtin_neon_vld1q_dup_v: {
1457 Value *V = PoisonValue::get(T: Ty);
1458 PtrOp0 = PtrOp0.withElementType(ElemTy: VTy->getElementType());
1459 LoadInst *Ld = Builder.CreateLoad(Addr: PtrOp0);
1460 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
1461 Ops[0] = Builder.CreateInsertElement(Vec: V, NewElt: Ld, Idx: CI);
1462 return EmitNeonSplat(V: Ops[0], C: CI);
1463 }
1464 case NEON::BI__builtin_neon_vld2_lane_v:
1465 case NEON::BI__builtin_neon_vld2q_lane_v:
1466 case NEON::BI__builtin_neon_vld3_lane_v:
1467 case NEON::BI__builtin_neon_vld3q_lane_v:
1468 case NEON::BI__builtin_neon_vld4_lane_v:
1469 case NEON::BI__builtin_neon_vld4q_lane_v: {
1470 llvm::Type *Tys[] = {Ty, Int8PtrTy};
1471 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
1472 for (unsigned I = 2; I < Ops.size() - 1; ++I)
1473 Ops[I] = Builder.CreateBitCast(V: Ops[I], DestTy: Ty);
1474 Ops.push_back(Elt: getAlignmentValue32(PtrOp1));
1475 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: NameHint);
1476 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
1477 }
1478 case NEON::BI__builtin_neon_vmovl_v: {
1479 llvm::FixedVectorType *DTy =
1480 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1481 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DTy);
1482 if (Usgn)
1483 return Builder.CreateZExt(V: Ops[0], DestTy: Ty, Name: "vmovl");
1484 return Builder.CreateSExt(V: Ops[0], DestTy: Ty, Name: "vmovl");
1485 }
1486 case NEON::BI__builtin_neon_vmovn_v: {
1487 llvm::FixedVectorType *QTy =
1488 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1489 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: QTy);
1490 return Builder.CreateTrunc(V: Ops[0], DestTy: Ty, Name: "vmovn");
1491 }
1492 case NEON::BI__builtin_neon_vmull_v:
1493 // FIXME: the integer vmull operations could be emitted in terms of pure
1494 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
1495 // hoisting the exts outside loops. Until global ISel comes along that can
1496 // see through such movement this leads to bad CodeGen. So we need an
1497 // intrinsic for now.
1498 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1499 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1500 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
1501 case NEON::BI__builtin_neon_vpadal_v:
1502 case NEON::BI__builtin_neon_vpadalq_v: {
1503 // The source operand type has twice as many elements of half the size.
1504 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1505 llvm::Type *EltTy =
1506 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
1507 auto *NarrowTy =
1508 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
1509 llvm::Type *Tys[2] = { Ty, NarrowTy };
1510 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
1511 }
1512 case NEON::BI__builtin_neon_vpaddl_v:
1513 case NEON::BI__builtin_neon_vpaddlq_v: {
1514 // The source operand type has twice as many elements of half the size.
1515 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1516 llvm::Type *EltTy = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
1517 auto *NarrowTy =
1518 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
1519 llvm::Type *Tys[2] = { Ty, NarrowTy };
1520 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vpaddl");
1521 }
1522 case NEON::BI__builtin_neon_vqdmlal_v:
1523 case NEON::BI__builtin_neon_vqdmlsl_v: {
1524 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
1525 Ops[1] =
1526 EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys: Ty), Ops&: MulOps, name: "vqdmlal");
1527 Ops.resize(N: 2);
1528 return EmitNeonCall(F: CGM.getIntrinsic(IID: AltLLVMIntrinsic, Tys: Ty), Ops, name: NameHint);
1529 }
1530 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
1531 case NEON::BI__builtin_neon_vqdmulh_lane_v:
1532 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
1533 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
1534 auto *RTy = cast<llvm::FixedVectorType>(Val: Ty);
1535 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
1536 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
1537 RTy = llvm::FixedVectorType::get(ElementType: RTy->getElementType(),
1538 NumElts: RTy->getNumElements() * 2);
1539 llvm::Type *Tys[2] = {
1540 RTy, GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
1541 /*isQuad*/ false))};
1542 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
1543 }
1544 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
1545 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
1546 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
1547 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
1548 llvm::Type *Tys[2] = {
1549 Ty, GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
1550 /*isQuad*/ true))};
1551 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
1552 }
1553 case NEON::BI__builtin_neon_vqshl_n_v:
1554 case NEON::BI__builtin_neon_vqshlq_n_v:
1555 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshl_n",
1556 shift: 1, rightshift: false);
1557 case NEON::BI__builtin_neon_vqshlu_n_v:
1558 case NEON::BI__builtin_neon_vqshluq_n_v:
1559 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshlu_n",
1560 shift: 1, rightshift: false);
1561 case NEON::BI__builtin_neon_vrecpe_v:
1562 case NEON::BI__builtin_neon_vrecpeq_v:
1563 case NEON::BI__builtin_neon_vrsqrte_v:
1564 case NEON::BI__builtin_neon_vrsqrteq_v:
1565 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
1566 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: NameHint);
1567 case NEON::BI__builtin_neon_vrndi_v:
1568 case NEON::BI__builtin_neon_vrndiq_v:
1569 Int = Builder.getIsFPConstrained()
1570 ? Intrinsic::experimental_constrained_nearbyint
1571 : Intrinsic::nearbyint;
1572 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: NameHint);
1573 case NEON::BI__builtin_neon_vrshr_n_v:
1574 case NEON::BI__builtin_neon_vrshrq_n_v:
1575 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrshr_n",
1576 shift: 1, rightshift: true);
1577 case NEON::BI__builtin_neon_vsha512hq_u64:
1578 case NEON::BI__builtin_neon_vsha512h2q_u64:
1579 case NEON::BI__builtin_neon_vsha512su0q_u64:
1580 case NEON::BI__builtin_neon_vsha512su1q_u64: {
1581 Function *F = CGM.getIntrinsic(IID: Int);
1582 return EmitNeonCall(F, Ops, name: "");
1583 }
1584 case NEON::BI__builtin_neon_vshl_n_v:
1585 case NEON::BI__builtin_neon_vshlq_n_v:
1586 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty, neg: false);
1587 return Builder.CreateShl(LHS: Builder.CreateBitCast(V: Ops[0],DestTy: Ty), RHS: Ops[1],
1588 Name: "vshl_n");
1589 case NEON::BI__builtin_neon_vshll_n_v: {
1590 llvm::FixedVectorType *SrcTy =
1591 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
1592 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
1593 if (Usgn)
1594 Ops[0] = Builder.CreateZExt(V: Ops[0], DestTy: VTy);
1595 else
1596 Ops[0] = Builder.CreateSExt(V: Ops[0], DestTy: VTy);
1597 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty: VTy, neg: false);
1598 return Builder.CreateShl(LHS: Ops[0], RHS: Ops[1], Name: "vshll_n");
1599 }
1600 case NEON::BI__builtin_neon_vshrn_n_v: {
1601 llvm::FixedVectorType *SrcTy =
1602 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1603 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
1604 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty: SrcTy, neg: false);
1605 if (Usgn)
1606 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: Ops[1]);
1607 else
1608 Ops[0] = Builder.CreateAShr(LHS: Ops[0], RHS: Ops[1]);
1609 return Builder.CreateTrunc(V: Ops[0], DestTy: Ty, Name: "vshrn_n");
1610 }
1611 case NEON::BI__builtin_neon_vshr_n_v:
1612 case NEON::BI__builtin_neon_vshrq_n_v:
1613 return EmitNeonRShiftImm(Vec: Ops[0], Shift: Ops[1], Ty, usgn: Usgn, name: "vshr_n");
1614 case NEON::BI__builtin_neon_vst1_v:
1615 case NEON::BI__builtin_neon_vst1q_v:
1616 case NEON::BI__builtin_neon_vst2_v:
1617 case NEON::BI__builtin_neon_vst2q_v:
1618 case NEON::BI__builtin_neon_vst3_v:
1619 case NEON::BI__builtin_neon_vst3q_v:
1620 case NEON::BI__builtin_neon_vst4_v:
1621 case NEON::BI__builtin_neon_vst4q_v:
1622 case NEON::BI__builtin_neon_vst2_lane_v:
1623 case NEON::BI__builtin_neon_vst2q_lane_v:
1624 case NEON::BI__builtin_neon_vst3_lane_v:
1625 case NEON::BI__builtin_neon_vst3q_lane_v:
1626 case NEON::BI__builtin_neon_vst4_lane_v:
1627 case NEON::BI__builtin_neon_vst4q_lane_v: {
1628 llvm::Type *Tys[] = {Int8PtrTy, Ty};
1629 Ops.push_back(Elt: getAlignmentValue32(PtrOp0));
1630 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "");
1631 }
1632 case NEON::BI__builtin_neon_vsm3partw1q_u32:
1633 case NEON::BI__builtin_neon_vsm3partw2q_u32:
1634 case NEON::BI__builtin_neon_vsm3ss1q_u32:
1635 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
1636 case NEON::BI__builtin_neon_vsm4eq_u32: {
1637 Function *F = CGM.getIntrinsic(IID: Int);
1638 return EmitNeonCall(F, Ops, name: "");
1639 }
1640 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
1641 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
1642 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
1643 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
1644 Function *F = CGM.getIntrinsic(IID: Int);
1645 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
1646 return EmitNeonCall(F, Ops, name: "");
1647 }
1648 case NEON::BI__builtin_neon_vst1_x2_v:
1649 case NEON::BI__builtin_neon_vst1q_x2_v:
1650 case NEON::BI__builtin_neon_vst1_x3_v:
1651 case NEON::BI__builtin_neon_vst1q_x3_v:
1652 case NEON::BI__builtin_neon_vst1_x4_v:
1653 case NEON::BI__builtin_neon_vst1q_x4_v: {
1654 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
1655 // in AArch64 it comes last. We may want to stick to one or another.
1656 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
1657 Arch == llvm::Triple::aarch64_32) {
1658 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
1659 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
1660 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "");
1661 }
1662 llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
1663 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "");
1664 }
1665 case NEON::BI__builtin_neon_vsubhn_v: {
1666 llvm::FixedVectorType *SrcTy =
1667 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1668
1669 // %sum = add <4 x i32> %lhs, %rhs
1670 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
1671 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SrcTy);
1672 Ops[0] = Builder.CreateSub(LHS: Ops[0], RHS: Ops[1], Name: "vsubhn");
1673
1674 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1675 Constant *ShiftAmt =
1676 ConstantInt::get(Ty: SrcTy, V: SrcTy->getScalarSizeInBits() / 2);
1677 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: ShiftAmt, Name: "vsubhn");
1678
1679 // %res = trunc <4 x i32> %high to <4 x i16>
1680 return Builder.CreateTrunc(V: Ops[0], DestTy: VTy, Name: "vsubhn");
1681 }
1682 case NEON::BI__builtin_neon_vtrn_v:
1683 case NEON::BI__builtin_neon_vtrnq_v: {
1684 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
1685 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
1686 Value *SV = nullptr;
1687
1688 for (unsigned vi = 0; vi != 2; ++vi) {
1689 SmallVector<int, 16> Indices;
1690 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1691 Indices.push_back(Elt: i+vi);
1692 Indices.push_back(Elt: i+e+vi);
1693 }
1694 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
1695 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vtrn");
1696 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
1697 }
1698 return SV;
1699 }
1700 case NEON::BI__builtin_neon_vtst_v:
1701 case NEON::BI__builtin_neon_vtstq_v: {
1702 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1703 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
1704 Ops[0] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1]);
1705 Ops[0] = Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Ops[0],
1706 RHS: ConstantAggregateZero::get(Ty));
1707 return Builder.CreateSExt(V: Ops[0], DestTy: Ty, Name: "vtst");
1708 }
1709 case NEON::BI__builtin_neon_vuzp_v:
1710 case NEON::BI__builtin_neon_vuzpq_v: {
1711 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
1712 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
1713 Value *SV = nullptr;
1714
1715 for (unsigned vi = 0; vi != 2; ++vi) {
1716 SmallVector<int, 16> Indices;
1717 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1718 Indices.push_back(Elt: 2*i+vi);
1719
1720 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
1721 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vuzp");
1722 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
1723 }
1724 return SV;
1725 }
1726 case NEON::BI__builtin_neon_vxarq_u64: {
1727 Function *F = CGM.getIntrinsic(IID: Int);
1728 Ops[2] = Builder.CreateZExt(V: Ops[2], DestTy: Int64Ty);
1729 return EmitNeonCall(F, Ops, name: "");
1730 }
1731 case NEON::BI__builtin_neon_vzip_v:
1732 case NEON::BI__builtin_neon_vzipq_v: {
1733 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
1734 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
1735 Value *SV = nullptr;
1736
1737 for (unsigned vi = 0; vi != 2; ++vi) {
1738 SmallVector<int, 16> Indices;
1739 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1740 Indices.push_back(Elt: (i + vi*e) >> 1);
1741 Indices.push_back(Elt: ((i + vi*e) >> 1)+e);
1742 }
1743 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
1744 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vzip");
1745 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
1746 }
1747 return SV;
1748 }
1749 case NEON::BI__builtin_neon_vdot_s32:
1750 case NEON::BI__builtin_neon_vdot_u32:
1751 case NEON::BI__builtin_neon_vdotq_s32:
1752 case NEON::BI__builtin_neon_vdotq_u32: {
1753 auto *InputTy =
1754 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
1755 llvm::Type *Tys[2] = { Ty, InputTy };
1756 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vdot");
1757 }
1758 case NEON::BI__builtin_neon_vfmlal_low_f16:
1759 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
1760 auto *InputTy =
1761 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
1762 llvm::Type *Tys[2] = { Ty, InputTy };
1763 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlal_low");
1764 }
1765 case NEON::BI__builtin_neon_vfmlsl_low_f16:
1766 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
1767 auto *InputTy =
1768 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
1769 llvm::Type *Tys[2] = { Ty, InputTy };
1770 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlsl_low");
1771 }
1772 case NEON::BI__builtin_neon_vfmlal_high_f16:
1773 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
1774 auto *InputTy =
1775 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
1776 llvm::Type *Tys[2] = { Ty, InputTy };
1777 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlal_high");
1778 }
1779 case NEON::BI__builtin_neon_vfmlsl_high_f16:
1780 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
1781 auto *InputTy =
1782 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
1783 llvm::Type *Tys[2] = { Ty, InputTy };
1784 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlsl_high");
1785 }
1786 case NEON::BI__builtin_neon_vmmlaq_s32:
1787 case NEON::BI__builtin_neon_vmmlaq_u32: {
1788 auto *InputTy =
1789 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
1790 llvm::Type *Tys[2] = { Ty, InputTy };
1791 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "vmmla");
1792 }
1793 case NEON::BI__builtin_neon_vmmlaq_f16_f16:
1794 case NEON::BI__builtin_neon_vmmlaq_f32_f16: {
1795 auto *InputTy =
1796 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
1797 llvm::Type *Tys[2] = {Ty, InputTy};
1798 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fmmla");
1799 }
1800 case NEON::BI__builtin_neon_vusmmlaq_s32: {
1801 auto *InputTy =
1802 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
1803 llvm::Type *Tys[2] = { Ty, InputTy };
1804 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vusmmla");
1805 }
1806 case NEON::BI__builtin_neon_vusdot_s32:
1807 case NEON::BI__builtin_neon_vusdotq_s32: {
1808 auto *InputTy =
1809 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
1810 llvm::Type *Tys[2] = { Ty, InputTy };
1811 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vusdot");
1812 }
1813 case NEON::BI__builtin_neon_vbfdot_f32:
1814 case NEON::BI__builtin_neon_vbfdotq_f32: {
1815 llvm::Type *InputTy =
1816 llvm::FixedVectorType::get(ElementType: BFloatTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
1817 llvm::Type *Tys[2] = { Ty, InputTy };
1818 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vbfdot");
1819 }
1820 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
1821 llvm::Type *Tys[1] = { Ty };
1822 Function *F = CGM.getIntrinsic(IID: Int, Tys);
1823 return EmitNeonCall(F, Ops, name: "vcvtfp2bf");
1824 }
1825
1826 }
1827
1828 assert(Int && "Expected valid intrinsic number");
1829
1830 // Determine the type(s) of this overloaded AArch64 intrinsic.
1831 Function *F = LookupNeonLLVMIntrinsic(IntrinsicID: Int, Modifier, ArgType: Ty, E);
1832
1833 Value *Result = EmitNeonCall(F, Ops, name: NameHint);
1834 llvm::Type *ResultType = ConvertType(T: E->getType());
1835 // AArch64 intrinsic one-element vector type cast to
1836 // scalar type expected by the builtin
1837 return Builder.CreateBitCast(V: Result, DestTy: ResultType, Name: NameHint);
1838}
1839
1840Value *
1841CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
1842 const CmpInst::Predicate Pred,
1843 const Twine &Name) {
1844
1845 if (isa<FixedVectorType>(Val: Ty)) {
1846 // Vector types are cast to i8 vectors. Recover original type.
1847 Op = Builder.CreateBitCast(V: Op, DestTy: Ty);
1848 }
1849
1850 Constant *zero = Constant::getNullValue(Ty: Op->getType());
1851
1852 if (CmpInst::isFPPredicate(P: Pred)) {
1853 if (Pred == CmpInst::FCMP_OEQ)
1854 Op = Builder.CreateFCmp(P: Pred, LHS: Op, RHS: zero);
1855 else
1856 Op = Builder.CreateFCmpS(P: Pred, LHS: Op, RHS: zero);
1857 } else {
1858 Op = Builder.CreateICmp(P: Pred, LHS: Op, RHS: zero);
1859 }
1860
1861 llvm::Type *ResTy = Ty;
1862 if (auto *VTy = dyn_cast<FixedVectorType>(Val: Ty))
1863 ResTy = FixedVectorType::get(
1864 ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: VTy->getScalarSizeInBits()),
1865 NumElts: VTy->getNumElements());
1866
1867 return Builder.CreateSExt(V: Op, DestTy: ResTy, Name);
1868}
1869
1870static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
1871 Value *ExtOp, Value *IndexOp,
1872 llvm::Type *ResTy, unsigned IntID,
1873 const char *Name) {
1874 SmallVector<Value *, 2> TblOps;
1875 if (ExtOp)
1876 TblOps.push_back(Elt: ExtOp);
1877
1878 // Build a vector containing sequential number like (0, 1, 2, ..., 15)
1879 SmallVector<int, 16> Indices;
1880 auto *TblTy = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
1881 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
1882 Indices.push_back(Elt: 2*i);
1883 Indices.push_back(Elt: 2*i+1);
1884 }
1885
1886 int PairPos = 0, End = Ops.size() - 1;
1887 while (PairPos < End) {
1888 TblOps.push_back(Elt: CGF.Builder.CreateShuffleVector(V1: Ops[PairPos],
1889 V2: Ops[PairPos+1], Mask: Indices,
1890 Name));
1891 PairPos += 2;
1892 }
1893
1894 // If there's an odd number of 64-bit lookup table, fill the high 64-bit
1895 // of the 128-bit lookup table with zero.
1896 if (PairPos == End) {
1897 Value *ZeroTbl = ConstantAggregateZero::get(Ty: TblTy);
1898 TblOps.push_back(Elt: CGF.Builder.CreateShuffleVector(V1: Ops[PairPos],
1899 V2: ZeroTbl, Mask: Indices, Name));
1900 }
1901
1902 Function *TblF;
1903 TblOps.push_back(Elt: IndexOp);
1904 TblF = CGF.CGM.getIntrinsic(IID: IntID, Tys: ResTy);
1905
1906 return CGF.EmitNeonCall(F: TblF, Ops&: TblOps, name: Name);
1907}
1908
1909Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
1910 unsigned Value;
1911 switch (BuiltinID) {
1912 default:
1913 return nullptr;
1914 case clang::ARM::BI__builtin_arm_nop:
1915 Value = 0;
1916 break;
1917 case clang::ARM::BI__builtin_arm_yield:
1918 case clang::ARM::BI__yield:
1919 Value = 1;
1920 break;
1921 case clang::ARM::BI__builtin_arm_wfe:
1922 case clang::ARM::BI__wfe:
1923 Value = 2;
1924 break;
1925 case clang::ARM::BI__builtin_arm_wfi:
1926 case clang::ARM::BI__wfi:
1927 Value = 3;
1928 break;
1929 case clang::ARM::BI__builtin_arm_sev:
1930 case clang::ARM::BI__sev:
1931 Value = 4;
1932 break;
1933 case clang::ARM::BI__builtin_arm_sevl:
1934 case clang::ARM::BI__sevl:
1935 Value = 5;
1936 break;
1937 }
1938
1939 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_hint),
1940 Args: llvm::ConstantInt::get(Ty: Int32Ty, V: Value));
1941}
1942
/// Selects which register intrinsic EmitSpecialRegisterBuiltin emits.
enum SpecialRegisterAccessKind {
  /// Read through Intrinsic::read_register.
  NormalRead,
  /// Read through Intrinsic::read_volatile_register.
  VolatileRead,
  /// Write through Intrinsic::write_register.
  Write,
};
1948
1949// Generates the IR for the read/write special register builtin,
1950// ValueType is the type of the value that is to be written or read,
1951// RegisterType is the type of the register being written to or read from.
1952static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
1953 const CallExpr *E,
1954 llvm::Type *RegisterType,
1955 llvm::Type *ValueType,
1956 SpecialRegisterAccessKind AccessKind,
1957 StringRef SysReg = "") {
1958 // write and register intrinsics only support 32, 64 and 128 bit operations.
1959 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
1960 RegisterType->isIntegerTy(128)) &&
1961 "Unsupported size for register.");
1962
1963 CodeGen::CGBuilderTy &Builder = CGF.Builder;
1964 CodeGen::CodeGenModule &CGM = CGF.CGM;
1965 LLVMContext &Context = CGM.getLLVMContext();
1966
1967 if (SysReg.empty()) {
1968 const Expr *SysRegStrExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
1969 SysReg = cast<clang::StringLiteral>(Val: SysRegStrExpr)->getString();
1970 }
1971
1972 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, Str: SysReg) };
1973 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
1974 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
1975
1976 llvm::Type *Types[] = { RegisterType };
1977
1978 bool MixedTypes = RegisterType->isIntegerTy(BitWidth: 64) && ValueType->isIntegerTy(BitWidth: 32);
1979 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
1980 && "Can't fit 64-bit value in 32-bit register");
1981
1982 if (AccessKind != Write) {
1983 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
1984 llvm::Function *F = CGM.getIntrinsic(
1985 IID: AccessKind == VolatileRead ? Intrinsic::read_volatile_register
1986 : Intrinsic::read_register,
1987 Tys: Types);
1988 llvm::Value *Call = Builder.CreateCall(Callee: F, Args: Metadata);
1989
1990 if (MixedTypes)
1991 // Read into 64 bit register and then truncate result to 32 bit.
1992 return Builder.CreateTrunc(V: Call, DestTy: ValueType);
1993
1994 if (ValueType->isPointerTy())
1995 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
1996 return Builder.CreateIntToPtr(V: Call, DestTy: ValueType);
1997
1998 return Call;
1999 }
2000
2001 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::write_register, Tys: Types);
2002 llvm::Value *ArgValue = CGF.EmitScalarExpr(E: E->getArg(Arg: 1));
2003 if (MixedTypes) {
2004 // Extend 32 bit write value to 64 bit to pass to write.
2005 ArgValue = Builder.CreateZExt(V: ArgValue, DestTy: RegisterType);
2006 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
2007 }
2008
2009 if (ValueType->isPointerTy()) {
2010 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2011 ArgValue = Builder.CreatePtrToInt(V: ArgValue, DestTy: RegisterType);
2012 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
2013 }
2014
2015 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
2016}
2017
2018static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
2019 const CallExpr *E) {
2020 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2021 CodeGen::CodeGenModule &CGM = CGF.CGM;
2022 SmallVector<llvm::Value *, 4> Ops;
2023
2024 auto getIntArg = [&](unsigned ArgNo) {
2025 Expr::EvalResult Result;
2026 if (!E->getArg(Arg: ArgNo)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
2027 llvm_unreachable("Expected constant argument to range prefetch.");
2028 return Result.Val.getInt().getExtValue();
2029 };
2030
2031 Ops.push_back(Elt: CGF.EmitScalarExpr(E: E->getArg(Arg: 0))); /*Addr*/
2032 Ops.push_back(Elt: CGF.EmitScalarExpr(E: E->getArg(Arg: 1))); /*Access Kind*/
2033 Ops.push_back(Elt: CGF.EmitScalarExpr(E: E->getArg(Arg: 2))); /*Policy*/
2034
2035 if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
2036 auto Length = getIntArg(3);
2037 auto Count = getIntArg(4) - 1;
2038 auto Stride = getIntArg(5);
2039 auto Distance = getIntArg(6);
2040
2041 // Map ReuseDistance given in bytes to four bits representing decreasing
2042 // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
2043 // are rounded up to the nearest power of 2, starting at 32KiB. Any value
2044 // over the maximum is represented by 0 (distance not known).
2045 if (Distance > 0) {
2046 Distance = llvm::Log2_32_Ceil(Value: Distance);
2047 if (Distance < 15)
2048 Distance = 15;
2049 else if (Distance > 29)
2050 Distance = 0;
2051 else
2052 Distance = 30 - Distance;
2053 }
2054
2055 uint64_t Mask22 = (1ULL << 22) - 1;
2056 uint64_t Mask16 = (1ULL << 16) - 1;
2057 uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
2058 ((Count & Mask16) << 22) | (Length & Mask22);
2059
2060 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Builder.getInt64Ty(), V: Metadata));
2061 } else
2062 Ops.push_back(Elt: CGF.EmitScalarExpr(E: E->getArg(Arg: 3)));
2063
2064 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_range_prefetch),
2065 Args: Ops);
2066}
2067
2068/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2069/// argument that specifies the vector type. The additional argument is meant
2070/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
2071/// should be kept consistent with the logic in Sema.
2072/// TODO: Make this return false for SISD builtins.
2073static bool HasExtraNeonArgument(unsigned BuiltinID) {
2074 // Required by the headers included below, but not in this particular
2075 // function.
2076 [[maybe_unused]] int PtrArgNum = -1;
2077 [[maybe_unused]] bool HasConstPtr = false;
2078
2079 // The mask encodes the type. We don't care about the actual value. Instead,
2080 // we just check whether its been set.
2081 uint64_t mask = 0;
2082 switch (BuiltinID) {
2083#define GET_NEON_OVERLOAD_CHECK
2084#include "clang/Basic/arm_fp16.inc"
2085#include "clang/Basic/arm_neon.inc"
2086#undef GET_NEON_OVERLOAD_CHECK
2087 // Non-neon builtins for controling VFP that take extra argument for
2088 // discriminating the type.
2089 case ARM::BI__builtin_arm_vcvtr_f:
2090 case ARM::BI__builtin_arm_vcvtr_d:
2091 mask = 1;
2092 }
2093
2094 if (mask)
2095 return true;
2096
2097 return false;
2098}
2099
2100Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2101 const CallExpr *E,
2102 ReturnValueSlot ReturnValue,
2103 llvm::Triple::ArchType Arch) {
2104 if (auto Hint = GetValueForARMHint(BuiltinID))
2105 return Hint;
2106
2107 if (BuiltinID == clang::ARM::BI__emit) {
2108 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2109 llvm::FunctionType *FTy =
2110 llvm::FunctionType::get(Result: VoidTy, /*Variadic=*/isVarArg: false);
2111
2112 Expr::EvalResult Result;
2113 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
2114 llvm_unreachable("Sema will ensure that the parameter is constant");
2115
2116 llvm::APSInt Value = Result.Val.getInt();
2117 uint64_t ZExtValue = Value.zextOrTrunc(width: IsThumb ? 16 : 32).getZExtValue();
2118
2119 llvm::InlineAsm *Emit =
2120 IsThumb ? InlineAsm::get(Ty: FTy, AsmString: ".inst.n 0x" + utohexstr(X: ZExtValue), Constraints: "",
2121 /*hasSideEffects=*/true)
2122 : InlineAsm::get(Ty: FTy, AsmString: ".inst 0x" + utohexstr(X: ZExtValue), Constraints: "",
2123 /*hasSideEffects=*/true);
2124
2125 return Builder.CreateCall(Callee: Emit);
2126 }
2127
2128 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2129 Value *Option = EmitScalarExpr(E: E->getArg(Arg: 0));
2130 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_dbg), Args: Option);
2131 }
2132
2133 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2134 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
2135 Value *RW = EmitScalarExpr(E: E->getArg(Arg: 1));
2136 Value *IsData = EmitScalarExpr(E: E->getArg(Arg: 2));
2137
2138 // Locality is not supported on ARM target
2139 Value *Locality = llvm::ConstantInt::get(Ty: Int32Ty, V: 3);
2140
2141 Function *F = CGM.getIntrinsic(IID: Intrinsic::prefetch, Tys: Address->getType());
2142 return Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, IsData});
2143 }
2144
2145 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2146 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2147 return Builder.CreateCall(
2148 Callee: CGM.getIntrinsic(IID: Intrinsic::bitreverse, Tys: Arg->getType()), Args: Arg, Name: "rbit");
2149 }
2150
2151 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2152 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2153 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2154 Function *F = CGM.getIntrinsic(IID: Intrinsic::ctlz, Tys: Arg->getType());
2155 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
2156 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2157 Res = Builder.CreateTrunc(V: Res, DestTy: Builder.getInt32Ty());
2158 return Res;
2159 }
2160
2161
2162 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2163 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2164 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_cls), Args: Arg, Name: "cls");
2165 }
2166 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2167 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2168 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_cls64), Args: Arg,
2169 Name: "cls");
2170 }
2171
2172 if (BuiltinID == clang::ARM::BI__clear_cache) {
2173 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2174 const FunctionDecl *FD = E->getDirectCallee();
2175 Value *Ops[2];
2176 for (unsigned i = 0; i < 2; i++)
2177 Ops[i] = EmitScalarExpr(E: E->getArg(Arg: i));
2178 llvm::Type *Ty = CGM.getTypes().ConvertType(T: FD->getType());
2179 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Val: Ty);
2180 StringRef Name = FD->getName();
2181 return EmitNounwindRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name), args: Ops);
2182 }
2183
2184 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2185 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2186 Function *F;
2187
2188 switch (BuiltinID) {
2189 default: llvm_unreachable("unexpected builtin");
2190 case clang::ARM::BI__builtin_arm_mcrr:
2191 F = CGM.getIntrinsic(IID: Intrinsic::arm_mcrr);
2192 break;
2193 case clang::ARM::BI__builtin_arm_mcrr2:
2194 F = CGM.getIntrinsic(IID: Intrinsic::arm_mcrr2);
2195 break;
2196 }
2197
2198 // MCRR{2} instruction has 5 operands but
2199 // the intrinsic has 4 because Rt and Rt2
2200 // are represented as a single unsigned 64
2201 // bit integer in the intrinsic definition
2202 // but internally it's represented as 2 32
2203 // bit integers.
2204
2205 Value *Coproc = EmitScalarExpr(E: E->getArg(Arg: 0));
2206 Value *Opc1 = EmitScalarExpr(E: E->getArg(Arg: 1));
2207 Value *RtAndRt2 = EmitScalarExpr(E: E->getArg(Arg: 2));
2208 Value *CRm = EmitScalarExpr(E: E->getArg(Arg: 3));
2209
2210 Value *C1 = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2211 Value *Rt = Builder.CreateTruncOrBitCast(V: RtAndRt2, DestTy: Int32Ty);
2212 Value *Rt2 = Builder.CreateLShr(LHS: RtAndRt2, RHS: C1);
2213 Rt2 = Builder.CreateTruncOrBitCast(V: Rt2, DestTy: Int32Ty);
2214
2215 return Builder.CreateCall(Callee: F, Args: {Coproc, Opc1, Rt, Rt2, CRm});
2216 }
2217
2218 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2219 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2220 Function *F;
2221
2222 switch (BuiltinID) {
2223 default: llvm_unreachable("unexpected builtin");
2224 case clang::ARM::BI__builtin_arm_mrrc:
2225 F = CGM.getIntrinsic(IID: Intrinsic::arm_mrrc);
2226 break;
2227 case clang::ARM::BI__builtin_arm_mrrc2:
2228 F = CGM.getIntrinsic(IID: Intrinsic::arm_mrrc2);
2229 break;
2230 }
2231
2232 Value *Coproc = EmitScalarExpr(E: E->getArg(Arg: 0));
2233 Value *Opc1 = EmitScalarExpr(E: E->getArg(Arg: 1));
2234 Value *CRm = EmitScalarExpr(E: E->getArg(Arg: 2));
2235 Value *RtAndRt2 = Builder.CreateCall(Callee: F, Args: {Coproc, Opc1, CRm});
2236
2237 // Returns an unsigned 64 bit integer, represented
2238 // as two 32 bit integers.
2239
2240 Value *Rt = Builder.CreateExtractValue(Agg: RtAndRt2, Idxs: 1);
2241 Value *Rt1 = Builder.CreateExtractValue(Agg: RtAndRt2, Idxs: 0);
2242 Rt = Builder.CreateZExt(V: Rt, DestTy: Int64Ty);
2243 Rt1 = Builder.CreateZExt(V: Rt1, DestTy: Int64Ty);
2244
2245 Value *ShiftCast = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2246 RtAndRt2 = Builder.CreateShl(LHS: Rt, RHS: ShiftCast, Name: "shl", HasNUW: true);
2247 RtAndRt2 = Builder.CreateOr(LHS: RtAndRt2, RHS: Rt1);
2248
2249 return Builder.CreateBitCast(V: RtAndRt2, DestTy: ConvertType(T: E->getType()));
2250 }
2251
2252 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2253 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2254 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2255 getContext().getTypeSize(T: E->getType()) == 64) ||
2256 BuiltinID == clang::ARM::BI__ldrexd) {
2257 Function *F;
2258
2259 switch (BuiltinID) {
2260 default: llvm_unreachable("unexpected builtin");
2261 case clang::ARM::BI__builtin_arm_ldaex:
2262 F = CGM.getIntrinsic(IID: Intrinsic::arm_ldaexd);
2263 break;
2264 case clang::ARM::BI__builtin_arm_ldrexd:
2265 case clang::ARM::BI__builtin_arm_ldrex:
2266 case clang::ARM::BI__ldrexd:
2267 F = CGM.getIntrinsic(IID: Intrinsic::arm_ldrexd);
2268 break;
2269 }
2270
2271 Value *LdPtr = EmitScalarExpr(E: E->getArg(Arg: 0));
2272 Value *Val = Builder.CreateCall(Callee: F, Args: LdPtr, Name: "ldrexd");
2273
2274 Value *Val0 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
2275 Value *Val1 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
2276 Val0 = Builder.CreateZExt(V: Val0, DestTy: Int64Ty);
2277 Val1 = Builder.CreateZExt(V: Val1, DestTy: Int64Ty);
2278
2279 Value *ShiftCst = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2280 Val = Builder.CreateShl(LHS: Val0, RHS: ShiftCst, Name: "shl", HasNUW: true /* nuw */);
2281 Val = Builder.CreateOr(LHS: Val, RHS: Val1);
2282 return Builder.CreateBitCast(V: Val, DestTy: ConvertType(T: E->getType()));
2283 }
2284
2285 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2286 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2287 Value *LoadAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
2288
2289 QualType Ty = E->getType();
2290 llvm::Type *RealResTy = ConvertType(T: Ty);
2291 llvm::Type *IntTy =
2292 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
2293
2294 Function *F = CGM.getIntrinsic(
2295 IID: BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2296 : Intrinsic::arm_ldrex,
2297 Tys: DefaultPtrTy);
2298 CallInst *Val = Builder.CreateCall(Callee: F, Args: LoadAddr, Name: "ldrex");
2299 Val->addParamAttr(
2300 ArgNo: 0, Attr: Attribute::get(Context&: getLLVMContext(), Kind: Attribute::ElementType, Ty: IntTy));
2301
2302 if (RealResTy->isPointerTy())
2303 return Builder.CreateIntToPtr(V: Val, DestTy: RealResTy);
2304 else {
2305 llvm::Type *IntResTy = llvm::IntegerType::get(
2306 C&: getLLVMContext(), NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: RealResTy));
2307 return Builder.CreateBitCast(V: Builder.CreateTruncOrBitCast(V: Val, DestTy: IntResTy),
2308 DestTy: RealResTy);
2309 }
2310 }
2311
2312 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2313 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2314 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2315 getContext().getTypeSize(T: E->getArg(Arg: 0)->getType()) == 64)) {
2316 Function *F = CGM.getIntrinsic(
2317 IID: BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2318 : Intrinsic::arm_strexd);
2319 llvm::Type *STy = llvm::StructType::get(elt1: Int32Ty, elts: Int32Ty);
2320
2321 Address Tmp = CreateMemTempWithoutCast(T: E->getArg(Arg: 0)->getType());
2322 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
2323 Builder.CreateStore(Val, Addr: Tmp);
2324
2325 Address LdPtr = Tmp.withElementType(ElemTy: STy);
2326 Val = Builder.CreateLoad(Addr: LdPtr);
2327
2328 Value *Arg0 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
2329 Value *Arg1 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
2330 Value *StPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
2331 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1, StPtr}, Name: "strexd");
2332 }
2333
2334 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2335 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2336 Value *StoreVal = EmitScalarExpr(E: E->getArg(Arg: 0));
2337 Value *StoreAddr = EmitScalarExpr(E: E->getArg(Arg: 1));
2338
2339 QualType Ty = E->getArg(Arg: 0)->getType();
2340 llvm::Type *StoreTy =
2341 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
2342
2343 if (StoreVal->getType()->isPointerTy())
2344 StoreVal = Builder.CreatePtrToInt(V: StoreVal, DestTy: Int32Ty);
2345 else {
2346 llvm::Type *IntTy = llvm::IntegerType::get(
2347 C&: getLLVMContext(),
2348 NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: StoreVal->getType()));
2349 StoreVal = Builder.CreateBitCast(V: StoreVal, DestTy: IntTy);
2350 StoreVal = Builder.CreateZExtOrBitCast(V: StoreVal, DestTy: Int32Ty);
2351 }
2352
2353 Function *F = CGM.getIntrinsic(
2354 IID: BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2355 : Intrinsic::arm_strex,
2356 Tys: StoreAddr->getType());
2357
2358 CallInst *CI = Builder.CreateCall(Callee: F, Args: {StoreVal, StoreAddr}, Name: "strex");
2359 CI->addParamAttr(
2360 ArgNo: 1, Attr: Attribute::get(Context&: getLLVMContext(), Kind: Attribute::ElementType, Ty: StoreTy));
2361 return CI;
2362 }
2363
2364 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2365 Function *F = CGM.getIntrinsic(IID: Intrinsic::arm_clrex);
2366 return Builder.CreateCall(Callee: F);
2367 }
2368
2369 // CRC32
2370 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2371 switch (BuiltinID) {
2372 case clang::ARM::BI__builtin_arm_crc32b:
2373 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2374 case clang::ARM::BI__builtin_arm_crc32cb:
2375 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2376 case clang::ARM::BI__builtin_arm_crc32h:
2377 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2378 case clang::ARM::BI__builtin_arm_crc32ch:
2379 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2380 case clang::ARM::BI__builtin_arm_crc32w:
2381 case clang::ARM::BI__builtin_arm_crc32d:
2382 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2383 case clang::ARM::BI__builtin_arm_crc32cw:
2384 case clang::ARM::BI__builtin_arm_crc32cd:
2385 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2386 }
2387
2388 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2389 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
2390 Value *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
2391
2392 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2393 // intrinsics, hence we need different codegen for these cases.
2394 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2395 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2396 Value *C1 = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2397 Value *Arg1a = Builder.CreateTruncOrBitCast(V: Arg1, DestTy: Int32Ty);
2398 Value *Arg1b = Builder.CreateLShr(LHS: Arg1, RHS: C1);
2399 Arg1b = Builder.CreateTruncOrBitCast(V: Arg1b, DestTy: Int32Ty);
2400
2401 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
2402 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg0, Arg1a});
2403 return Builder.CreateCall(Callee: F, Args: {Res, Arg1b});
2404 } else {
2405 Arg1 = Builder.CreateZExtOrBitCast(V: Arg1, DestTy: Int32Ty);
2406
2407 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
2408 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1});
2409 }
2410 }
2411
2412 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2413 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2414 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2415 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2416 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2417 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2418
2419 SpecialRegisterAccessKind AccessKind = Write;
2420 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2421 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2422 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2423 AccessKind = VolatileRead;
2424
2425 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2426 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2427
2428 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2429 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2430
2431 llvm::Type *ValueType;
2432 llvm::Type *RegisterType;
2433 if (IsPointerBuiltin) {
2434 ValueType = VoidPtrTy;
2435 RegisterType = Int32Ty;
2436 } else if (Is64Bit) {
2437 ValueType = RegisterType = Int64Ty;
2438 } else {
2439 ValueType = RegisterType = Int32Ty;
2440 }
2441
2442 return EmitSpecialRegisterBuiltin(CGF&: *this, E, RegisterType, ValueType,
2443 AccessKind);
2444 }
2445
2446 if (BuiltinID == ARM::BI__builtin_sponentry) {
2447 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::sponentry, Tys: AllocaInt8PtrTy);
2448 return Builder.CreateCall(Callee: F);
2449 }
2450
2451 // Handle MSVC intrinsics before argument evaluation to prevent double
2452 // evaluation.
2453 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
2454 return EmitMSVCBuiltinExpr(BuiltinID: *MsvcIntId, E);
2455
2456 // Deal with MVE builtins
2457 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2458 return Result;
2459 // Handle CDE builtins
2460 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
2461 return Result;
2462
2463 // Some intrinsics are equivalent - if they are use the base intrinsic ID.
2464 auto It = llvm::find_if(Range: NEONEquivalentIntrinsicMap, P: [BuiltinID](auto &P) {
2465 return P.first == BuiltinID;
2466 });
2467 if (It != end(arr: NEONEquivalentIntrinsicMap))
2468 BuiltinID = It->second;
2469
2470 // Find out if any arguments are required to be integer constant
2471 // expressions.
2472 unsigned ICEArguments = 0;
2473 ASTContext::GetBuiltinTypeError Error;
2474 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
2475 assert(Error == ASTContext::GE_None && "Should not codegen an error");
2476
2477 auto getAlignmentValue32 = [&](Address addr) -> Value* {
2478 return Builder.getInt32(C: addr.getAlignment().getQuantity());
2479 };
2480
2481 Address PtrOp0 = Address::invalid();
2482 Address PtrOp1 = Address::invalid();
2483 SmallVector<Value*, 4> Ops;
2484 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
2485 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
2486 for (unsigned i = 0, e = NumArgs; i != e; i++) {
2487 if (i == 0) {
2488 switch (BuiltinID) {
2489 case NEON::BI__builtin_neon_vld1_v:
2490 case NEON::BI__builtin_neon_vld1q_v:
2491 case NEON::BI__builtin_neon_vld1q_lane_v:
2492 case NEON::BI__builtin_neon_vld1_lane_v:
2493 case NEON::BI__builtin_neon_vld1_dup_v:
2494 case NEON::BI__builtin_neon_vld1q_dup_v:
2495 case NEON::BI__builtin_neon_vst1_v:
2496 case NEON::BI__builtin_neon_vst1q_v:
2497 case NEON::BI__builtin_neon_vst1q_lane_v:
2498 case NEON::BI__builtin_neon_vst1_lane_v:
2499 case NEON::BI__builtin_neon_vst2_v:
2500 case NEON::BI__builtin_neon_vst2q_v:
2501 case NEON::BI__builtin_neon_vst2_lane_v:
2502 case NEON::BI__builtin_neon_vst2q_lane_v:
2503 case NEON::BI__builtin_neon_vst3_v:
2504 case NEON::BI__builtin_neon_vst3q_v:
2505 case NEON::BI__builtin_neon_vst3_lane_v:
2506 case NEON::BI__builtin_neon_vst3q_lane_v:
2507 case NEON::BI__builtin_neon_vst4_v:
2508 case NEON::BI__builtin_neon_vst4q_v:
2509 case NEON::BI__builtin_neon_vst4_lane_v:
2510 case NEON::BI__builtin_neon_vst4q_lane_v:
2511 // Get the alignment for the argument in addition to the value;
2512 // we'll use it later.
2513 PtrOp0 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
2514 Ops.push_back(Elt: PtrOp0.emitRawPointer(CGF&: *this));
2515 continue;
2516 }
2517 }
2518 if (i == 1) {
2519 switch (BuiltinID) {
2520 case NEON::BI__builtin_neon_vld2_v:
2521 case NEON::BI__builtin_neon_vld2q_v:
2522 case NEON::BI__builtin_neon_vld3_v:
2523 case NEON::BI__builtin_neon_vld3q_v:
2524 case NEON::BI__builtin_neon_vld4_v:
2525 case NEON::BI__builtin_neon_vld4q_v:
2526 case NEON::BI__builtin_neon_vld2_lane_v:
2527 case NEON::BI__builtin_neon_vld2q_lane_v:
2528 case NEON::BI__builtin_neon_vld3_lane_v:
2529 case NEON::BI__builtin_neon_vld3q_lane_v:
2530 case NEON::BI__builtin_neon_vld4_lane_v:
2531 case NEON::BI__builtin_neon_vld4q_lane_v:
2532 case NEON::BI__builtin_neon_vld2_dup_v:
2533 case NEON::BI__builtin_neon_vld2q_dup_v:
2534 case NEON::BI__builtin_neon_vld3_dup_v:
2535 case NEON::BI__builtin_neon_vld3q_dup_v:
2536 case NEON::BI__builtin_neon_vld4_dup_v:
2537 case NEON::BI__builtin_neon_vld4q_dup_v:
2538 // Get the alignment for the argument in addition to the value;
2539 // we'll use it later.
2540 PtrOp1 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
2541 Ops.push_back(Elt: PtrOp1.emitRawPointer(CGF&: *this));
2542 continue;
2543 }
2544 }
2545
2546 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
2547 }
2548
2549 switch (BuiltinID) {
2550 default: break;
2551
2552 case NEON::BI__builtin_neon_vget_lane_i8:
2553 case NEON::BI__builtin_neon_vget_lane_i16:
2554 case NEON::BI__builtin_neon_vget_lane_i32:
2555 case NEON::BI__builtin_neon_vget_lane_i64:
2556 case NEON::BI__builtin_neon_vget_lane_bf16:
2557 case NEON::BI__builtin_neon_vget_lane_f32:
2558 case NEON::BI__builtin_neon_vgetq_lane_i8:
2559 case NEON::BI__builtin_neon_vgetq_lane_i16:
2560 case NEON::BI__builtin_neon_vgetq_lane_i32:
2561 case NEON::BI__builtin_neon_vgetq_lane_i64:
2562 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2563 case NEON::BI__builtin_neon_vgetq_lane_f32:
2564 case NEON::BI__builtin_neon_vduph_lane_bf16:
2565 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2566 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
2567
2568 case NEON::BI__builtin_neon_vrndns_f32: {
2569 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2570 llvm::Type *Tys[] = {Arg->getType()};
2571 Function *F = CGM.getIntrinsic(IID: Intrinsic::roundeven, Tys);
2572 return Builder.CreateCall(Callee: F, Args: {Arg}, Name: "vrndn"); }
2573
2574 case NEON::BI__builtin_neon_vset_lane_i8:
2575 case NEON::BI__builtin_neon_vset_lane_i16:
2576 case NEON::BI__builtin_neon_vset_lane_i32:
2577 case NEON::BI__builtin_neon_vset_lane_i64:
2578 case NEON::BI__builtin_neon_vset_lane_bf16:
2579 case NEON::BI__builtin_neon_vset_lane_f32:
2580 case NEON::BI__builtin_neon_vsetq_lane_i8:
2581 case NEON::BI__builtin_neon_vsetq_lane_i16:
2582 case NEON::BI__builtin_neon_vsetq_lane_i32:
2583 case NEON::BI__builtin_neon_vsetq_lane_i64:
2584 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2585 case NEON::BI__builtin_neon_vsetq_lane_f32:
2586 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
2587
2588 case NEON::BI__builtin_neon_vsha1h_u32:
2589 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_sha1h), Ops,
2590 name: "vsha1h");
2591 case NEON::BI__builtin_neon_vsha1cq_u32:
2592 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_sha1c), Ops,
2593 name: "vsha1h");
2594 case NEON::BI__builtin_neon_vsha1pq_u32:
2595 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_sha1p), Ops,
2596 name: "vsha1h");
2597 case NEON::BI__builtin_neon_vsha1mq_u32:
2598 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_sha1m), Ops,
2599 name: "vsha1h");
2600
2601 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2602 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vcvtbfp2bf), Ops,
2603 name: "vcvtbfp2bf");
2604 case NEON::BI__builtin_neon_vcvt_f16_f32:
2605 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vcvtfp2hf), Ops,
2606 name: "vcvtfp2hf");
2607 case NEON::BI__builtin_neon_vcvt_f32_f16:
2608 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vcvthf2fp), Ops,
2609 name: "vcvthf2fp");
2610
2611 // The ARM _MoveToCoprocessor builtins put the input register value as
2612 // the first argument, but the LLVM intrinsic expects it as the third one.
2613 case clang::ARM::BI_MoveToCoprocessor:
2614 case clang::ARM::BI_MoveToCoprocessor2: {
2615 Function *F = CGM.getIntrinsic(IID: BuiltinID == clang::ARM::BI_MoveToCoprocessor
2616 ? Intrinsic::arm_mcr
2617 : Intrinsic::arm_mcr2);
2618 return Builder.CreateCall(Callee: F, Args: {Ops[1], Ops[2], Ops[0],
2619 Ops[3], Ops[4], Ops[5]});
2620 }
2621 }
2622
2623 // Get the last argument, which specifies the vector type.
2624 assert(HasExtraArg);
2625 const Expr *Arg = E->getArg(Arg: E->getNumArgs()-1);
2626 std::optional<llvm::APSInt> Result =
2627 Arg->getIntegerConstantExpr(Ctx: getContext());
2628 if (!Result)
2629 return nullptr;
2630
2631 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
2632 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
2633 // Determine the overloaded type of this builtin.
2634 llvm::Type *Ty;
2635 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
2636 Ty = FloatTy;
2637 else
2638 Ty = DoubleTy;
2639
2640 // Determine whether this is an unsigned conversion or not.
2641 bool usgn = Result->getZExtValue() == 1;
2642 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
2643
2644 // Call the appropriate intrinsic.
2645 Function *F = CGM.getIntrinsic(IID: Int, Tys: Ty);
2646 return Builder.CreateCall(Callee: F, Args: Ops, Name: "vcvtr");
2647 }
2648
2649 // Determine the type of this overloaded NEON intrinsic.
2650 NeonTypeFlags Type = Result->getZExtValue();
2651 bool usgn = Type.isUnsigned();
2652 bool rightShift = false;
2653
2654 llvm::FixedVectorType *VTy =
2655 GetNeonType(CGF: this, TypeFlags: Type, HasFastHalfType: getTarget().hasFastHalfType(), V1Ty: false,
2656 AllowBFloatArgsAndRet: getTarget().hasBFloat16Type());
2657 llvm::Type *Ty = VTy;
2658 if (!Ty)
2659 return nullptr;
2660
2661 // Many NEON builtins have identical semantics and uses in ARM and
2662 // AArch64. Emit these in a single function.
2663 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
2664 const ARMNeonVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
2665 IntrinsicMap, BuiltinID, MapProvenSorted&: NEONSIMDIntrinsicsProvenSorted);
2666 if (Builtin)
2667 return EmitCommonNeonBuiltinExpr(
2668 BuiltinID: Builtin->BuiltinID, LLVMIntrinsic: Builtin->LLVMIntrinsic, AltLLVMIntrinsic: Builtin->AltLLVMIntrinsic,
2669 NameHint: Builtin->NameHint, Modifier: Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
2670
2671 unsigned Int;
2672 switch (BuiltinID) {
2673 default: return nullptr;
2674 case NEON::BI__builtin_neon_vld1q_lane_v:
2675 // Handle 64-bit integer elements as a special case. Use shuffles of
2676 // one-element vectors to avoid poor code for i64 in the backend.
2677 if (VTy->getElementType()->isIntegerTy(BitWidth: 64)) {
2678 // Extract the other lane.
2679 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2680 int Lane = cast<ConstantInt>(Val: Ops[2])->getZExtValue();
2681 Value *SV = llvm::ConstantVector::get(V: ConstantInt::get(Ty: Int32Ty, V: 1-Lane));
2682 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV);
2683 // Load the value as a one-element vector.
2684 Ty = llvm::FixedVectorType::get(ElementType: VTy->getElementType(), NumElts: 1);
2685 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2686 Function *F = CGM.getIntrinsic(IID: Intrinsic::arm_neon_vld1, Tys);
2687 Value *Align = getAlignmentValue32(PtrOp0);
2688 Value *Ld = Builder.CreateCall(Callee: F, Args: {Ops[0], Align});
2689 // Combine them.
2690 int Indices[] = {1 - Lane, Lane};
2691 return Builder.CreateShuffleVector(V1: Ops[1], V2: Ld, Mask: Indices, Name: "vld1q_lane");
2692 }
2693 [[fallthrough]];
2694 case NEON::BI__builtin_neon_vld1_lane_v: {
2695 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2696 PtrOp0 = PtrOp0.withElementType(ElemTy: VTy->getElementType());
2697 Value *Ld = Builder.CreateLoad(Addr: PtrOp0);
2698 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ld, Idx: Ops[2], Name: "vld1_lane");
2699 }
2700 case NEON::BI__builtin_neon_vqrshrn_n_v:
2701 Int =
2702 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
2703 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrn_n",
2704 shift: 1, rightshift: true);
2705 case NEON::BI__builtin_neon_vqrshrun_n_v:
2706 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vqrshiftnsu, Tys: Ty),
2707 Ops, name: "vqrshrun_n", shift: 1, rightshift: true);
2708 case NEON::BI__builtin_neon_vqshrn_n_v:
2709 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
2710 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrn_n",
2711 shift: 1, rightshift: true);
2712 case NEON::BI__builtin_neon_vqshrun_n_v:
2713 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vqshiftnsu, Tys: Ty),
2714 Ops, name: "vqshrun_n", shift: 1, rightshift: true);
2715 case NEON::BI__builtin_neon_vrecpe_v:
2716 case NEON::BI__builtin_neon_vrecpeq_v:
2717 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vrecpe, Tys: Ty),
2718 Ops, name: "vrecpe");
2719 case NEON::BI__builtin_neon_vrshrn_n_v:
2720 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vrshiftn, Tys: Ty),
2721 Ops, name: "vrshrn_n", shift: 1, rightshift: true);
2722 case NEON::BI__builtin_neon_vrsra_n_v:
2723 case NEON::BI__builtin_neon_vrsraq_n_v:
2724 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
2725 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2726 Ops[2] = EmitNeonShiftVector(V: Ops[2], Ty, neg: true);
2727 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2728 Ops[1] = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Int, Tys: Ty), Args: {Ops[1], Ops[2]});
2729 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1], Name: "vrsra_n");
2730 case NEON::BI__builtin_neon_vsri_n_v:
2731 case NEON::BI__builtin_neon_vsriq_n_v:
2732 rightShift = true;
2733 [[fallthrough]];
2734 case NEON::BI__builtin_neon_vsli_n_v:
2735 case NEON::BI__builtin_neon_vsliq_n_v:
2736 Ops[2] = EmitNeonShiftVector(V: Ops[2], Ty, neg: rightShift);
2737 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vshiftins, Tys: Ty),
2738 Ops, name: "vsli_n");
2739 case NEON::BI__builtin_neon_vsra_n_v:
2740 case NEON::BI__builtin_neon_vsraq_n_v:
2741 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
2742 Ops[1] = EmitNeonRShiftImm(Vec: Ops[1], Shift: Ops[2], Ty, usgn, name: "vsra_n");
2743 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
2744 case NEON::BI__builtin_neon_vst1q_lane_v:
2745 // Handle 64-bit integer elements as a special case. Use a shuffle to get
2746 // a one-element vector and avoid poor code for i64 in the backend.
2747 if (VTy->getElementType()->isIntegerTy(BitWidth: 64)) {
2748 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2749 Value *SV = llvm::ConstantVector::get(V: cast<llvm::Constant>(Val: Ops[2]));
2750 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV);
2751 Ops[2] = getAlignmentValue32(PtrOp0);
2752 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
2753 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vst1,
2754 Tys), Args: Ops);
2755 }
2756 [[fallthrough]];
2757 case NEON::BI__builtin_neon_vst1_lane_v: {
2758 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2759 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
2760 return Builder.CreateStore(Val: Ops[1],
2761 Addr: PtrOp0.withElementType(ElemTy: Ops[1]->getType()));
2762 }
2763 case NEON::BI__builtin_neon_vtbl1_v:
2764 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbl1),
2765 Ops, name: "vtbl1");
2766 case NEON::BI__builtin_neon_vtbl2_v:
2767 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbl2),
2768 Ops, name: "vtbl2");
2769 case NEON::BI__builtin_neon_vtbl3_v:
2770 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbl3),
2771 Ops, name: "vtbl3");
2772 case NEON::BI__builtin_neon_vtbl4_v:
2773 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbl4),
2774 Ops, name: "vtbl4");
2775 case NEON::BI__builtin_neon_vtbx1_v:
2776 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbx1),
2777 Ops, name: "vtbx1");
2778 case NEON::BI__builtin_neon_vtbx2_v:
2779 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbx2),
2780 Ops, name: "vtbx2");
2781 case NEON::BI__builtin_neon_vtbx3_v:
2782 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbx3),
2783 Ops, name: "vtbx3");
2784 case NEON::BI__builtin_neon_vtbx4_v:
2785 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbx4),
2786 Ops, name: "vtbx4");
2787 }
2788}
2789
2790template<typename Integer>
2791static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
2792 return E->getIntegerConstantExpr(Ctx: Context)->getExtValue();
2793}
2794
2795static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
2796 llvm::Type *T, bool Unsigned) {
2797 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
2798 // which finds it convenient to specify signed/unsigned as a boolean flag.
2799 return Unsigned ? Builder.CreateZExt(V, DestTy: T) : Builder.CreateSExt(V, DestTy: T);
2800}
2801
2802static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
2803 uint32_t Shift, bool Unsigned) {
2804 // MVE helper function for integer shift right. This must handle signed vs
2805 // unsigned, and also deal specially with the case where the shift count is
2806 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
2807 // undefined behavior, but in MVE it's legal, so we must convert it to code
2808 // that is not undefined in IR.
2809 unsigned LaneBits = cast<llvm::VectorType>(Val: V->getType())
2810 ->getElementType()
2811 ->getPrimitiveSizeInBits();
2812 if (Shift == LaneBits) {
2813 // An unsigned shift of the full lane size always generates zero, so we can
2814 // simply emit a zero vector. A signed shift of the full lane size does the
2815 // same thing as shifting by one bit fewer.
2816 if (Unsigned)
2817 return llvm::Constant::getNullValue(Ty: V->getType());
2818 else
2819 --Shift;
2820 }
2821 return Unsigned ? Builder.CreateLShr(LHS: V, RHS: Shift) : Builder.CreateAShr(LHS: V, RHS: Shift);
2822}
2823
2824static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
2825 // MVE-specific helper function for a vector splat, which infers the element
2826 // count of the output vector by knowing that MVE vectors are all 128 bits
2827 // wide.
2828 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
2829 return Builder.CreateVectorSplat(NumElts: Elements, V);
2830}
2831
2832static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
2833 CodeGenFunction *CGF,
2834 llvm::Value *V,
2835 llvm::Type *DestType) {
2836 // Convert one MVE vector type into another by reinterpreting its in-register
2837 // format.
2838 //
2839 // Little-endian, this is identical to a bitcast (which reinterprets the
2840 // memory format). But big-endian, they're not necessarily the same, because
2841 // the register and memory formats map to each other differently depending on
2842 // the lane size.
2843 //
2844 // We generate a bitcast whenever we can (if we're little-endian, or if the
2845 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
2846 // that performs the different kind of reinterpretation.
2847 if (CGF->getTarget().isBigEndian() &&
2848 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
2849 return Builder.CreateCall(
2850 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vreinterpretq,
2851 Tys: {DestType, V->getType()}),
2852 Args: V);
2853 } else {
2854 return Builder.CreateBitCast(V, DestTy: DestType);
2855 }
2856}
2857
2858static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
2859 // Make a shufflevector that extracts every other element of a vector (evens
2860 // or odds, as desired).
2861 SmallVector<int, 16> Indices;
2862 unsigned InputElements =
2863 cast<llvm::FixedVectorType>(Val: V->getType())->getNumElements();
2864 for (unsigned i = 0; i < InputElements; i += 2)
2865 Indices.push_back(Elt: i + Odd);
2866 return Builder.CreateShuffleVector(V, Mask: Indices);
2867}
2868
2869static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
2870 llvm::Value *V1) {
2871 // Make a shufflevector that interleaves two vectors element by element.
2872 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
2873 SmallVector<int, 16> Indices;
2874 unsigned InputElements =
2875 cast<llvm::FixedVectorType>(Val: V0->getType())->getNumElements();
2876 for (unsigned i = 0; i < InputElements; i++) {
2877 Indices.push_back(Elt: i);
2878 Indices.push_back(Elt: i + InputElements);
2879 }
2880 return Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: Indices);
2881}
2882
2883template<unsigned HighBit, unsigned OtherBits>
2884static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
2885 // MVE-specific helper function to make a vector splat of a constant such as
2886 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
2887 llvm::Type *T = cast<llvm::VectorType>(Val: VT)->getElementType();
2888 unsigned LaneBits = T->getPrimitiveSizeInBits();
2889 uint32_t Value = HighBit << (LaneBits - 1);
2890 if (OtherBits)
2891 Value |= (1UL << (LaneBits - 1)) - 1;
2892 llvm::Value *Lane = llvm::ConstantInt::get(Ty: T, V: Value);
2893 return ARMMVEVectorSplat(Builder, V: Lane);
2894}
2895
2896static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
2897 llvm::Value *V,
2898 unsigned ReverseWidth) {
2899 // MVE-specific helper function which reverses the elements of a
2900 // vector within every (ReverseWidth)-bit collection of lanes.
2901 SmallVector<int, 16> Indices;
2902 unsigned LaneSize = V->getType()->getScalarSizeInBits();
2903 unsigned Elements = 128 / LaneSize;
2904 unsigned Mask = ReverseWidth / LaneSize - 1;
2905 for (unsigned i = 0; i < Elements; i++)
2906 Indices.push_back(Elt: i ^ Mask);
2907 return Builder.CreateShuffleVector(V, Mask: Indices);
2908}
2909
2910static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
2911 CodeGenFunction *CGF, llvm::Value *V,
2912 llvm::Type *Ty) {
2913 return Builder.CreateCall(
2914 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_fp_int, Tys: {Ty, V->getType()}),
2915 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0)});
2916}
2917
2918static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
2919 CodeGenFunction *CGF, llvm::Value *V,
2920 llvm::Type *Ty) {
2921 return Builder.CreateCall(
2922 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_fp_int, Tys: {Ty, V->getType()}),
2923 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 1)});
2924}
2925
2926static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
2927 CodeGenFunction *CGF, llvm::Value *V,
2928 llvm::Type *Ty) {
2929 return Builder.CreateCall(
2930 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_int_fp, Tys: {Ty, V->getType()}),
2931 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0)});
2932}
2933
2934static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
2935 CodeGenFunction *CGF, llvm::Value *V,
2936 llvm::Type *Ty) {
2937 return Builder.CreateCall(
2938 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_int_fp, Tys: {Ty, V->getType()}),
2939 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 1)});
2940}
2941
2942Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
2943 const CallExpr *E,
2944 ReturnValueSlot ReturnValue,
2945 llvm::Triple::ArchType Arch) {
2946 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
2947 Intrinsic::ID IRIntr;
2948 unsigned NumVectors;
2949
2950 // Code autogenerated by Tablegen will handle all the simple builtins.
2951 switch (BuiltinID) {
2952 #include "clang/Basic/arm_mve_builtin_cg.inc"
2953
2954 // If we didn't match an MVE builtin id at all, go back to the
2955 // main EmitARMBuiltinExpr.
2956 default:
2957 return nullptr;
2958 }
2959
2960 // Anything that breaks from that switch is an MVE builtin that
2961 // needs handwritten code to generate.
2962
2963 switch (CustomCodeGenType) {
2964
2965 case CustomCodeGen::VLD24: {
2966 llvm::SmallVector<Value *, 4> Ops;
2967 llvm::SmallVector<llvm::Type *, 4> Tys;
2968
2969 auto MvecCType = E->getType();
2970 auto MvecLType = ConvertType(T: MvecCType);
2971 assert(MvecLType->isStructTy() &&
2972 "Return type for vld[24]q should be a struct");
2973 assert(MvecLType->getStructNumElements() == 1 &&
2974 "Return-type struct for vld[24]q should have one element");
2975 auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
2976 assert(MvecLTypeInner->isArrayTy() &&
2977 "Return-type struct for vld[24]q should contain an array");
2978 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
2979 "Array member of return-type struct vld[24]q has wrong length");
2980 auto VecLType = MvecLTypeInner->getArrayElementType();
2981
2982 Tys.push_back(Elt: VecLType);
2983
2984 auto Addr = E->getArg(Arg: 0);
2985 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
2986 Tys.push_back(Elt: ConvertType(T: Addr->getType()));
2987
2988 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
2989 Value *LoadResult = Builder.CreateCall(Callee: F, Args: Ops);
2990 Value *MvecOut = PoisonValue::get(T: MvecLType);
2991 for (unsigned i = 0; i < NumVectors; ++i) {
2992 Value *Vec = Builder.CreateExtractValue(Agg: LoadResult, Idxs: i);
2993 MvecOut = Builder.CreateInsertValue(Agg: MvecOut, Val: Vec, Idxs: {0, i});
2994 }
2995
2996 if (ReturnValue.isNull())
2997 return MvecOut;
2998 else
2999 return Builder.CreateStore(Val: MvecOut, Addr: ReturnValue.getAddress());
3000 }
3001
3002 case CustomCodeGen::VST24: {
3003 llvm::SmallVector<Value *, 4> Ops;
3004 llvm::SmallVector<llvm::Type *, 4> Tys;
3005
3006 auto Addr = E->getArg(Arg: 0);
3007 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
3008 Tys.push_back(Elt: ConvertType(T: Addr->getType()));
3009
3010 auto MvecCType = E->getArg(Arg: 1)->getType();
3011 auto MvecLType = ConvertType(T: MvecCType);
3012 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3013 assert(MvecLType->getStructNumElements() == 1 &&
3014 "Data-type struct for vst2q should have one element");
3015 auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
3016 assert(MvecLTypeInner->isArrayTy() &&
3017 "Data-type struct for vst2q should contain an array");
3018 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3019 "Array member of return-type struct vld[24]q has wrong length");
3020 auto VecLType = MvecLTypeInner->getArrayElementType();
3021
3022 Tys.push_back(Elt: VecLType);
3023
3024 AggValueSlot MvecSlot = CreateAggTemp(T: MvecCType);
3025 EmitAggExpr(E: E->getArg(Arg: 1), AS: MvecSlot);
3026 auto Mvec = Builder.CreateLoad(Addr: MvecSlot.getAddress());
3027 for (unsigned i = 0; i < NumVectors; i++)
3028 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Mvec, Idxs: {0, i}));
3029
3030 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
3031 Value *ToReturn = nullptr;
3032 for (unsigned i = 0; i < NumVectors; i++) {
3033 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: i));
3034 ToReturn = Builder.CreateCall(Callee: F, Args: Ops);
3035 Ops.pop_back();
3036 }
3037 return ToReturn;
3038 }
3039 }
3040 llvm_unreachable("unknown custom codegen type.");
3041}
3042
3043Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3044 const CallExpr *E,
3045 ReturnValueSlot ReturnValue,
3046 llvm::Triple::ArchType Arch) {
3047 switch (BuiltinID) {
3048 default:
3049 return nullptr;
3050#include "clang/Basic/arm_cde_builtin_cg.inc"
3051 }
3052}
3053
3054static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3055 const CallExpr *E,
3056 SmallVectorImpl<Value *> &Ops,
3057 llvm::Triple::ArchType Arch) {
3058 unsigned int Int = 0;
3059 const char *s = nullptr;
3060
3061 switch (BuiltinID) {
3062 default:
3063 return nullptr;
3064 case NEON::BI__builtin_neon_vtbl1_v:
3065 case NEON::BI__builtin_neon_vqtbl1_v:
3066 case NEON::BI__builtin_neon_vqtbl1q_v:
3067 case NEON::BI__builtin_neon_vtbl2_v:
3068 case NEON::BI__builtin_neon_vqtbl2_v:
3069 case NEON::BI__builtin_neon_vqtbl2q_v:
3070 case NEON::BI__builtin_neon_vtbl3_v:
3071 case NEON::BI__builtin_neon_vqtbl3_v:
3072 case NEON::BI__builtin_neon_vqtbl3q_v:
3073 case NEON::BI__builtin_neon_vtbl4_v:
3074 case NEON::BI__builtin_neon_vqtbl4_v:
3075 case NEON::BI__builtin_neon_vqtbl4q_v:
3076 break;
3077 case NEON::BI__builtin_neon_vtbx1_v:
3078 case NEON::BI__builtin_neon_vqtbx1_v:
3079 case NEON::BI__builtin_neon_vqtbx1q_v:
3080 case NEON::BI__builtin_neon_vtbx2_v:
3081 case NEON::BI__builtin_neon_vqtbx2_v:
3082 case NEON::BI__builtin_neon_vqtbx2q_v:
3083 case NEON::BI__builtin_neon_vtbx3_v:
3084 case NEON::BI__builtin_neon_vqtbx3_v:
3085 case NEON::BI__builtin_neon_vqtbx3q_v:
3086 case NEON::BI__builtin_neon_vtbx4_v:
3087 case NEON::BI__builtin_neon_vqtbx4_v:
3088 case NEON::BI__builtin_neon_vqtbx4q_v:
3089 break;
3090 }
3091
3092 assert(E->getNumArgs() >= 3);
3093
3094 // Get the last argument, which specifies the vector type.
3095 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
3096 std::optional<llvm::APSInt> Result =
3097 Arg->getIntegerConstantExpr(Ctx: CGF.getContext());
3098 if (!Result)
3099 return nullptr;
3100
3101 // Determine the type of this overloaded NEON intrinsic.
3102 NeonTypeFlags Type = Result->getZExtValue();
3103 llvm::FixedVectorType *Ty = GetNeonType(CGF: &CGF, TypeFlags: Type);
3104 if (!Ty)
3105 return nullptr;
3106
3107 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3108
3109 // AArch64 scalar builtins are not overloaded, they do not have an extra
3110 // argument that specifies the vector type, need to handle each case.
3111 switch (BuiltinID) {
3112 case NEON::BI__builtin_neon_vtbl1_v: {
3113 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 1), ExtOp: nullptr, IndexOp: Ops[1],
3114 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3115 }
3116 case NEON::BI__builtin_neon_vtbl2_v: {
3117 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 2), ExtOp: nullptr, IndexOp: Ops[2],
3118 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3119 }
3120 case NEON::BI__builtin_neon_vtbl3_v: {
3121 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 3), ExtOp: nullptr, IndexOp: Ops[3],
3122 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3123 }
3124 case NEON::BI__builtin_neon_vtbl4_v: {
3125 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 4), ExtOp: nullptr, IndexOp: Ops[4],
3126 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3127 }
3128 case NEON::BI__builtin_neon_vtbx1_v: {
3129 Value *TblRes =
3130 packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 1), ExtOp: nullptr, IndexOp: Ops[2], ResTy: Ty,
3131 IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3132
3133 llvm::Constant *EightV = ConstantInt::get(Ty, V: 8);
3134 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[2], RHS: EightV);
3135 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3136
3137 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3138 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3139 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3140 }
3141 case NEON::BI__builtin_neon_vtbx2_v: {
3142 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 2), ExtOp: Ops[0], IndexOp: Ops[3],
3143 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbx1, Name: "vtbx1");
3144 }
3145 case NEON::BI__builtin_neon_vtbx3_v: {
3146 Value *TblRes =
3147 packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 3), ExtOp: nullptr, IndexOp: Ops[4], ResTy: Ty,
3148 IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3149
3150 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, V: 24);
3151 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[4],
3152 RHS: TwentyFourV);
3153 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3154
3155 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3156 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3157 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3158 }
3159 case NEON::BI__builtin_neon_vtbx4_v: {
3160 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 4), ExtOp: Ops[0], IndexOp: Ops[5],
3161 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbx2, Name: "vtbx2");
3162 }
3163 case NEON::BI__builtin_neon_vqtbl1_v:
3164 case NEON::BI__builtin_neon_vqtbl1q_v:
3165 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3166 case NEON::BI__builtin_neon_vqtbl2_v:
3167 case NEON::BI__builtin_neon_vqtbl2q_v: {
3168 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3169 case NEON::BI__builtin_neon_vqtbl3_v:
3170 case NEON::BI__builtin_neon_vqtbl3q_v:
3171 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3172 case NEON::BI__builtin_neon_vqtbl4_v:
3173 case NEON::BI__builtin_neon_vqtbl4q_v:
3174 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3175 case NEON::BI__builtin_neon_vqtbx1_v:
3176 case NEON::BI__builtin_neon_vqtbx1q_v:
3177 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3178 case NEON::BI__builtin_neon_vqtbx2_v:
3179 case NEON::BI__builtin_neon_vqtbx2q_v:
3180 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3181 case NEON::BI__builtin_neon_vqtbx3_v:
3182 case NEON::BI__builtin_neon_vqtbx3q_v:
3183 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3184 case NEON::BI__builtin_neon_vqtbx4_v:
3185 case NEON::BI__builtin_neon_vqtbx4q_v:
3186 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3187 }
3188 }
3189
3190 if (!Int)
3191 return nullptr;
3192
3193 Function *F = CGF.CGM.getIntrinsic(IID: Int, Tys: Ty);
3194 return CGF.EmitNeonCall(F, Ops, name: s);
3195}
3196
3197Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3198 auto *VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
3199 Op = Builder.CreateBitCast(V: Op, DestTy: Int16Ty);
3200 Value *V = PoisonValue::get(T: VTy);
3201 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
3202 Op = Builder.CreateInsertElement(Vec: V, NewElt: Op, Idx: CI);
3203 return Op;
3204}
3205
3206/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3207/// access builtin. Only required if it can't be inferred from the base pointer
3208/// operand.
3209llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3210 switch (TypeFlags.getMemEltType()) {
3211 case SVETypeFlags::MemEltTyDefault:
3212 return getEltType(TypeFlags);
3213 case SVETypeFlags::MemEltTyInt8:
3214 return Builder.getInt8Ty();
3215 case SVETypeFlags::MemEltTyInt16:
3216 return Builder.getInt16Ty();
3217 case SVETypeFlags::MemEltTyInt32:
3218 return Builder.getInt32Ty();
3219 case SVETypeFlags::MemEltTyInt64:
3220 return Builder.getInt64Ty();
3221 }
3222 llvm_unreachable("Unknown MemEltType");
3223}
3224
3225llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3226 switch (TypeFlags.getEltType()) {
3227 default:
3228 llvm_unreachable("Invalid SVETypeFlag!");
3229
3230 case SVETypeFlags::EltTyMFloat8:
3231 case SVETypeFlags::EltTyInt8:
3232 return Builder.getInt8Ty();
3233 case SVETypeFlags::EltTyInt16:
3234 return Builder.getInt16Ty();
3235 case SVETypeFlags::EltTyInt32:
3236 return Builder.getInt32Ty();
3237 case SVETypeFlags::EltTyInt64:
3238 return Builder.getInt64Ty();
3239 case SVETypeFlags::EltTyInt128:
3240 return Builder.getInt128Ty();
3241
3242 case SVETypeFlags::EltTyFloat16:
3243 return Builder.getHalfTy();
3244 case SVETypeFlags::EltTyFloat32:
3245 return Builder.getFloatTy();
3246 case SVETypeFlags::EltTyFloat64:
3247 return Builder.getDoubleTy();
3248
3249 case SVETypeFlags::EltTyBFloat16:
3250 return Builder.getBFloatTy();
3251
3252 case SVETypeFlags::EltTyBool8:
3253 case SVETypeFlags::EltTyBool16:
3254 case SVETypeFlags::EltTyBool32:
3255 case SVETypeFlags::EltTyBool64:
3256 return Builder.getInt1Ty();
3257 }
3258}
3259
3260// Return the llvm predicate vector type corresponding to the specified element
3261// TypeFlags.
3262llvm::ScalableVectorType *
3263CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3264 switch (TypeFlags.getEltType()) {
3265 default: llvm_unreachable("Unhandled SVETypeFlag!");
3266
3267 case SVETypeFlags::EltTyInt8:
3268 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3269 case SVETypeFlags::EltTyInt16:
3270 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3271 case SVETypeFlags::EltTyInt32:
3272 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3273 case SVETypeFlags::EltTyInt64:
3274 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3275
3276 case SVETypeFlags::EltTyBFloat16:
3277 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3278 case SVETypeFlags::EltTyFloat16:
3279 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3280 case SVETypeFlags::EltTyFloat32:
3281 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3282 case SVETypeFlags::EltTyFloat64:
3283 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3284
3285 case SVETypeFlags::EltTyBool8:
3286 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3287 case SVETypeFlags::EltTyBool16:
3288 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3289 case SVETypeFlags::EltTyBool32:
3290 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3291 case SVETypeFlags::EltTyBool64:
3292 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3293 }
3294}
3295
3296// Return the llvm vector type corresponding to the specified element TypeFlags.
3297llvm::ScalableVectorType *
3298CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3299 switch (TypeFlags.getEltType()) {
3300 default:
3301 llvm_unreachable("Invalid SVETypeFlag!");
3302
3303 case SVETypeFlags::EltTyInt8:
3304 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3305 case SVETypeFlags::EltTyInt16:
3306 return llvm::ScalableVectorType::get(ElementType: Builder.getInt16Ty(), MinNumElts: 8);
3307 case SVETypeFlags::EltTyInt32:
3308 return llvm::ScalableVectorType::get(ElementType: Builder.getInt32Ty(), MinNumElts: 4);
3309 case SVETypeFlags::EltTyInt64:
3310 return llvm::ScalableVectorType::get(ElementType: Builder.getInt64Ty(), MinNumElts: 2);
3311
3312 case SVETypeFlags::EltTyMFloat8:
3313 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3314 case SVETypeFlags::EltTyFloat16:
3315 return llvm::ScalableVectorType::get(ElementType: Builder.getHalfTy(), MinNumElts: 8);
3316 case SVETypeFlags::EltTyBFloat16:
3317 return llvm::ScalableVectorType::get(ElementType: Builder.getBFloatTy(), MinNumElts: 8);
3318 case SVETypeFlags::EltTyFloat32:
3319 return llvm::ScalableVectorType::get(ElementType: Builder.getFloatTy(), MinNumElts: 4);
3320 case SVETypeFlags::EltTyFloat64:
3321 return llvm::ScalableVectorType::get(ElementType: Builder.getDoubleTy(), MinNumElts: 2);
3322
3323 case SVETypeFlags::EltTyBool8:
3324 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3325 case SVETypeFlags::EltTyBool16:
3326 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3327 case SVETypeFlags::EltTyBool32:
3328 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3329 case SVETypeFlags::EltTyBool64:
3330 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3331 }
3332}
3333
3334constexpr unsigned SVEBitsPerBlock = 128;
3335
3336static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3337 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3338 return llvm::ScalableVectorType::get(ElementType: EltTy, MinNumElts: NumElts);
3339}
3340
3341// Reinterpret the input predicate so that it can be used to correctly isolate
3342// the elements of the specified datatype.
3343Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3344 llvm::ScalableVectorType *VTy) {
3345
3346 if (isa<TargetExtType>(Val: Pred->getType()) &&
3347 cast<TargetExtType>(Val: Pred->getType())->getName() == "aarch64.svcount")
3348 return Pred;
3349
3350 auto *RTy = llvm::VectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), Other: VTy);
3351 if (Pred->getType() == RTy)
3352 return Pred;
3353
3354 unsigned IntID;
3355 llvm::Type *IntrinsicTy;
3356 switch (VTy->getMinNumElements()) {
3357 default:
3358 llvm_unreachable("unsupported element count!");
3359 case 1:
3360 case 2:
3361 case 4:
3362 case 8:
3363 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3364 IntrinsicTy = RTy;
3365 break;
3366 case 16:
3367 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3368 IntrinsicTy = Pred->getType();
3369 break;
3370 }
3371
3372 Function *F = CGM.getIntrinsic(IID: IntID, Tys: IntrinsicTy);
3373 Value *C = Builder.CreateCall(Callee: F, Args: Pred);
3374 assert(C->getType() == RTy && "Unexpected return type!");
3375 return C;
3376}
3377
3378Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3379 llvm::StructType *Ty) {
3380 if (PredTuple->getType() == Ty)
3381 return PredTuple;
3382
3383 Value *Ret = llvm::PoisonValue::get(T: Ty);
3384 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3385 Value *Pred = Builder.CreateExtractValue(Agg: PredTuple, Idxs: I);
3386 Pred = EmitSVEPredicateCast(
3387 Pred, VTy: cast<llvm::ScalableVectorType>(Val: Ty->getTypeAtIndex(N: I)));
3388 Ret = Builder.CreateInsertValue(Agg: Ret, Val: Pred, Idxs: I);
3389 }
3390
3391 return Ret;
3392}
3393
3394Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
3395 SmallVectorImpl<Value *> &Ops,
3396 unsigned IntID) {
3397 auto *ResultTy = getSVEType(TypeFlags);
3398 auto *OverloadedTy =
3399 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: ResultTy);
3400 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[1]->getType()});
3401
3402 // At the ACLE level there's only one predicate type, svbool_t, which is
3403 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3404 // actual type being loaded. For example, when loading doubles (i64) the
3405 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3406 // the predicate and the data being loaded must match. Cast to the type
3407 // expected by the intrinsic. The intrinsic itself should be defined in
3408 // a way than enforces relations between parameter types.
3409 Ops[0] = EmitSVEPredicateCast(
3410 Pred: Ops[0], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 0)->getType()));
3411
3412 // Pass 0 when the offset is missing. This can only be applied when using
3413 // the "vector base" addressing mode for which ACLE allows no offset. The
3414 // corresponding LLVM IR always requires an offset.
3415 if (Ops.size() == 2) {
3416 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3417 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
3418 }
3419
3420 // For "vector base, scalar index" scale the index so that it becomes a
3421 // scalar offset.
3422 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3423 unsigned BytesPerElt =
3424 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3425 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
3426 }
3427
3428 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
3429
3430 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3431 // other cases it's folded into a nop.
3432 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(V: Call, DestTy: ResultTy)
3433 : Builder.CreateSExt(V: Call, DestTy: ResultTy);
3434}
3435
3436Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
3437 SmallVectorImpl<Value *> &Ops,
3438 unsigned IntID) {
3439 auto *SrcDataTy = getSVEType(TypeFlags);
3440 auto *OverloadedTy =
3441 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: SrcDataTy);
3442
3443 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3444 // it's the first argument. Move it accordingly.
3445 Ops.insert(I: Ops.begin(), Elt: Ops.pop_back_val());
3446
3447 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[2]->getType()});
3448
3449 // Pass 0 when the offset is missing. This can only be applied when using
3450 // the "vector base" addressing mode for which ACLE allows no offset. The
3451 // corresponding LLVM IR always requires an offset.
3452 if (Ops.size() == 3) {
3453 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3454 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
3455 }
3456
3457 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
3458 // folded into a nop.
3459 Ops[0] = Builder.CreateTrunc(V: Ops[0], DestTy: OverloadedTy);
3460
3461 // At the ACLE level there's only one predicate type, svbool_t, which is
3462 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3463 // actual type being stored. For example, when storing doubles (i64) the
3464 // predicated should be <n x 2 x i1> instead. At the IR level the type of
3465 // the predicate and the data being stored must match. Cast to the type
3466 // expected by the intrinsic. The intrinsic itself should be defined in
3467 // a way that enforces relations between parameter types.
3468 Ops[1] = EmitSVEPredicateCast(
3469 Pred: Ops[1], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 1)->getType()));
3470
3471 // For "vector base, scalar index" scale the index so that it becomes a
3472 // scalar offset.
3473 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
3474 unsigned BytesPerElt =
3475 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3476 Ops[3] = Builder.CreateShl(LHS: Ops[3], RHS: Log2_32(Value: BytesPerElt));
3477 }
3478
3479 return Builder.CreateCall(Callee: F, Args: Ops);
3480}
3481
3482Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
3483 SmallVectorImpl<Value *> &Ops,
3484 unsigned IntID) {
3485 // The gather prefetches are overloaded on the vector input - this can either
3486 // be the vector of base addresses or vector of offsets.
3487 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Val: Ops[1]->getType());
3488 if (!OverloadedTy)
3489 OverloadedTy = cast<llvm::ScalableVectorType>(Val: Ops[2]->getType());
3490
3491 // Cast the predicate from svbool_t to the right number of elements.
3492 Ops[0] = EmitSVEPredicateCast(Pred: Ops[0], VTy: OverloadedTy);
3493
3494 // vector + imm addressing modes
3495 if (Ops[1]->getType()->isVectorTy()) {
3496 if (Ops.size() == 3) {
3497 // Pass 0 for 'vector+imm' when the index is omitted.
3498 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
3499
3500 // The sv_prfop is the last operand in the builtin and IR intrinsic.
3501 std::swap(a&: Ops[2], b&: Ops[3]);
3502 } else {
3503 // Index needs to be passed as scaled offset.
3504 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3505 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
3506 if (BytesPerElt > 1)
3507 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
3508 }
3509
3510 Function *F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
3511 return Builder.CreateCall(Callee: F, Args: Ops);
3512 }
3513
3514 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {Ops[1]->getType(), OverloadedTy});
3515 return Builder.CreateCall(Callee: F, Args: Ops);
3516}
3517
3518Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
3519 SmallVectorImpl<Value*> &Ops,
3520 unsigned IntID) {
3521 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3522 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
3523 Value *BasePtr = Ops[1];
3524
3525 // Does the load have an offset?
3526 if (Ops.size() > 2)
3527 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
3528
3529 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {VTy, BasePtr->getType()});
3530 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr});
3531}
3532
3533Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
3534 SmallVectorImpl<Value*> &Ops,
3535 unsigned IntID) {
3536 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
3537
3538 unsigned N;
3539 switch (IntID) {
3540 case Intrinsic::aarch64_sve_st2:
3541 case Intrinsic::aarch64_sve_st1_pn_x2:
3542 case Intrinsic::aarch64_sve_stnt1_pn_x2:
3543 case Intrinsic::aarch64_sve_st2q:
3544 N = 2;
3545 break;
3546 case Intrinsic::aarch64_sve_st3:
3547 case Intrinsic::aarch64_sve_st3q:
3548 N = 3;
3549 break;
3550 case Intrinsic::aarch64_sve_st4:
3551 case Intrinsic::aarch64_sve_st1_pn_x4:
3552 case Intrinsic::aarch64_sve_stnt1_pn_x4:
3553 case Intrinsic::aarch64_sve_st4q:
3554 N = 4;
3555 break;
3556 default:
3557 llvm_unreachable("unknown intrinsic!");
3558 }
3559
3560 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
3561 Value *BasePtr = Ops[1];
3562
3563 // Does the store have an offset?
3564 if (Ops.size() > (2 + N))
3565 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
3566
3567 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
3568 // need to break up the tuple vector.
3569 SmallVector<llvm::Value*, 5> Operands;
3570 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
3571 Operands.push_back(Elt: Ops[I]);
3572 Operands.append(IL: {Predicate, BasePtr});
3573 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {VTy, BasePtr->getType()});
3574
3575 return Builder.CreateCall(Callee: F, Args: Operands);
3576}
3577
3578// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
3579// svpmullt_pair intrinsics, with the exception that their results are bitcast
3580// to a wider type.
3581Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
3582 SmallVectorImpl<Value *> &Ops,
3583 unsigned BuiltinID) {
3584 // Splat scalar operand to vector (intrinsics with _n infix)
3585 if (TypeFlags.hasSplatOperand()) {
3586 unsigned OpNo = TypeFlags.getSplatOperand();
3587 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
3588 }
3589
3590 // The pair-wise function has a narrower overloaded type.
3591 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Ops[0]->getType());
3592 Value *Call = Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1]});
3593
3594 // Now bitcast to the wider result type.
3595 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
3596 return EmitSVEReinterpret(Val: Call, Ty);
3597}
3598
3599Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
3600 ArrayRef<Value *> Ops, unsigned BuiltinID) {
3601 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
3602 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: OverloadedTy);
3603 return Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt32(C: 0)});
3604}
3605
3606Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
3607 SmallVectorImpl<Value *> &Ops,
3608 unsigned BuiltinID) {
3609 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
3610 auto *VectorTy = getSVEVectorForElementType(EltTy: MemEltTy);
3611 auto *MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
3612
3613 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: MemoryTy);
3614 Value *BasePtr = Ops[1];
3615
3616 // Implement the index operand if not omitted.
3617 if (Ops.size() > 3)
3618 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
3619
3620 Value *PrfOp = Ops.back();
3621
3622 llvm::Type *Tys[2] = {Predicate->getType(), BasePtr->getType()};
3623 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys);
3624 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr, PrfOp});
3625}
3626
3627Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
3628 llvm::Type *ReturnTy,
3629 SmallVectorImpl<Value *> &Ops,
3630 unsigned IntrinsicID,
3631 bool IsZExtReturn) {
3632 QualType LangPTy = E->getArg(Arg: 1)->getType();
3633 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3634 T: LangPTy->castAs<PointerType>()->getPointeeType());
3635
3636 // Mfloat8 types is stored as a vector, so extra work
3637 // to extract sclar element type is necessary.
3638 if (MemEltTy->isVectorTy()) {
3639 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3640 "Only <1 x i8> expected");
3641 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
3642 }
3643
3644 // The vector type that is returned may be different from the
3645 // eventual type loaded from memory.
3646 auto VectorTy = cast<llvm::ScalableVectorType>(Val: ReturnTy);
3647 llvm::ScalableVectorType *MemoryTy = nullptr;
3648 llvm::ScalableVectorType *PredTy = nullptr;
3649 bool IsQuadLoad = false;
3650 switch (IntrinsicID) {
3651 case Intrinsic::aarch64_sve_ld1uwq:
3652 case Intrinsic::aarch64_sve_ld1udq:
3653 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
3654 PredTy = llvm::ScalableVectorType::get(
3655 ElementType: llvm::Type::getInt1Ty(C&: getLLVMContext()), MinNumElts: 1);
3656 IsQuadLoad = true;
3657 break;
3658 default:
3659 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
3660 PredTy = MemoryTy;
3661 break;
3662 }
3663
3664 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
3665 Value *BasePtr = Ops[1];
3666
3667 // Does the load have an offset?
3668 if (Ops.size() > 2)
3669 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
3670
3671 llvm::Type *Tys[2] = {IsQuadLoad ? VectorTy : MemoryTy, BasePtr->getType()};
3672 Function *F = CGM.getIntrinsic(IID: IntrinsicID, Tys);
3673 auto *Load = Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr});
3674 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
3675 CGM.DecorateInstructionWithTBAA(Inst: Load, TBAAInfo);
3676
3677 if (IsQuadLoad)
3678 return Load;
3679
3680 return IsZExtReturn ? Builder.CreateZExt(V: Load, DestTy: VectorTy)
3681 : Builder.CreateSExt(V: Load, DestTy: VectorTy);
3682}
3683
3684Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
3685 SmallVectorImpl<Value *> &Ops,
3686 unsigned IntrinsicID) {
3687 QualType LangPTy = E->getArg(Arg: 1)->getType();
3688 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
3689 T: LangPTy->castAs<PointerType>()->getPointeeType());
3690
3691 // Mfloat8 types is stored as a vector, so extra work
3692 // to extract sclar element type is necessary.
3693 if (MemEltTy->isVectorTy()) {
3694 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
3695 "Only <1 x i8> expected");
3696 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
3697 }
3698
3699 // The vector type that is stored may be different from the
3700 // eventual type stored to memory.
3701 auto VectorTy = cast<llvm::ScalableVectorType>(Val: Ops.back()->getType());
3702 auto MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
3703
3704 auto PredTy = MemoryTy;
3705 auto AddrMemoryTy = MemoryTy;
3706 bool IsQuadStore = false;
3707
3708 switch (IntrinsicID) {
3709 case Intrinsic::aarch64_sve_st1wq:
3710 case Intrinsic::aarch64_sve_st1dq:
3711 AddrMemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
3712 PredTy =
3713 llvm::ScalableVectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), MinNumElts: 1);
3714 IsQuadStore = true;
3715 break;
3716 default:
3717 break;
3718 }
3719 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
3720 Value *BasePtr = Ops[1];
3721
3722 // Does the store have an offset?
3723 if (Ops.size() == 4)
3724 BasePtr = Builder.CreateGEP(Ty: AddrMemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
3725
3726 // Last value is always the data
3727 Value *Val =
3728 IsQuadStore ? Ops.back() : Builder.CreateTrunc(V: Ops.back(), DestTy: MemoryTy);
3729
3730 llvm::Type *Tys[2] = {IsQuadStore ? VectorTy : MemoryTy, BasePtr->getType()};
3731 Function *F = CGM.getIntrinsic(IID: IntrinsicID, Tys);
3732 auto *Store = Builder.CreateCall(Callee: F, Args: {Val, Predicate, BasePtr});
3733 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
3734 CGM.DecorateInstructionWithTBAA(Inst: Store, TBAAInfo);
3735 return Store;
3736}
3737
3738Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
3739 SmallVectorImpl<Value *> &Ops,
3740 unsigned IntID) {
3741 Ops[2] = EmitSVEPredicateCast(
3742 Pred: Ops[2], VTy: getSVEVectorForElementType(EltTy: SVEBuiltinMemEltTy(TypeFlags)));
3743
3744 SmallVector<Value *> NewOps;
3745 NewOps.push_back(Elt: Ops[2]);
3746
3747 llvm::Value *BasePtr = Ops[3];
3748 llvm::Value *RealSlice = Ops[1];
3749 // If the intrinsic contains the vnum parameter, multiply it with the vector
3750 // size in bytes.
3751 if (Ops.size() == 5) {
3752 Function *StreamingVectorLength =
3753 CGM.getIntrinsic(IID: Intrinsic::aarch64_sme_cntsd);
3754 llvm::Value *StreamingVectorLengthCall =
3755 Builder.CreateMul(LHS: Builder.CreateCall(Callee: StreamingVectorLength),
3756 RHS: llvm::ConstantInt::get(Ty: Int64Ty, V: 8), Name: "svl",
3757 /* HasNUW */ true, /* HasNSW */ true);
3758 llvm::Value *Mulvl =
3759 Builder.CreateMul(LHS: StreamingVectorLengthCall, RHS: Ops[4], Name: "mulvl");
3760 // The type of the ptr parameter is void *, so use Int8Ty here.
3761 BasePtr = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[3], IdxList: Mulvl);
3762 RealSlice = Builder.CreateZExt(V: RealSlice, DestTy: Int64Ty);
3763 RealSlice = Builder.CreateAdd(LHS: RealSlice, RHS: Ops[4]);
3764 RealSlice = Builder.CreateTrunc(V: RealSlice, DestTy: Int32Ty);
3765 }
3766 NewOps.push_back(Elt: BasePtr);
3767 NewOps.push_back(Elt: Ops[0]);
3768 NewOps.push_back(Elt: RealSlice);
3769 Function *F = CGM.getIntrinsic(IID: IntID, Tys: BasePtr->getType());
3770 return Builder.CreateCall(Callee: F, Args: NewOps);
3771}
3772
3773Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
3774 SmallVectorImpl<Value *> &Ops,
3775 unsigned IntID) {
3776 auto *VecTy = getSVEType(TypeFlags);
3777 Function *F = CGM.getIntrinsic(IID: IntID, Tys: VecTy);
3778 if (TypeFlags.isReadZA())
3779 Ops[1] = EmitSVEPredicateCast(Pred: Ops[1], VTy: VecTy);
3780 else if (TypeFlags.isWriteZA())
3781 Ops[2] = EmitSVEPredicateCast(Pred: Ops[2], VTy: VecTy);
3782 return Builder.CreateCall(Callee: F, Args: Ops);
3783}
3784
3785Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
3786 SmallVectorImpl<Value *> &Ops,
3787 unsigned IntID) {
3788 // svzero_za() intrinsic zeros the entire za tile and has no paramters.
3789 if (Ops.size() == 0)
3790 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: 255));
3791 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
3792 return Builder.CreateCall(Callee: F, Args: Ops);
3793}
3794
3795Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
3796 SmallVectorImpl<Value *> &Ops,
3797 unsigned IntID) {
3798 if (Ops.size() == 2)
3799 Ops.push_back(Elt: Builder.getInt32(C: 0));
3800 else
3801 Ops[2] = Builder.CreateIntCast(V: Ops[2], DestTy: Int32Ty, isSigned: true);
3802 Function *F = CGM.getIntrinsic(IID: IntID, Tys: Ops[1]->getType());
3803 return Builder.CreateCall(Callee: F, Args: Ops);
3804}
3805
3806// Limit the usage of scalable llvm IR generated by the ACLE by using the
3807// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
3808Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
3809 return Builder.CreateVectorSplat(
3810 EC: cast<llvm::VectorType>(Val: Ty)->getElementCount(), V: Scalar);
3811}
3812
3813Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
3814 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
3815#ifndef NDEBUG
3816 auto *VecTy = cast<llvm::VectorType>(Ty);
3817 ElementCount EC = VecTy->getElementCount();
3818 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
3819 "Only <1 x i8> expected");
3820#endif
3821 Scalar = Builder.CreateExtractElement(Vec: Scalar, Idx: uint64_t(0));
3822 }
3823 return EmitSVEDupX(Scalar, Ty: getSVEVectorForElementType(EltTy: Scalar->getType()));
3824}
3825
3826Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
3827 // FIXME: For big endian this needs an additional REV, or needs a separate
3828 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
3829 // instruction is defined as 'bitwise' equivalent from memory point of
3830 // view (when storing/reloading), whereas the svreinterpret builtin
3831 // implements bitwise equivalent cast from register point of view.
3832 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
3833
3834 if (auto *StructTy = dyn_cast<StructType>(Val: Ty)) {
3835 Value *Tuple = llvm::PoisonValue::get(T: Ty);
3836
3837 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
3838 Value *In = Builder.CreateExtractValue(Agg: Val, Idxs: I);
3839 Value *Out = Builder.CreateBitCast(V: In, DestTy: StructTy->getTypeAtIndex(N: I));
3840 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Out, Idxs: I);
3841 }
3842
3843 return Tuple;
3844 }
3845
3846 return Builder.CreateBitCast(V: Val, DestTy: Ty);
3847}
3848
3849static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3850 SmallVectorImpl<Value *> &Ops) {
3851 auto *SplatZero = Constant::getNullValue(Ty);
3852 Ops.insert(I: Ops.begin(), Elt: SplatZero);
3853}
3854
3855static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
3856 SmallVectorImpl<Value *> &Ops) {
3857 auto *SplatUndef = UndefValue::get(T: Ty);
3858 Ops.insert(I: Ops.begin(), Elt: SplatUndef);
3859}
3860
3861SmallVector<llvm::Type *, 2>
3862CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
3863 llvm::Type *ResultType,
3864 ArrayRef<Value *> Ops) {
3865 if (TypeFlags.isOverloadNone())
3866 return {};
3867
3868 llvm::Type *DefaultType = getSVEType(TypeFlags);
3869
3870 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
3871 return {DefaultType, Ops[1]->getType()};
3872
3873 if (TypeFlags.isOverloadWhileRW())
3874 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
3875
3876 if (TypeFlags.isOverloadDefaultAndOp0())
3877 return {DefaultType, Ops[0]->getType()};
3878
3879 if (TypeFlags.isOverloadFirstandLast())
3880 return {Ops[0]->getType(), Ops.back()->getType()};
3881
3882 if (TypeFlags.isReductionQV())
3883 return {ResultType, Ops[1]->getType()};
3884
3885 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
3886 return {DefaultType};
3887}
3888
3889Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
3890 ArrayRef<Value *> Ops) {
3891 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
3892 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
3893 unsigned Idx = cast<ConstantInt>(Val: Ops[1])->getZExtValue();
3894
3895 if (TypeFlags.isTupleSet())
3896 return Builder.CreateInsertValue(Agg: Ops[0], Val: Ops[2], Idxs: Idx);
3897 return Builder.CreateExtractValue(Agg: Ops[0], Idxs: Idx);
3898}
3899
3900Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
3901 llvm::Type *Ty,
3902 ArrayRef<Value *> Ops) {
3903 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
3904
3905 Value *Tuple = llvm::PoisonValue::get(T: Ty);
3906 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
3907 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Ops[Idx], Idxs: Idx);
3908
3909 return Tuple;
3910}
3911
3912void CodeGenFunction::GetAArch64SVEProcessedOperands(
3913 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
3914 SVETypeFlags TypeFlags) {
3915 // Find out if any arguments are required to be integer constant expressions.
3916 unsigned ICEArguments = 0;
3917 ASTContext::GetBuiltinTypeError Error;
3918 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
3919 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3920
3921 // Tuple set/get only requires one insert/extract vector, which is
3922 // created by EmitSVETupleSetOrGet.
3923 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
3924
3925 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
3926 bool IsICE = ICEArguments & (1 << i);
3927 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: i));
3928
3929 if (IsICE) {
3930 // If this is required to be a constant, constant fold it so that we know
3931 // that the generated intrinsic gets a ConstantInt.
3932 std::optional<llvm::APSInt> Result =
3933 E->getArg(Arg: i)->getIntegerConstantExpr(Ctx: getContext());
3934 assert(Result && "Expected argument to be a constant");
3935
3936 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
3937 // truncate because the immediate has been range checked and no valid
3938 // immediate requires more than a handful of bits.
3939 *Result = Result->extOrTrunc(width: 32);
3940 Ops.push_back(Elt: llvm::ConstantInt::get(Context&: getLLVMContext(), V: *Result));
3941 continue;
3942 }
3943
3944 if (isa<StructType>(Val: Arg->getType()) && !IsTupleGetOrSet) {
3945 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
3946 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Arg, Idxs: I));
3947
3948 continue;
3949 }
3950
3951 Ops.push_back(Elt: Arg);
3952 }
3953}
3954
3955Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
3956 const CallExpr *E) {
3957 llvm::Type *Ty = ConvertType(T: E->getType());
3958 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
3959 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
3960 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
3961 return EmitSVEReinterpret(Val, Ty);
3962 }
3963
3964 auto *Builtin =
3965 findARMVectorIntrinsicInMap(IntrinsicMap: ArrayRef(AArch64SVEIntrinsicMap), BuiltinID,
3966 MapProvenSorted&: AArch64SVEIntrinsicsProvenSorted);
3967
3968 llvm::SmallVector<Value *, 4> Ops;
3969 SVETypeFlags TypeFlags(Builtin->TypeModifier);
3970 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
3971
3972 if (TypeFlags.isLoad())
3973 return EmitSVEMaskedLoad(E, ReturnTy: Ty, Ops, IntrinsicID: Builtin->LLVMIntrinsic,
3974 IsZExtReturn: TypeFlags.isZExtReturn());
3975 if (TypeFlags.isStore())
3976 return EmitSVEMaskedStore(E, Ops, IntrinsicID: Builtin->LLVMIntrinsic);
3977 if (TypeFlags.isGatherLoad())
3978 return EmitSVEGatherLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
3979 if (TypeFlags.isScatterStore())
3980 return EmitSVEScatterStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
3981 if (TypeFlags.isPrefetch())
3982 return EmitSVEPrefetchLoad(TypeFlags, Ops, BuiltinID: Builtin->LLVMIntrinsic);
3983 if (TypeFlags.isGatherPrefetch())
3984 return EmitSVEGatherPrefetch(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
3985 if (TypeFlags.isStructLoad())
3986 return EmitSVEStructLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
3987 if (TypeFlags.isStructStore())
3988 return EmitSVEStructStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
3989 if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
3990 return EmitSVETupleSetOrGet(TypeFlags, Ops);
3991 if (TypeFlags.isTupleCreate())
3992 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
3993 if (TypeFlags.isUndef())
3994 return UndefValue::get(T: Ty);
3995
3996 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
3997 // -------------------------------------------------------------------
3998 if (Builtin->LLVMIntrinsic != 0) {
3999 // Emit set FPMR for intrinsics that require it
4000 if (TypeFlags.setsFPMR())
4001 Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_set_fpmr),
4002 Args: Ops.pop_back_val());
4003 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4004 InsertExplicitZeroOperand(Builder, Ty, Ops);
4005
4006 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4007 InsertExplicitUndefOperand(Builder, Ty, Ops);
4008
4009 // Some ACLE builtins leave out the argument to specify the predicate
4010 // pattern, which is expected to be expanded to an SV_ALL pattern.
4011 if (TypeFlags.isAppendSVALL())
4012 Ops.push_back(Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4013 if (TypeFlags.isInsertOp1SVALL())
4014 Ops.insert(I: &Ops[1], Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4015
4016 // Predicates must match the main datatype.
4017 for (Value *&Op : Ops)
4018 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Op->getType()))
4019 if (PredTy->getElementType()->isIntegerTy(BitWidth: 1))
4020 Op = EmitSVEPredicateCast(Pred: Op, VTy: getSVEType(TypeFlags));
4021
4022 // Splat scalar operand to vector (intrinsics with _n infix)
4023 if (TypeFlags.hasSplatOperand()) {
4024 unsigned OpNo = TypeFlags.getSplatOperand();
4025 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
4026 }
4027
4028 if (TypeFlags.isReverseCompare())
4029 std::swap(a&: Ops[1], b&: Ops[2]);
4030 else if (TypeFlags.isReverseUSDOT())
4031 std::swap(a&: Ops[1], b&: Ops[2]);
4032 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4033 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4034 std::swap(a&: Ops[1], b&: Ops[2]);
4035 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4036 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4037 std::swap(a&: Ops[1], b&: Ops[3]);
4038
4039 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4040 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4041 llvm::Type *OpndTy = Ops[1]->getType();
4042 auto *SplatZero = Constant::getNullValue(Ty: OpndTy);
4043 Ops[1] = Builder.CreateSelect(C: Ops[0], True: Ops[1], False: SplatZero);
4044 }
4045
4046 Function *F = CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic,
4047 Tys: getSVEOverloadTypes(TypeFlags, ResultType: Ty, Ops));
4048 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
4049
4050 if (Call->getType() == Ty)
4051 return Call;
4052
4053 // Predicate results must be converted to svbool_t.
4054 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Val: Ty))
4055 return EmitSVEPredicateCast(Pred: Call, VTy: PredTy);
4056 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Val: Ty))
4057 return EmitSVEPredicateTupleCast(PredTuple: Call, Ty: PredTupleTy);
4058
4059 llvm_unreachable("unsupported element count!");
4060 }
4061
4062 switch (BuiltinID) {
4063 default:
4064 return nullptr;
4065
4066 case SVE::BI__builtin_sve_svreinterpret_b: {
4067 auto SVCountTy =
4068 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4069 Function *CastFromSVCountF =
4070 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_to_svbool, Tys: SVCountTy);
4071 return Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]);
4072 }
4073 case SVE::BI__builtin_sve_svreinterpret_c: {
4074 auto SVCountTy =
4075 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4076 Function *CastToSVCountF =
4077 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_from_svbool, Tys: SVCountTy);
4078 return Builder.CreateCall(Callee: CastToSVCountF, Args: Ops[0]);
4079 }
4080
4081 case SVE::BI__builtin_sve_svpsel_lane_b8:
4082 case SVE::BI__builtin_sve_svpsel_lane_b16:
4083 case SVE::BI__builtin_sve_svpsel_lane_b32:
4084 case SVE::BI__builtin_sve_svpsel_lane_b64:
4085 case SVE::BI__builtin_sve_svpsel_lane_c8:
4086 case SVE::BI__builtin_sve_svpsel_lane_c16:
4087 case SVE::BI__builtin_sve_svpsel_lane_c32:
4088 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4089 bool IsSVCount = isa<TargetExtType>(Val: Ops[0]->getType());
4090 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4091 "aarch64.svcount")) &&
4092 "Unexpected TargetExtType");
4093 auto SVCountTy =
4094 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4095 Function *CastFromSVCountF =
4096 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_to_svbool, Tys: SVCountTy);
4097 Function *CastToSVCountF =
4098 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_from_svbool, Tys: SVCountTy);
4099
4100 auto OverloadedTy = getSVEType(TypeFlags: SVETypeFlags(Builtin->TypeModifier));
4101 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_psel, Tys: OverloadedTy);
4102 llvm::Value *Ops0 =
4103 IsSVCount ? Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]) : Ops[0];
4104 llvm::Value *Ops1 = EmitSVEPredicateCast(Pred: Ops[1], VTy: OverloadedTy);
4105 llvm::Value *PSel = Builder.CreateCall(Callee: F, Args: {Ops0, Ops1, Ops[2]});
4106 return IsSVCount ? Builder.CreateCall(Callee: CastToSVCountF, Args: PSel) : PSel;
4107 }
4108 case SVE::BI__builtin_sve_svmov_b_z: {
4109 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4110 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4111 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4112 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_and_z, Tys: OverloadedTy);
4113 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[1]});
4114 }
4115
4116 case SVE::BI__builtin_sve_svnot_b_z: {
4117 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4118 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4119 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4120 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_eor_z, Tys: OverloadedTy);
4121 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[0]});
4122 }
4123
4124 case SVE::BI__builtin_sve_svmovlb_u16:
4125 case SVE::BI__builtin_sve_svmovlb_u32:
4126 case SVE::BI__builtin_sve_svmovlb_u64:
4127 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_ushllb);
4128
4129 case SVE::BI__builtin_sve_svmovlb_s16:
4130 case SVE::BI__builtin_sve_svmovlb_s32:
4131 case SVE::BI__builtin_sve_svmovlb_s64:
4132 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_sshllb);
4133
4134 case SVE::BI__builtin_sve_svmovlt_u16:
4135 case SVE::BI__builtin_sve_svmovlt_u32:
4136 case SVE::BI__builtin_sve_svmovlt_u64:
4137 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_ushllt);
4138
4139 case SVE::BI__builtin_sve_svmovlt_s16:
4140 case SVE::BI__builtin_sve_svmovlt_s32:
4141 case SVE::BI__builtin_sve_svmovlt_s64:
4142 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_sshllt);
4143
4144 case SVE::BI__builtin_sve_svpmullt_u16:
4145 case SVE::BI__builtin_sve_svpmullt_u64:
4146 case SVE::BI__builtin_sve_svpmullt_n_u16:
4147 case SVE::BI__builtin_sve_svpmullt_n_u64:
4148 return EmitSVEPMull(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_pmullt_pair);
4149
4150 case SVE::BI__builtin_sve_svpmullb_u16:
4151 case SVE::BI__builtin_sve_svpmullb_u64:
4152 case SVE::BI__builtin_sve_svpmullb_n_u16:
4153 case SVE::BI__builtin_sve_svpmullb_n_u64:
4154 return EmitSVEPMull(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_pmullb_pair);
4155
4156 case SVE::BI__builtin_sve_svdup_n_b8:
4157 case SVE::BI__builtin_sve_svdup_n_b16:
4158 case SVE::BI__builtin_sve_svdup_n_b32:
4159 case SVE::BI__builtin_sve_svdup_n_b64: {
4160 Value *CmpNE =
4161 Builder.CreateICmpNE(LHS: Ops[0], RHS: Constant::getNullValue(Ty: Ops[0]->getType()));
4162 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4163 Value *Dup = EmitSVEDupX(Scalar: CmpNE, Ty: OverloadedTy);
4164 return EmitSVEPredicateCast(Pred: Dup, VTy: cast<llvm::ScalableVectorType>(Val: Ty));
4165 }
4166
4167 case SVE::BI__builtin_sve_svdupq_n_b8:
4168 case SVE::BI__builtin_sve_svdupq_n_b16:
4169 case SVE::BI__builtin_sve_svdupq_n_b32:
4170 case SVE::BI__builtin_sve_svdupq_n_b64:
4171 case SVE::BI__builtin_sve_svdupq_n_u8:
4172 case SVE::BI__builtin_sve_svdupq_n_s8:
4173 case SVE::BI__builtin_sve_svdupq_n_u64:
4174 case SVE::BI__builtin_sve_svdupq_n_f64:
4175 case SVE::BI__builtin_sve_svdupq_n_s64:
4176 case SVE::BI__builtin_sve_svdupq_n_u16:
4177 case SVE::BI__builtin_sve_svdupq_n_f16:
4178 case SVE::BI__builtin_sve_svdupq_n_bf16:
4179 case SVE::BI__builtin_sve_svdupq_n_s16:
4180 case SVE::BI__builtin_sve_svdupq_n_u32:
4181 case SVE::BI__builtin_sve_svdupq_n_f32:
4182 case SVE::BI__builtin_sve_svdupq_n_s32: {
4183 // These builtins are implemented by storing each element to an array and using
4184 // ld1rq to materialize a vector.
4185 unsigned NumOpnds = Ops.size();
4186
4187 bool IsBoolTy =
4188 cast<llvm::VectorType>(Val: Ty)->getElementType()->isIntegerTy(BitWidth: 1);
4189
4190 // For svdupq_n_b* the element type of is an integer of type 128/numelts,
4191 // so that the compare can use the width that is natural for the expected
4192 // number of predicate lanes.
4193 llvm::Type *EltTy = Ops[0]->getType();
4194 if (IsBoolTy)
4195 EltTy = IntegerType::get(C&: getLLVMContext(), NumBits: SVEBitsPerBlock / NumOpnds);
4196
4197 SmallVector<llvm::Value *, 16> VecOps;
4198 for (unsigned I = 0; I < NumOpnds; ++I)
4199 VecOps.push_back(Elt: Builder.CreateZExt(V: Ops[I], DestTy: EltTy));
4200 Value *Vec = BuildVector(Ops: VecOps);
4201
4202 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4203 Value *InsertSubVec = Builder.CreateInsertVector(
4204 DstType: OverloadedTy, SrcVec: PoisonValue::get(T: OverloadedTy), SubVec: Vec, Idx: uint64_t(0));
4205
4206 Function *F =
4207 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_dupq_lane, Tys: OverloadedTy);
4208 Value *DupQLane =
4209 Builder.CreateCall(Callee: F, Args: {InsertSubVec, Builder.getInt64(C: 0)});
4210
4211 if (!IsBoolTy)
4212 return DupQLane;
4213
4214 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4215 Constant *Pred = ConstantInt::getTrue(Ty: getSVEPredType(TypeFlags));
4216
4217 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4218 F = CGM.getIntrinsic(IID: NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4219 : Intrinsic::aarch64_sve_cmpne_wide,
4220 Tys: OverloadedTy);
4221 Value *Call = Builder.CreateCall(
4222 Callee: F, Args: {Pred, DupQLane, EmitSVEDupX(Scalar: Builder.getInt64(C: 0))});
4223 return EmitSVEPredicateCast(Pred: Call, VTy: cast<llvm::ScalableVectorType>(Val: Ty));
4224 }
4225
4226 case SVE::BI__builtin_sve_svpfalse_b:
4227 return ConstantInt::getFalse(Ty);
4228
4229 case SVE::BI__builtin_sve_svpfalse_c: {
4230 auto SVBoolTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
4231 Function *CastToSVCountF =
4232 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_from_svbool, Tys: Ty);
4233 return Builder.CreateCall(Callee: CastToSVCountF, Args: ConstantInt::getFalse(Ty: SVBoolTy));
4234 }
4235
4236 case SVE::BI__builtin_sve_svlen_bf16:
4237 case SVE::BI__builtin_sve_svlen_f16:
4238 case SVE::BI__builtin_sve_svlen_f32:
4239 case SVE::BI__builtin_sve_svlen_f64:
4240 case SVE::BI__builtin_sve_svlen_s8:
4241 case SVE::BI__builtin_sve_svlen_s16:
4242 case SVE::BI__builtin_sve_svlen_s32:
4243 case SVE::BI__builtin_sve_svlen_s64:
4244 case SVE::BI__builtin_sve_svlen_u8:
4245 case SVE::BI__builtin_sve_svlen_u16:
4246 case SVE::BI__builtin_sve_svlen_u32:
4247 case SVE::BI__builtin_sve_svlen_u64: {
4248 SVETypeFlags TF(Builtin->TypeModifier);
4249 return Builder.CreateElementCount(Ty, EC: getSVEType(TypeFlags: TF)->getElementCount());
4250 }
4251
4252 case SVE::BI__builtin_sve_svtbl2_u8:
4253 case SVE::BI__builtin_sve_svtbl2_s8:
4254 case SVE::BI__builtin_sve_svtbl2_u16:
4255 case SVE::BI__builtin_sve_svtbl2_s16:
4256 case SVE::BI__builtin_sve_svtbl2_u32:
4257 case SVE::BI__builtin_sve_svtbl2_s32:
4258 case SVE::BI__builtin_sve_svtbl2_u64:
4259 case SVE::BI__builtin_sve_svtbl2_s64:
4260 case SVE::BI__builtin_sve_svtbl2_f16:
4261 case SVE::BI__builtin_sve_svtbl2_bf16:
4262 case SVE::BI__builtin_sve_svtbl2_f32:
4263 case SVE::BI__builtin_sve_svtbl2_f64: {
4264 SVETypeFlags TF(Builtin->TypeModifier);
4265 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_tbl2, Tys: getSVEType(TypeFlags: TF));
4266 return Builder.CreateCall(Callee: F, Args: Ops);
4267 }
4268
4269 case SVE::BI__builtin_sve_svset_neonq_s8:
4270 case SVE::BI__builtin_sve_svset_neonq_s16:
4271 case SVE::BI__builtin_sve_svset_neonq_s32:
4272 case SVE::BI__builtin_sve_svset_neonq_s64:
4273 case SVE::BI__builtin_sve_svset_neonq_u8:
4274 case SVE::BI__builtin_sve_svset_neonq_u16:
4275 case SVE::BI__builtin_sve_svset_neonq_u32:
4276 case SVE::BI__builtin_sve_svset_neonq_u64:
4277 case SVE::BI__builtin_sve_svset_neonq_f16:
4278 case SVE::BI__builtin_sve_svset_neonq_f32:
4279 case SVE::BI__builtin_sve_svset_neonq_f64:
4280 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4281 return Builder.CreateInsertVector(DstType: Ty, SrcVec: Ops[0], SubVec: Ops[1], Idx: uint64_t(0));
4282 }
4283
4284 case SVE::BI__builtin_sve_svget_neonq_s8:
4285 case SVE::BI__builtin_sve_svget_neonq_s16:
4286 case SVE::BI__builtin_sve_svget_neonq_s32:
4287 case SVE::BI__builtin_sve_svget_neonq_s64:
4288 case SVE::BI__builtin_sve_svget_neonq_u8:
4289 case SVE::BI__builtin_sve_svget_neonq_u16:
4290 case SVE::BI__builtin_sve_svget_neonq_u32:
4291 case SVE::BI__builtin_sve_svget_neonq_u64:
4292 case SVE::BI__builtin_sve_svget_neonq_f16:
4293 case SVE::BI__builtin_sve_svget_neonq_f32:
4294 case SVE::BI__builtin_sve_svget_neonq_f64:
4295 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4296 return Builder.CreateExtractVector(DstType: Ty, SrcVec: Ops[0], Idx: uint64_t(0));
4297 }
4298
4299 case SVE::BI__builtin_sve_svdup_neonq_s8:
4300 case SVE::BI__builtin_sve_svdup_neonq_s16:
4301 case SVE::BI__builtin_sve_svdup_neonq_s32:
4302 case SVE::BI__builtin_sve_svdup_neonq_s64:
4303 case SVE::BI__builtin_sve_svdup_neonq_u8:
4304 case SVE::BI__builtin_sve_svdup_neonq_u16:
4305 case SVE::BI__builtin_sve_svdup_neonq_u32:
4306 case SVE::BI__builtin_sve_svdup_neonq_u64:
4307 case SVE::BI__builtin_sve_svdup_neonq_f16:
4308 case SVE::BI__builtin_sve_svdup_neonq_f32:
4309 case SVE::BI__builtin_sve_svdup_neonq_f64:
4310 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4311 Value *Insert = Builder.CreateInsertVector(DstType: Ty, SrcVec: PoisonValue::get(T: Ty), SubVec: Ops[0],
4312 Idx: uint64_t(0));
4313 return Builder.CreateIntrinsic(ID: Intrinsic::aarch64_sve_dupq_lane, OverloadTypes: {Ty},
4314 Args: {Insert, Builder.getInt64(C: 0)});
4315 }
4316 }
4317
4318 /// Should not happen
4319 return nullptr;
4320}
4321
4322static void swapCommutativeSMEOperands(unsigned BuiltinID,
4323 SmallVectorImpl<Value *> &Ops) {
4324 unsigned MultiVec;
4325 switch (BuiltinID) {
4326 default:
4327 return;
4328 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4329 MultiVec = 1;
4330 break;
4331 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4332 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4333 MultiVec = 2;
4334 break;
4335 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4336 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4337 MultiVec = 4;
4338 break;
4339 }
4340
4341 if (MultiVec > 0)
4342 for (unsigned I = 0; I < MultiVec; ++I)
4343 std::swap(a&: Ops[I + 1], b&: Ops[I + 1 + MultiVec]);
4344}
4345
4346Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4347 const CallExpr *E) {
4348 auto *Builtin =
4349 findARMVectorIntrinsicInMap(IntrinsicMap: ArrayRef(AArch64SMEIntrinsicMap), BuiltinID,
4350 MapProvenSorted&: AArch64SMEIntrinsicsProvenSorted);
4351
4352 llvm::SmallVector<Value *, 4> Ops;
4353 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4354 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4355
4356 if (TypeFlags.isLoad() || TypeFlags.isStore())
4357 return EmitSMELd1St1(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4358 if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4359 return EmitSMEReadWrite(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4360 if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4361 BuiltinID == SME::BI__builtin_sme_svzero_za)
4362 return EmitSMEZero(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4363 if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4364 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4365 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4366 BuiltinID == SME::BI__builtin_sme_svstr_za)
4367 return EmitSMELdrStr(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4368
4369 // Emit set FPMR for intrinsics that require it
4370 if (TypeFlags.setsFPMR())
4371 Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_set_fpmr),
4372 Args: Ops.pop_back_val());
4373 // Handle builtins which require their multi-vector operands to be swapped
4374 swapCommutativeSMEOperands(BuiltinID, Ops);
4375
4376 auto isCntsBuiltin = [&]() {
4377 switch (BuiltinID) {
4378 default:
4379 return 0;
4380 case SME::BI__builtin_sme_svcntsb:
4381 return 8;
4382 case SME::BI__builtin_sme_svcntsh:
4383 return 4;
4384 case SME::BI__builtin_sme_svcntsw:
4385 return 2;
4386 }
4387 };
4388
4389 if (auto Mul = isCntsBuiltin()) {
4390 llvm::Value *Cntd =
4391 Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_sme_cntsd));
4392 return Builder.CreateMul(LHS: Cntd, RHS: llvm::ConstantInt::get(Ty: Int64Ty, V: Mul),
4393 Name: "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4394 }
4395
4396 // Should not happen!
4397 if (Builtin->LLVMIntrinsic == 0)
4398 return nullptr;
4399
4400 // Predicates must match the main datatype.
4401 for (Value *&Op : Ops)
4402 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Op->getType()))
4403 if (PredTy->getElementType()->isIntegerTy(BitWidth: 1))
4404 Op = EmitSVEPredicateCast(Pred: Op, VTy: getSVEType(TypeFlags));
4405
4406 if (BuiltinID == SME::BI__builtin_sme_svldr_zt ||
4407 BuiltinID == SME::BI__builtin_sme_svstr_zt) {
4408 Function *F = CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic, Tys: Ops[1]->getType());
4409 return Builder.CreateCall(Callee: F, Args: Ops);
4410 }
4411
4412 Function *F =
4413 TypeFlags.isOverloadNone()
4414 ? CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic)
4415 : CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic, Tys: {getSVEType(TypeFlags)});
4416
4417 return Builder.CreateCall(Callee: F, Args: Ops);
4418}
4419
4420/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4421/// return it as an i8 pointer.
4422Value *readX18AsPtr(CodeGenFunction &CGF) {
4423 LLVMContext &Context = CGF.CGM.getLLVMContext();
4424 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: "x18")};
4425 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
4426 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
4427 llvm::Function *F =
4428 CGF.CGM.getIntrinsic(IID: Intrinsic::read_register, Tys: {CGF.Int64Ty});
4429 llvm::Value *X18 = CGF.Builder.CreateCall(Callee: F, Args: Metadata);
4430 return CGF.Builder.CreateIntToPtr(V: X18, DestTy: CGF.Int8PtrTy);
4431}
4432
4433Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4434 const CallExpr *E,
4435 llvm::Triple::ArchType Arch) {
4436 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4437 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4438 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4439
4440 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4441 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4442 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4443
4444 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4445 return EmitAArch64CpuSupports(E);
4446
4447 unsigned HintID = static_cast<unsigned>(-1);
4448 switch (BuiltinID) {
4449 default: break;
4450 case clang::AArch64::BI__builtin_arm_nop:
4451 HintID = 0;
4452 break;
4453 case clang::AArch64::BI__builtin_arm_yield:
4454 case clang::AArch64::BI__yield:
4455 HintID = 1;
4456 break;
4457 case clang::AArch64::BI__builtin_arm_wfe:
4458 case clang::AArch64::BI__wfe:
4459 HintID = 2;
4460 break;
4461 case clang::AArch64::BI__builtin_arm_wfi:
4462 case clang::AArch64::BI__wfi:
4463 HintID = 3;
4464 break;
4465 case clang::AArch64::BI__builtin_arm_sev:
4466 case clang::AArch64::BI__sev:
4467 HintID = 4;
4468 break;
4469 case clang::AArch64::BI__builtin_arm_sevl:
4470 case clang::AArch64::BI__sevl:
4471 HintID = 5;
4472 break;
4473 }
4474
4475 if (HintID != static_cast<unsigned>(-1)) {
4476 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_hint);
4477 return Builder.CreateCall(Callee: F, Args: llvm::ConstantInt::get(Ty: Int32Ty, V: HintID));
4478 }
4479
4480 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
4481 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_break);
4482 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4483 return Builder.CreateCall(Callee: F, Args: Builder.CreateZExt(V: Arg, DestTy: CGM.Int32Ty));
4484 }
4485
4486 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
4487 // Create call to __arm_sme_state and store the results to the two pointers.
4488 CallInst *CI = EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(
4489 Ty: llvm::FunctionType::get(Result: StructType::get(elt1: CGM.Int64Ty, elts: CGM.Int64Ty), Params: {},
4490 isVarArg: false),
4491 Name: "__arm_sme_state"));
4492 auto Attrs = AttributeList().addFnAttribute(C&: getLLVMContext(),
4493 Kind: "aarch64_pstate_sm_compatible");
4494 CI->setAttributes(Attrs);
4495 CI->setCallingConv(
4496 llvm::CallingConv::
4497 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
4498 Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: CI, Idxs: 0),
4499 Addr: EmitPointerWithAlignment(Addr: E->getArg(Arg: 0)));
4500 return Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: CI, Idxs: 1),
4501 Addr: EmitPointerWithAlignment(Addr: E->getArg(Arg: 1)));
4502 }
4503
4504 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
4505 assert((getContext().getTypeSize(E->getType()) == 32) &&
4506 "rbit of unusual size!");
4507 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4508 return Builder.CreateCall(
4509 Callee: CGM.getIntrinsic(IID: Intrinsic::bitreverse, Tys: Arg->getType()), Args: Arg, Name: "rbit");
4510 }
4511 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
4512 assert((getContext().getTypeSize(E->getType()) == 64) &&
4513 "rbit of unusual size!");
4514 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4515 return Builder.CreateCall(
4516 Callee: CGM.getIntrinsic(IID: Intrinsic::bitreverse, Tys: Arg->getType()), Args: Arg, Name: "rbit");
4517 }
4518
4519 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
4520 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
4521 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4522 Function *F = CGM.getIntrinsic(IID: Intrinsic::ctlz, Tys: Arg->getType());
4523 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
4524 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
4525 Res = Builder.CreateTrunc(V: Res, DestTy: Builder.getInt32Ty());
4526 return Res;
4527 }
4528
4529 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
4530 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4531 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_cls), Args: Arg,
4532 Name: "cls");
4533 }
4534 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
4535 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4536 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_cls64), Args: Arg,
4537 Name: "cls");
4538 }
4539
4540 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
4541 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
4542 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4543 llvm::Type *Ty = Arg->getType();
4544 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_frint32z, Tys: Ty),
4545 Args: Arg, Name: "frint32z");
4546 }
4547
4548 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
4549 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
4550 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4551 llvm::Type *Ty = Arg->getType();
4552 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_frint64z, Tys: Ty),
4553 Args: Arg, Name: "frint64z");
4554 }
4555
4556 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
4557 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
4558 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4559 llvm::Type *Ty = Arg->getType();
4560 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_frint32x, Tys: Ty),
4561 Args: Arg, Name: "frint32x");
4562 }
4563
4564 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
4565 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
4566 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4567 llvm::Type *Ty = Arg->getType();
4568 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_frint64x, Tys: Ty),
4569 Args: Arg, Name: "frint64x");
4570 }
4571
4572 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
4573 assert((getContext().getTypeSize(E->getType()) == 32) &&
4574 "__jcvt of unusual size!");
4575 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
4576 return Builder.CreateCall(
4577 Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_fjcvtzs), Args: Arg);
4578 }
4579
4580 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
4581 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
4582 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
4583 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
4584 llvm::Value *MemAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
4585 llvm::Value *ValPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
4586
4587 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
4588 // Load from the address via an LLVM intrinsic, receiving a
4589 // tuple of 8 i64 words, and store each one to ValPtr.
4590 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_ld64b);
4591 llvm::Value *Val = Builder.CreateCall(Callee: F, Args: MemAddr);
4592 llvm::Value *ToRet;
4593 for (size_t i = 0; i < 8; i++) {
4594 llvm::Value *ValOffsetPtr =
4595 Builder.CreateGEP(Ty: Int64Ty, Ptr: ValPtr, IdxList: Builder.getInt32(C: i));
4596 Address Addr =
4597 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(Quantity: 8));
4598 ToRet = Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: Val, Idxs: i), Addr);
4599 }
4600 return ToRet;
4601 }
4602
4603 // Load 8 i64 words from ValPtr, and store them to the address
4604 // via an LLVM intrinsic.
4605 SmallVector<llvm::Value *, 9> Args;
4606 Args.push_back(Elt: MemAddr);
4607 for (size_t i = 0; i < 8; i++) {
4608 llvm::Value *ValOffsetPtr =
4609 Builder.CreateGEP(Ty: Int64Ty, Ptr: ValPtr, IdxList: Builder.getInt32(C: i));
4610 Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(Quantity: 8));
4611 Args.push_back(Elt: Builder.CreateLoad(Addr));
4612 }
4613
4614 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
4615 ? Intrinsic::aarch64_st64b
4616 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
4617 ? Intrinsic::aarch64_st64bv
4618 : Intrinsic::aarch64_st64bv0);
4619 Function *F = CGM.getIntrinsic(IID: Intr);
4620 return Builder.CreateCall(Callee: F, Args);
4621 }
4622
4623 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
4624 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
4625
4626 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
4627 ? Intrinsic::aarch64_rndr
4628 : Intrinsic::aarch64_rndrrs);
4629 Function *F = CGM.getIntrinsic(IID: Intr);
4630 llvm::Value *Val = Builder.CreateCall(Callee: F);
4631 Value *RandomValue = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
4632 Value *Status = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
4633
4634 Address MemAddress = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4635 Builder.CreateStore(Val: RandomValue, Addr: MemAddress);
4636 Status = Builder.CreateZExt(V: Status, DestTy: Int32Ty);
4637 return Status;
4638 }
4639
4640 if (BuiltinID == clang::AArch64::BI__clear_cache) {
4641 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4642 const FunctionDecl *FD = E->getDirectCallee();
4643 Value *Ops[2];
4644 for (unsigned i = 0; i < 2; i++)
4645 Ops[i] = EmitScalarExpr(E: E->getArg(Arg: i));
4646 llvm::Type *Ty = CGM.getTypes().ConvertType(T: FD->getType());
4647 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Val: Ty);
4648 StringRef Name = FD->getName();
4649 return EmitNounwindRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name), args: Ops);
4650 }
4651
4652 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4653 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
4654 getContext().getTypeSize(T: E->getType()) == 128) {
4655 Function *F =
4656 CGM.getIntrinsic(IID: BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4657 ? Intrinsic::aarch64_ldaxp
4658 : Intrinsic::aarch64_ldxp);
4659
4660 Value *LdPtr = EmitScalarExpr(E: E->getArg(Arg: 0));
4661 Value *Val = Builder.CreateCall(Callee: F, Args: LdPtr, Name: "ldxp");
4662
4663 Value *Val0 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
4664 Value *Val1 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
4665 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
4666 Val0 = Builder.CreateZExt(V: Val0, DestTy: Int128Ty);
4667 Val1 = Builder.CreateZExt(V: Val1, DestTy: Int128Ty);
4668
4669 Value *ShiftCst = llvm::ConstantInt::get(Ty: Int128Ty, V: 64);
4670 Val = Builder.CreateShl(LHS: Val0, RHS: ShiftCst, Name: "shl", HasNUW: true /* nuw */);
4671 Val = Builder.CreateOr(LHS: Val, RHS: Val1);
4672 return Builder.CreateBitCast(V: Val, DestTy: ConvertType(T: E->getType()));
4673 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
4674 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
4675 Value *LoadAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
4676
4677 QualType Ty = E->getType();
4678 llvm::Type *RealResTy = ConvertType(T: Ty);
4679 llvm::Type *IntTy =
4680 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
4681
4682 Function *F =
4683 CGM.getIntrinsic(IID: BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
4684 ? Intrinsic::aarch64_ldaxr
4685 : Intrinsic::aarch64_ldxr,
4686 Tys: DefaultPtrTy);
4687 CallInst *Val = Builder.CreateCall(Callee: F, Args: LoadAddr, Name: "ldxr");
4688 Val->addParamAttr(
4689 ArgNo: 0, Attr: Attribute::get(Context&: getLLVMContext(), Kind: Attribute::ElementType, Ty: IntTy));
4690
4691 if (RealResTy->isPointerTy())
4692 return Builder.CreateIntToPtr(V: Val, DestTy: RealResTy);
4693
4694 llvm::Type *IntResTy = llvm::IntegerType::get(
4695 C&: getLLVMContext(), NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: RealResTy));
4696 return Builder.CreateBitCast(V: Builder.CreateTruncOrBitCast(V: Val, DestTy: IntResTy),
4697 DestTy: RealResTy);
4698 }
4699
4700 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4701 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
4702 getContext().getTypeSize(T: E->getArg(Arg: 0)->getType()) == 128) {
4703 Function *F =
4704 CGM.getIntrinsic(IID: BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4705 ? Intrinsic::aarch64_stlxp
4706 : Intrinsic::aarch64_stxp);
4707 llvm::Type *STy = llvm::StructType::get(elt1: Int64Ty, elts: Int64Ty);
4708
4709 Address Tmp = CreateMemTempWithoutCast(T: E->getArg(Arg: 0)->getType());
4710 EmitAnyExprToMem(E: E->getArg(Arg: 0), Location: Tmp, Quals: Qualifiers(), /*init*/ IsInitializer: true);
4711
4712 Tmp = Tmp.withElementType(ElemTy: STy);
4713 llvm::Value *Val = Builder.CreateLoad(Addr: Tmp);
4714
4715 Value *Arg0 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
4716 Value *Arg1 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
4717 Value *StPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
4718 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1, StPtr}, Name: "stxp");
4719 }
4720
4721 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
4722 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
4723 Value *StoreVal = EmitScalarExpr(E: E->getArg(Arg: 0));
4724 Value *StoreAddr = EmitScalarExpr(E: E->getArg(Arg: 1));
4725
4726 QualType Ty = E->getArg(Arg: 0)->getType();
4727 llvm::Type *StoreTy =
4728 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
4729
4730 if (StoreVal->getType()->isPointerTy())
4731 StoreVal = Builder.CreatePtrToInt(V: StoreVal, DestTy: Int64Ty);
4732 else {
4733 llvm::Type *IntTy = llvm::IntegerType::get(
4734 C&: getLLVMContext(),
4735 NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: StoreVal->getType()));
4736 StoreVal = Builder.CreateBitCast(V: StoreVal, DestTy: IntTy);
4737 StoreVal = Builder.CreateZExtOrBitCast(V: StoreVal, DestTy: Int64Ty);
4738 }
4739
4740 Function *F =
4741 CGM.getIntrinsic(IID: BuiltinID == clang::AArch64::BI__builtin_arm_stlex
4742 ? Intrinsic::aarch64_stlxr
4743 : Intrinsic::aarch64_stxr,
4744 Tys: StoreAddr->getType());
4745 CallInst *CI = Builder.CreateCall(Callee: F, Args: {StoreVal, StoreAddr}, Name: "stxr");
4746 CI->addParamAttr(
4747 ArgNo: 1, Attr: Attribute::get(Context&: getLLVMContext(), Kind: Attribute::ElementType, Ty: StoreTy));
4748 return CI;
4749 }
4750
4751 if (BuiltinID == clang::AArch64::BI__getReg ||
4752 BuiltinID == clang::AArch64::BI__setReg) {
4753 Expr::EvalResult Result;
4754 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
4755 llvm_unreachable("Sema will ensure that the parameter is constant");
4756
4757 llvm::APSInt Value = Result.Val.getInt();
4758 LLVMContext &Context = CGM.getLLVMContext();
4759 std::string Reg = Value == 31 ? "sp" : "x" + toString(I: Value, Radix: 10);
4760
4761 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: Reg)};
4762 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
4763 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
4764
4765 CallInst *CI;
4766 if (BuiltinID == clang::AArch64::BI__getReg) {
4767 llvm::Function *F =
4768 CGM.getIntrinsic(IID: Intrinsic::read_volatile_register, Tys: {Int64Ty});
4769 CI = Builder.CreateCall(Callee: F, Args: Metadata);
4770 } else {
4771 llvm::Function *F =
4772 CGM.getIntrinsic(IID: Intrinsic::write_volatile_register, Tys: {Int64Ty});
4773 CI = Builder.CreateCall(Callee: F, Args: {Metadata, EmitScalarExpr(E: E->getArg(Arg: 1))});
4774 }
4775 return CI;
4776 }
4777
4778 if (BuiltinID == clang::AArch64::BI__getRegFp ||
4779 BuiltinID == clang::AArch64::BI__setRegFp) {
4780 Expr::EvalResult Result;
4781 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
4782 llvm_unreachable("Sema will ensure that the parameter is constant");
4783
4784 llvm::APSInt Value = Result.Val.getInt();
4785 LLVMContext &Context = CGM.getLLVMContext();
4786 std::string Reg = "d" + toString(I: Value, Radix: 10);
4787
4788 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: Reg)};
4789 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
4790 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
4791
4792 llvm::Value *Ret;
4793 if (BuiltinID == clang::AArch64::BI__getRegFp) {
4794 llvm::Function *F =
4795 CGM.getIntrinsic(IID: Intrinsic::read_volatile_register, Tys: {Int64Ty});
4796 llvm::Value *Bits = Builder.CreateCall(Callee: F, Args: Metadata);
4797 Ret = Builder.CreateBitCast(V: Bits, DestTy: llvm::Type::getDoubleTy(C&: Context));
4798 } else {
4799 llvm::Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
4800 llvm::Value *Bits = Builder.CreateBitCast(V: Val, DestTy: Int64Ty);
4801 llvm::Function *F =
4802 CGM.getIntrinsic(IID: Intrinsic::write_volatile_register, Tys: {Int64Ty});
4803 Ret = Builder.CreateCall(Callee: F, Args: {Metadata, Bits});
4804 }
4805 return Ret;
4806 }
4807
4808 if (BuiltinID == clang::AArch64::BI__break) {
4809 Expr::EvalResult Result;
4810 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
4811 llvm_unreachable("Sema will ensure that the parameter is constant");
4812
4813 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_break);
4814 return Builder.CreateCall(Callee: F, Args: {EmitScalarExpr(E: E->getArg(Arg: 0))});
4815 }
4816
4817 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
4818 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_clrex);
4819 return Builder.CreateCall(Callee: F);
4820 }
4821
4822 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
4823 return Builder.CreateFence(Ordering: llvm::AtomicOrdering::SequentiallyConsistent,
4824 SSID: llvm::SyncScope::SingleThread);
4825
4826 // CRC32
4827 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4828 switch (BuiltinID) {
4829 case clang::AArch64::BI__builtin_arm_crc32b:
4830 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4831 case clang::AArch64::BI__builtin_arm_crc32cb:
4832 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4833 case clang::AArch64::BI__builtin_arm_crc32h:
4834 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4835 case clang::AArch64::BI__builtin_arm_crc32ch:
4836 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4837 case clang::AArch64::BI__builtin_arm_crc32w:
4838 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4839 case clang::AArch64::BI__builtin_arm_crc32cw:
4840 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4841 case clang::AArch64::BI__builtin_arm_crc32d:
4842 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4843 case clang::AArch64::BI__builtin_arm_crc32cd:
4844 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4845 }
4846
4847 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4848 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
4849 Value *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
4850 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
4851
4852 llvm::Type *DataTy = F->getFunctionType()->getParamType(i: 1);
4853 Arg1 = Builder.CreateZExtOrBitCast(V: Arg1, DestTy: DataTy);
4854
4855 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1});
4856 }
4857
4858 // Memory Operations (MOPS)
4859 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
4860 Value *Dst = EmitScalarExpr(E: E->getArg(Arg: 0));
4861 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
4862 Value *Size = EmitScalarExpr(E: E->getArg(Arg: 2));
4863 Val = Builder.CreateTrunc(V: Val, DestTy: Int8Ty);
4864 Size = Builder.CreateIntCast(V: Size, DestTy: Int64Ty, isSigned: false);
4865 return Builder.CreateCall(
4866 Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_mops_memset_tag), Args: {Dst, Val, Size});
4867 }
4868
4869 if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
4870 BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
4871 return EmitRangePrefetchBuiltin(CGF&: *this, BuiltinID, E);
4872
4873 // Memory Tagging Extensions (MTE) Intrinsics
4874 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
4875 switch (BuiltinID) {
4876 case clang::AArch64::BI__builtin_arm_irg:
4877 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
4878 case clang::AArch64::BI__builtin_arm_addg:
4879 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
4880 case clang::AArch64::BI__builtin_arm_gmi:
4881 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
4882 case clang::AArch64::BI__builtin_arm_ldg:
4883 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
4884 case clang::AArch64::BI__builtin_arm_stg:
4885 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
4886 case clang::AArch64::BI__builtin_arm_subp:
4887 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
4888 }
4889
4890 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
4891 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
4892 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
4893 Value *Mask = EmitScalarExpr(E: E->getArg(Arg: 1));
4894 assert(Mask->getType()->getScalarSizeInBits() == 64 &&
4895 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4896 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
4897 Args: {Pointer, Mask});
4898 }
4899 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
4900 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
4901 Value *TagOffset = EmitScalarExpr(E: E->getArg(Arg: 1));
4902
4903 TagOffset = Builder.CreateZExt(V: TagOffset, DestTy: Int64Ty);
4904 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
4905 Args: {Pointer, TagOffset});
4906 }
4907 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
4908 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
4909 Value *ExcludedMask = EmitScalarExpr(E: E->getArg(Arg: 1));
4910 assert(ExcludedMask->getType()->getScalarSizeInBits() == 64 &&
4911 "SemaARM::BuiltinARMMemoryTaggingCall() enforces this");
4912 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
4913 Args: {Pointer, ExcludedMask});
4914 }
4915 // Although it is possible to supply a different return
4916 // address (first arg) to this intrinsic, for now we set
4917 // return address same as input address.
4918 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
4919 Value *TagAddress = EmitScalarExpr(E: E->getArg(Arg: 0));
4920 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
4921 Args: {TagAddress, TagAddress});
4922 }
4923 // Although it is possible to supply a different tag (to set)
4924 // to this intrinsic (as first arg), for now we supply
4925 // the tag that is in input address arg (common use case).
4926 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
4927 Value *TagAddress = EmitScalarExpr(E: E->getArg(Arg: 0));
4928 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
4929 Args: {TagAddress, TagAddress});
4930 }
4931 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
4932 Value *PointerA = EmitScalarExpr(E: E->getArg(Arg: 0));
4933 Value *PointerB = EmitScalarExpr(E: E->getArg(Arg: 1));
4934 return Builder.CreateCall(
4935 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {PointerA, PointerB});
4936 }
4937 }
4938
4939 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4940 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4941 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4942 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4943 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
4944 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
4945 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
4946 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
4947
4948 SpecialRegisterAccessKind AccessKind = Write;
4949 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4950 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
4951 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4952 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
4953 AccessKind = VolatileRead;
4954
4955 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
4956 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
4957
4958 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
4959 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
4960
4961 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
4962 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
4963
4964 llvm::Type *ValueType;
4965 llvm::Type *RegisterType = Int64Ty;
4966 if (Is32Bit) {
4967 ValueType = Int32Ty;
4968 } else if (Is128Bit) {
4969 llvm::Type *Int128Ty =
4970 llvm::IntegerType::getInt128Ty(C&: CGM.getLLVMContext());
4971 ValueType = Int128Ty;
4972 RegisterType = Int128Ty;
4973 } else if (IsPointerBuiltin) {
4974 ValueType = VoidPtrTy;
4975 } else {
4976 ValueType = Int64Ty;
4977 };
4978
4979 return EmitSpecialRegisterBuiltin(CGF&: *this, E, RegisterType, ValueType,
4980 AccessKind);
4981 }
4982
4983 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
4984 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
4985 LLVMContext &Context = CGM.getLLVMContext();
4986
4987 unsigned SysReg =
4988 E->getArg(Arg: 0)->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
4989
4990 std::string SysRegStr;
4991 llvm::raw_string_ostream(SysRegStr)
4992 << (0b10 | SysReg >> 14) << ":" << ((SysReg >> 11) & 7) << ":"
4993 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
4994 << (SysReg & 7);
4995
4996 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, Str: SysRegStr) };
4997 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
4998 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
4999
5000 llvm::Type *RegisterType = Int64Ty;
5001 llvm::Type *Types[] = { RegisterType };
5002
5003 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5004 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::read_register, Tys: Types);
5005
5006 return Builder.CreateCall(Callee: F, Args: Metadata);
5007 }
5008
5009 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::write_register, Tys: Types);
5010 llvm::Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 1));
5011 llvm::Value *Result = Builder.CreateCall(Callee: F, Args: {Metadata, ArgValue});
5012
5013 return Result;
5014 }
5015
5016 if (BuiltinID == clang::AArch64::BI__sys) {
5017 unsigned SysReg =
5018 E->getArg(Arg: 0)->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
5019 const unsigned Op1 = SysReg >> 11;
5020 const unsigned CRn = (SysReg >> 7) & 0xf;
5021 const unsigned CRm = (SysReg >> 3) & 0xf;
5022 const unsigned Op2 = SysReg & 0x7;
5023
5024 Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_sys),
5025 Args: {Builder.getInt32(C: Op1), Builder.getInt32(C: CRn),
5026 Builder.getInt32(C: CRm), Builder.getInt32(C: Op2),
5027 EmitScalarExpr(E: E->getArg(Arg: 1))});
5028
5029 // Return 0 for convenience, even though MSVC returns some other undefined
5030 // value.
5031 return ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0);
5032 }
5033
5034 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5035 llvm::Function *F =
5036 CGM.getIntrinsic(IID: Intrinsic::addressofreturnaddress, Tys: AllocaInt8PtrTy);
5037 return Builder.CreateCall(Callee: F);
5038 }
5039
5040 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5041 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::sponentry, Tys: AllocaInt8PtrTy);
5042 return Builder.CreateCall(Callee: F);
5043 }
5044
5045 if (BuiltinID == clang::AArch64::BI__mulh ||
5046 BuiltinID == clang::AArch64::BI__umulh) {
5047 llvm::Type *ResType = ConvertType(T: E->getType());
5048 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
5049
5050 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5051 Value *LHS =
5052 Builder.CreateIntCast(V: EmitScalarExpr(E: E->getArg(Arg: 0)), DestTy: Int128Ty, isSigned: IsSigned);
5053 Value *RHS =
5054 Builder.CreateIntCast(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: Int128Ty, isSigned: IsSigned);
5055
5056 Value *MulResult, *HigherBits;
5057 if (IsSigned) {
5058 MulResult = Builder.CreateNSWMul(LHS, RHS);
5059 HigherBits = Builder.CreateAShr(LHS: MulResult, RHS: 64);
5060 } else {
5061 MulResult = Builder.CreateNUWMul(LHS, RHS);
5062 HigherBits = Builder.CreateLShr(LHS: MulResult, RHS: 64);
5063 }
5064 HigherBits = Builder.CreateIntCast(V: HigherBits, DestTy: ResType, isSigned: IsSigned);
5065
5066 return HigherBits;
5067 }
5068
5069 if (BuiltinID == AArch64::BI__writex18byte ||
5070 BuiltinID == AArch64::BI__writex18word ||
5071 BuiltinID == AArch64::BI__writex18dword ||
5072 BuiltinID == AArch64::BI__writex18qword) {
5073 // Process the args first
5074 Value *OffsetArg = EmitScalarExpr(E: E->getArg(Arg: 0));
5075 Value *DataArg = EmitScalarExpr(E: E->getArg(Arg: 1));
5076
5077 // Read x18 as i8*
5078 llvm::Value *X18 = readX18AsPtr(CGF&: *this);
5079
5080 // Store val at x18 + offset
5081 Value *Offset = Builder.CreateZExt(V: OffsetArg, DestTy: Int64Ty);
5082 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
5083 StoreInst *Store =
5084 Builder.CreateAlignedStore(Val: DataArg, Addr: Ptr, Align: CharUnits::One());
5085 return Store;
5086 }
5087
5088 if (BuiltinID == AArch64::BI__readx18byte ||
5089 BuiltinID == AArch64::BI__readx18word ||
5090 BuiltinID == AArch64::BI__readx18dword ||
5091 BuiltinID == AArch64::BI__readx18qword) {
5092 // Process the args first
5093 Value *OffsetArg = EmitScalarExpr(E: E->getArg(Arg: 0));
5094
5095 // Read x18 as i8*
5096 llvm::Value *X18 = readX18AsPtr(CGF&: *this);
5097
5098 // Load x18 + offset
5099 Value *Offset = Builder.CreateZExt(V: OffsetArg, DestTy: Int64Ty);
5100 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
5101 llvm::Type *IntTy = ConvertType(T: E->getType());
5102 LoadInst *Load = Builder.CreateAlignedLoad(Ty: IntTy, Addr: Ptr, Align: CharUnits::One());
5103 return Load;
5104 }
5105
5106 if (BuiltinID == AArch64::BI__addx18byte ||
5107 BuiltinID == AArch64::BI__addx18word ||
5108 BuiltinID == AArch64::BI__addx18dword ||
5109 BuiltinID == AArch64::BI__addx18qword ||
5110 BuiltinID == AArch64::BI__incx18byte ||
5111 BuiltinID == AArch64::BI__incx18word ||
5112 BuiltinID == AArch64::BI__incx18dword ||
5113 BuiltinID == AArch64::BI__incx18qword) {
5114 llvm::Type *IntTy;
5115 bool isIncrement;
5116 switch (BuiltinID) {
5117 case AArch64::BI__incx18byte:
5118 IntTy = Int8Ty;
5119 isIncrement = true;
5120 break;
5121 case AArch64::BI__incx18word:
5122 IntTy = Int16Ty;
5123 isIncrement = true;
5124 break;
5125 case AArch64::BI__incx18dword:
5126 IntTy = Int32Ty;
5127 isIncrement = true;
5128 break;
5129 case AArch64::BI__incx18qword:
5130 IntTy = Int64Ty;
5131 isIncrement = true;
5132 break;
5133 default:
5134 IntTy = ConvertType(T: E->getArg(Arg: 1)->getType());
5135 isIncrement = false;
5136 break;
5137 }
5138 // Process the args first
5139 Value *OffsetArg = EmitScalarExpr(E: E->getArg(Arg: 0));
5140 Value *ValToAdd =
5141 isIncrement ? ConstantInt::get(Ty: IntTy, V: 1) : EmitScalarExpr(E: E->getArg(Arg: 1));
5142
5143 // Read x18 as i8*
5144 llvm::Value *X18 = readX18AsPtr(CGF&: *this);
5145
5146 // Load x18 + offset
5147 Value *Offset = Builder.CreateZExt(V: OffsetArg, DestTy: Int64Ty);
5148 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
5149 LoadInst *Load = Builder.CreateAlignedLoad(Ty: IntTy, Addr: Ptr, Align: CharUnits::One());
5150
5151 // Add values
5152 Value *AddResult = Builder.CreateAdd(LHS: Load, RHS: ValToAdd);
5153
5154 // Store val at x18 + offset
5155 StoreInst *Store =
5156 Builder.CreateAlignedStore(Val: AddResult, Addr: Ptr, Align: CharUnits::One());
5157 return Store;
5158 }
5159
5160 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5161 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5162 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5163 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5164 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5165 llvm::Type *RetTy = ConvertType(T: E->getType());
5166 return Builder.CreateBitCast(V: Arg, DestTy: RetTy);
5167 }
5168
5169 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5170 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5171 BuiltinID == AArch64::BI_CountLeadingZeros ||
5172 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5173 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5174 llvm::Type *ArgType = Arg->getType();
5175
5176 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5177 BuiltinID == AArch64::BI_CountLeadingOnes64)
5178 Arg = Builder.CreateXor(LHS: Arg, RHS: Constant::getAllOnesValue(Ty: ArgType));
5179
5180 Function *F = CGM.getIntrinsic(IID: Intrinsic::ctlz, Tys: ArgType);
5181 Value *Result = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
5182
5183 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5184 BuiltinID == AArch64::BI_CountLeadingZeros64)
5185 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5186 return Result;
5187 }
5188
5189 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5190 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5191 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5192
5193 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5194 ? CGM.getIntrinsic(IID: Intrinsic::aarch64_cls)
5195 : CGM.getIntrinsic(IID: Intrinsic::aarch64_cls64);
5196
5197 Value *Result = Builder.CreateCall(Callee: F, Args: Arg, Name: "cls");
5198 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5199 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5200 return Result;
5201 }
5202
5203 if (BuiltinID == AArch64::BI_CountOneBits ||
5204 BuiltinID == AArch64::BI_CountOneBits64) {
5205 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
5206 llvm::Type *ArgType = ArgValue->getType();
5207 Function *F = CGM.getIntrinsic(IID: Intrinsic::ctpop, Tys: ArgType);
5208
5209 Value *Result = Builder.CreateCall(Callee: F, Args: ArgValue);
5210 if (BuiltinID == AArch64::BI_CountOneBits64)
5211 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5212 return Result;
5213 }
5214
5215 if (BuiltinID == AArch64::BI_CountTrailingZeros ||
5216 BuiltinID == AArch64::BI_CountTrailingZeros64) {
5217 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
5218 llvm::Type *ArgType = ArgValue->getType();
5219 Function *F = CGM.getIntrinsic(IID: Intrinsic::cttz, Tys: ArgType);
5220
5221 // MSVC leaves 0 undefined; use false for predictable codegen
5222 Value *Result = Builder.CreateCall(Callee: F, Args: {ArgValue, Builder.getInt1(V: false)});
5223 if (BuiltinID == AArch64::BI_CountTrailingZeros64)
5224 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5225 return Result;
5226 }
5227
5228 if (BuiltinID == AArch64::BI__prefetch) {
5229 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
5230 Value *RW = llvm::ConstantInt::get(Ty: Int32Ty, V: 0);
5231 Value *Locality = ConstantInt::get(Ty: Int32Ty, V: 3);
5232 Value *Data = llvm::ConstantInt::get(Ty: Int32Ty, V: 1);
5233 Function *F = CGM.getIntrinsic(IID: Intrinsic::prefetch, Tys: Address->getType());
5234 return Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, Data});
5235 }
5236
5237 if (BuiltinID == AArch64::BI__prefetch2) {
5238 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
5239 llvm::APSInt PrfOp = E->getArg(Arg: 1)->EvaluateKnownConstInt(Ctx: CGM.getContext());
5240 // Decode 5-bit PRFM encoding: bits[4:3]=type, bits[2:1]=target,
5241 // bit[0]=policy
5242 // type: PLD=0(load), PLI=1(instr), PST=2(store)
5243 // target: L1=0, L2=1, L3=2
5244 // policy: KEEP=0, STRM=1
5245 uint64_t Op = PrfOp.getZExtValue();
5246 uint64_t Type = (Op >> 3) & 0x3;
5247 uint64_t Target = (Op >> 1) & 0x3;
5248 uint64_t Policy = Op & 0x1;
5249 Value *RW = Builder.getInt32(C: Type == 2 ? 1 : 0);
5250 Value *Local = Builder.getInt32(C: Target);
5251 Value *IsStream = Builder.getInt32(C: Policy);
5252 Value *IsData = Builder.getInt32(C: Type == 1 ? 0 : 1);
5253 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_prefetch);
5254 return Builder.CreateCall(Callee: F, Args: {Address, RW, Local, IsStream, IsData});
5255 }
5256
5257 if (BuiltinID == AArch64::BI__hlt) {
5258 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_hlt);
5259 Builder.CreateCall(Callee: F, Args: {EmitScalarExpr(E: E->getArg(Arg: 0))});
5260
5261 // Return 0 for convenience, even though MSVC returns some other undefined
5262 // value.
5263 return ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0);
5264 }
5265
5266 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5267 return Builder.CreateFPTrunc(
5268 V: Builder.CreateBitCast(V: EmitScalarExpr(E: E->getArg(Arg: 0)),
5269 DestTy: Builder.getFloatTy()),
5270 DestTy: Builder.getBFloatTy());
5271
5272 // Handle MSVC intrinsics before argument evaluation to prevent double
5273 // evaluation.
5274 if (std::optional<MSVCIntrin> MsvcIntId =
5275 translateAarch64ToMsvcIntrin(BuiltinID))
5276 return EmitMSVCBuiltinExpr(BuiltinID: *MsvcIntId, E);
5277
5278 // Some intrinsics are equivalent - if they are use the base intrinsic ID.
5279 auto It = llvm::find_if(Range: NEONEquivalentIntrinsicMap, P: [BuiltinID](auto &P) {
5280 return P.first == BuiltinID;
5281 });
5282 if (It != end(arr: NEONEquivalentIntrinsicMap))
5283 BuiltinID = It->second;
5284
5285 // Check whether this is an SISD builtin.
5286 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5287 const ARMNeonVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5288 IntrinsicMap: SISDMap, BuiltinID, MapProvenSorted&: AArch64SISDIntrinsicsProvenSorted);
5289 bool IsSISD = (Builtin != nullptr);
5290
5291 // Find out if any arguments are required to be integer constant
5292 // expressions.
5293 unsigned ICEArguments = 0;
5294 ASTContext::GetBuiltinTypeError Error;
5295 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
5296 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5297
5298 llvm::SmallVector<Value*, 4> Ops;
5299 Address PtrOp0 = Address::invalid();
5300 // Note the assumption that SISD intrinsics do not contain extra arguments.
5301 // TODO: Fold this into a single function call instead of, effectively, two
5302 // separate checks.
5303 bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
5304 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5305 for (unsigned i = 0, e = NumArgs; i != e; i++) {
5306 if (i == 0) {
5307 switch (BuiltinID) {
5308 case NEON::BI__builtin_neon_vld1_v:
5309 case NEON::BI__builtin_neon_vld1q_v:
5310 case NEON::BI__builtin_neon_vld1_dup_v:
5311 case NEON::BI__builtin_neon_vld1q_dup_v:
5312 case NEON::BI__builtin_neon_vld1_lane_v:
5313 case NEON::BI__builtin_neon_vld1q_lane_v:
5314 case NEON::BI__builtin_neon_vst1_v:
5315 case NEON::BI__builtin_neon_vst1q_v:
5316 case NEON::BI__builtin_neon_vst1_lane_v:
5317 case NEON::BI__builtin_neon_vst1q_lane_v:
5318 case NEON::BI__builtin_neon_vldap1_lane_s64:
5319 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5320 case NEON::BI__builtin_neon_vstl1_lane_s64:
5321 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5322 // Get the alignment for the argument in addition to the value;
5323 // we'll use it later.
5324 PtrOp0 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
5325 Ops.push_back(Elt: PtrOp0.emitRawPointer(CGF&: *this));
5326 continue;
5327 }
5328 }
5329 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
5330 }
5331
5332 if (Builtin) {
5333 Value *Result = EmitCommonNeonSISDBuiltinExpr(CGF&: *this, SISDInfo: *Builtin, Ops, E);
5334 assert(Result && "SISD intrinsic should have been handled");
5335 return Result;
5336 }
5337
5338 const Expr *Arg = E->getArg(Arg: E->getNumArgs()-1);
5339 NeonTypeFlags Type(0);
5340 if (std::optional<llvm::APSInt> Result =
5341 Arg->getIntegerConstantExpr(Ctx: getContext()))
5342 // Determine the type of this overloaded NEON intrinsic.
5343 Type = NeonTypeFlags(Result->getZExtValue());
5344
5345 bool usgn = Type.isUnsigned();
5346 bool quad = Type.isQuad();
5347 unsigned Int;
5348
5349 // Not all intrinsics handled by the common case work for AArch64 yet, so only
5350 // defer to common code if it's been added to our special map.
5351 Builtin =
5352 findARMVectorIntrinsicInMap(IntrinsicMap: ArrayRef(AArch64SIMDIntrinsicMap), BuiltinID,
5353 MapProvenSorted&: AArch64SIMDIntrinsicsProvenSorted);
5354
5355 if (Builtin)
5356 return EmitCommonNeonBuiltinExpr(
5357 BuiltinID: Builtin->BuiltinID, LLVMIntrinsic: Builtin->LLVMIntrinsic, AltLLVMIntrinsic: Builtin->AltLLVMIntrinsic,
5358 NameHint: Builtin->NameHint, Modifier: Builtin->TypeModifier, E, Ops,
5359 /*never use addresses*/ PtrOp0: Address::invalid(), PtrOp1: Address::invalid(), Arch);
5360
5361 if (Value *V = EmitAArch64TblBuiltinExpr(CGF&: *this, BuiltinID, E, Ops, Arch))
5362 return V;
5363
5364 // Handle non-overloaded intrinsics first.
5365 switch (BuiltinID) {
5366 default: break;
5367 case NEON::BI__builtin_neon_vabsh_f16:
5368 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::fabs, Tys: HalfTy), Ops, name: "vabs");
5369 case NEON::BI__builtin_neon_vaddq_p128: {
5370 llvm::Type *Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags::Poly128);
5371 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
5372 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
5373 Ops[0] = Builder.CreateXor(LHS: Ops[0], RHS: Ops[1]);
5374 llvm::Type *Int128Ty = llvm::Type::getIntNTy(C&: getLLVMContext(), N: 128);
5375 return Builder.CreateBitCast(V: Ops[0], DestTy: Int128Ty);
5376 }
5377 case NEON::BI__builtin_neon_vldrq_p128: {
5378 llvm::Type *Int128Ty = llvm::Type::getIntNTy(C&: getLLVMContext(), N: 128);
5379 return Builder.CreateAlignedLoad(Ty: Int128Ty, Addr: Ops[0],
5380 Align: CharUnits::fromQuantity(Quantity: 16));
5381 }
5382 case NEON::BI__builtin_neon_vstrq_p128: {
5383 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
5384 }
5385 case NEON::BI__builtin_neon_vcvts_f32_u32:
5386 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5387 usgn = true;
5388 [[fallthrough]];
5389 case NEON::BI__builtin_neon_vcvts_f32_s32:
5390 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5391 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5392 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5393 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5394 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: InTy);
5395 if (usgn)
5396 return Builder.CreateUIToFP(V: Ops[0], DestTy: FTy);
5397 return Builder.CreateSIToFP(V: Ops[0], DestTy: FTy);
5398 }
5399 case NEON::BI__builtin_neon_vcvth_f16_u16:
5400 case NEON::BI__builtin_neon_vcvth_f16_u32:
5401 case NEON::BI__builtin_neon_vcvth_f16_u64:
5402 usgn = true;
5403 [[fallthrough]];
5404 case NEON::BI__builtin_neon_vcvth_f16_s16:
5405 case NEON::BI__builtin_neon_vcvth_f16_s32:
5406 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5407 llvm::Type *FTy = HalfTy;
5408 llvm::Type *InTy;
5409 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5410 InTy = Int64Ty;
5411 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5412 InTy = Int32Ty;
5413 else
5414 InTy = Int16Ty;
5415 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: InTy);
5416 if (usgn)
5417 return Builder.CreateUIToFP(V: Ops[0], DestTy: FTy);
5418 return Builder.CreateSIToFP(V: Ops[0], DestTy: FTy);
5419 }
5420 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5421 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5422 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5423 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5424 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5425 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5426 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5427 case NEON::BI__builtin_neon_vcvtph_s16_f16: {
5428 llvm::Type *InTy = Int16Ty;
5429 llvm::Type* FTy = HalfTy;
5430 llvm::Type *Tys[2] = {InTy, FTy};
5431 switch (BuiltinID) {
5432 default: llvm_unreachable("missing builtin ID in switch!");
5433 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5434 Int = Intrinsic::aarch64_neon_fcvtau; break;
5435 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5436 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5437 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5438 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5439 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5440 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5441 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5442 Int = Intrinsic::aarch64_neon_fcvtas; break;
5443 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5444 Int = Intrinsic::aarch64_neon_fcvtms; break;
5445 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5446 Int = Intrinsic::aarch64_neon_fcvtns; break;
5447 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5448 Int = Intrinsic::aarch64_neon_fcvtps; break;
5449 }
5450 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvt");
5451 }
5452 case NEON::BI__builtin_neon_vcaleh_f16:
5453 case NEON::BI__builtin_neon_vcalth_f16:
5454 case NEON::BI__builtin_neon_vcageh_f16:
5455 case NEON::BI__builtin_neon_vcagth_f16: {
5456 llvm::Type* InTy = Int32Ty;
5457 llvm::Type* FTy = HalfTy;
5458 llvm::Type *Tys[2] = {InTy, FTy};
5459 switch (BuiltinID) {
5460 default: llvm_unreachable("missing builtin ID in switch!");
5461 case NEON::BI__builtin_neon_vcageh_f16:
5462 Int = Intrinsic::aarch64_neon_facge; break;
5463 case NEON::BI__builtin_neon_vcagth_f16:
5464 Int = Intrinsic::aarch64_neon_facgt; break;
5465 case NEON::BI__builtin_neon_vcaleh_f16:
5466 Int = Intrinsic::aarch64_neon_facge; std::swap(a&: Ops[0], b&: Ops[1]); break;
5467 case NEON::BI__builtin_neon_vcalth_f16:
5468 Int = Intrinsic::aarch64_neon_facgt; std::swap(a&: Ops[0], b&: Ops[1]); break;
5469 }
5470 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "facg");
5471 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
5472 }
5473 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5474 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5475 llvm::Type* InTy = Int32Ty;
5476 llvm::Type* FTy = HalfTy;
5477 llvm::Type *Tys[2] = {InTy, FTy};
5478 switch (BuiltinID) {
5479 default: llvm_unreachable("missing builtin ID in switch!");
5480 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5481 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5482 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5483 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5484 }
5485 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvth_n");
5486 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
5487 }
5488 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5489 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5490 llvm::Type* FTy = HalfTy;
5491 llvm::Type* InTy = Int32Ty;
5492 llvm::Type *Tys[2] = {FTy, InTy};
5493 switch (BuiltinID) {
5494 default: llvm_unreachable("missing builtin ID in switch!");
5495 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5496 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5497 Ops[0] = Builder.CreateSExt(V: Ops[0], DestTy: InTy, Name: "sext");
5498 break;
5499 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5500 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5501 Ops[0] = Builder.CreateZExt(V: Ops[0], DestTy: InTy);
5502 break;
5503 }
5504 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvth_n");
5505 }
5506 case NEON::BI__builtin_neon_vpaddd_s64: {
5507 // TODO: Isn't this handled by
5508 // EmitCommonNeonSISDBuiltinExpr?
5509 auto *Ty = llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2);
5510 // The vector is v2i64, so make sure it's bitcast to that.
5511 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty, Name: "v2i64");
5512 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
5513 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
5514 Value *Op0 = Builder.CreateExtractElement(Vec: Ops[0], Idx: Idx0, Name: "lane0");
5515 Value *Op1 = Builder.CreateExtractElement(Vec: Ops[0], Idx: Idx1, Name: "lane1");
5516 // Pairwise addition of a v2i64 into a scalar i64.
5517 return Builder.CreateAdd(LHS: Op0, RHS: Op1, Name: "vpaddd");
5518 }
5519 case NEON::BI__builtin_neon_vpaddd_f64: {
5520 auto *Ty = llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2);
5521 // The vector is v2f64, so make sure it's bitcast to that.
5522 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty, Name: "v2f64");
5523 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
5524 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
5525 Value *Op0 = Builder.CreateExtractElement(Vec: Ops[0], Idx: Idx0, Name: "lane0");
5526 Value *Op1 = Builder.CreateExtractElement(Vec: Ops[0], Idx: Idx1, Name: "lane1");
5527 // Pairwise addition of a v2f64 into a scalar f64.
5528 return Builder.CreateFAdd(L: Op0, R: Op1, Name: "vpaddd");
5529 }
5530 case NEON::BI__builtin_neon_vpadds_f32: {
5531 auto *Ty = llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2);
5532 // The vector is v2f32, so make sure it's bitcast to that.
5533 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty, Name: "v2f32");
5534 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
5535 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
5536 Value *Op0 = Builder.CreateExtractElement(Vec: Ops[0], Idx: Idx0, Name: "lane0");
5537 Value *Op1 = Builder.CreateExtractElement(Vec: Ops[0], Idx: Idx1, Name: "lane1");
5538 // Pairwise addition of a v2f32 into a scalar f32.
5539 return Builder.CreateFAdd(L: Op0, R: Op1, Name: "vpaddd");
5540 }
5541 case NEON::BI__builtin_neon_vceqzd_s64:
5542 return EmitAArch64CompareBuiltinExpr(
5543 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5544 Pred: ICmpInst::ICMP_EQ, Name: "vceqz");
5545 case NEON::BI__builtin_neon_vceqzd_f64:
5546 case NEON::BI__builtin_neon_vceqzs_f32:
5547 case NEON::BI__builtin_neon_vceqzh_f16:
5548 return EmitAArch64CompareBuiltinExpr(
5549 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5550 Pred: ICmpInst::FCMP_OEQ, Name: "vceqz");
5551 case NEON::BI__builtin_neon_vcgezd_s64:
5552 return EmitAArch64CompareBuiltinExpr(
5553 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5554 Pred: ICmpInst::ICMP_SGE, Name: "vcgez");
5555 case NEON::BI__builtin_neon_vcgezd_f64:
5556 case NEON::BI__builtin_neon_vcgezs_f32:
5557 case NEON::BI__builtin_neon_vcgezh_f16:
5558 return EmitAArch64CompareBuiltinExpr(
5559 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5560 Pred: ICmpInst::FCMP_OGE, Name: "vcgez");
5561 case NEON::BI__builtin_neon_vclezd_s64:
5562 return EmitAArch64CompareBuiltinExpr(
5563 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5564 Pred: ICmpInst::ICMP_SLE, Name: "vclez");
5565 case NEON::BI__builtin_neon_vclezd_f64:
5566 case NEON::BI__builtin_neon_vclezs_f32:
5567 case NEON::BI__builtin_neon_vclezh_f16:
5568 return EmitAArch64CompareBuiltinExpr(
5569 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5570 Pred: ICmpInst::FCMP_OLE, Name: "vclez");
5571 case NEON::BI__builtin_neon_vcgtzd_s64:
5572 return EmitAArch64CompareBuiltinExpr(
5573 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5574 Pred: ICmpInst::ICMP_SGT, Name: "vcgtz");
5575 case NEON::BI__builtin_neon_vcgtzd_f64:
5576 case NEON::BI__builtin_neon_vcgtzs_f32:
5577 case NEON::BI__builtin_neon_vcgtzh_f16:
5578 return EmitAArch64CompareBuiltinExpr(
5579 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5580 Pred: ICmpInst::FCMP_OGT, Name: "vcgtz");
5581 case NEON::BI__builtin_neon_vcltzd_s64:
5582 return EmitAArch64CompareBuiltinExpr(
5583 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5584 Pred: ICmpInst::ICMP_SLT, Name: "vcltz");
5585
5586 case NEON::BI__builtin_neon_vcltzd_f64:
5587 case NEON::BI__builtin_neon_vcltzs_f32:
5588 case NEON::BI__builtin_neon_vcltzh_f16:
5589 return EmitAArch64CompareBuiltinExpr(
5590 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5591 Pred: ICmpInst::FCMP_OLT, Name: "vcltz");
5592
5593 case NEON::BI__builtin_neon_vceqzd_u64: {
5594 return EmitAArch64CompareBuiltinExpr(
5595 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5596 Pred: ICmpInst::ICMP_EQ, Name: "vceqzd");
5597 }
5598 case NEON::BI__builtin_neon_vceqd_f64:
5599 case NEON::BI__builtin_neon_vcled_f64:
5600 case NEON::BI__builtin_neon_vcltd_f64:
5601 case NEON::BI__builtin_neon_vcged_f64:
5602 case NEON::BI__builtin_neon_vcgtd_f64: {
5603 llvm::CmpInst::Predicate P;
5604 switch (BuiltinID) {
5605 default: llvm_unreachable("missing builtin ID in switch!");
5606 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5607 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5608 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5609 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5610 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5611 }
5612 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
5613 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: DoubleTy);
5614 if (P == llvm::FCmpInst::FCMP_OEQ)
5615 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
5616 else
5617 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
5618 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vcmpd");
5619 }
5620 case NEON::BI__builtin_neon_vceqs_f32:
5621 case NEON::BI__builtin_neon_vcles_f32:
5622 case NEON::BI__builtin_neon_vclts_f32:
5623 case NEON::BI__builtin_neon_vcges_f32:
5624 case NEON::BI__builtin_neon_vcgts_f32: {
5625 llvm::CmpInst::Predicate P;
5626 switch (BuiltinID) {
5627 default: llvm_unreachable("missing builtin ID in switch!");
5628 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5629 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5630 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5631 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5632 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5633 }
5634 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: FloatTy);
5635 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: FloatTy);
5636 if (P == llvm::FCmpInst::FCMP_OEQ)
5637 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
5638 else
5639 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
5640 return Builder.CreateSExt(V: Ops[0], DestTy: Int32Ty, Name: "vcmpd");
5641 }
5642 case NEON::BI__builtin_neon_vceqh_f16:
5643 case NEON::BI__builtin_neon_vcleh_f16:
5644 case NEON::BI__builtin_neon_vclth_f16:
5645 case NEON::BI__builtin_neon_vcgeh_f16:
5646 case NEON::BI__builtin_neon_vcgth_f16: {
5647 llvm::CmpInst::Predicate P;
5648 switch (BuiltinID) {
5649 default: llvm_unreachable("missing builtin ID in switch!");
5650 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
5651 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
5652 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
5653 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
5654 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
5655 }
5656 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: HalfTy);
5657 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: HalfTy);
5658 if (P == llvm::FCmpInst::FCMP_OEQ)
5659 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
5660 else
5661 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
5662 return Builder.CreateSExt(V: Ops[0], DestTy: Int16Ty, Name: "vcmpd");
5663 }
5664 case NEON::BI__builtin_neon_vceqd_s64:
5665 case NEON::BI__builtin_neon_vceqd_u64:
5666 case NEON::BI__builtin_neon_vcgtd_s64:
5667 case NEON::BI__builtin_neon_vcgtd_u64:
5668 case NEON::BI__builtin_neon_vcltd_s64:
5669 case NEON::BI__builtin_neon_vcltd_u64:
5670 case NEON::BI__builtin_neon_vcged_u64:
5671 case NEON::BI__builtin_neon_vcged_s64:
5672 case NEON::BI__builtin_neon_vcled_u64:
5673 case NEON::BI__builtin_neon_vcled_s64: {
5674 llvm::CmpInst::Predicate P;
5675 switch (BuiltinID) {
5676 default: llvm_unreachable("missing builtin ID in switch!");
5677 case NEON::BI__builtin_neon_vceqd_s64:
5678 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5679 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5680 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5681 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5682 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5683 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5684 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5685 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5686 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5687 }
5688 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
5689 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
5690 Ops[0] = Builder.CreateICmp(P, LHS: Ops[0], RHS: Ops[1]);
5691 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vceqd");
5692 }
5693 case NEON::BI__builtin_neon_vnegd_s64:
5694 return Builder.CreateNeg(V: Ops[0], Name: "vnegd");
5695 case NEON::BI__builtin_neon_vnegh_f16:
5696 return Builder.CreateFNeg(V: Ops[0], Name: "vnegh");
5697 case NEON::BI__builtin_neon_vtstd_s64:
5698 case NEON::BI__builtin_neon_vtstd_u64: {
5699 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
5700 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
5701 Ops[0] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1]);
5702 Ops[0] = Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Ops[0],
5703 RHS: llvm::Constant::getNullValue(Ty: Int64Ty));
5704 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vtstd");
5705 }
5706 case NEON::BI__builtin_neon_vset_lane_i8:
5707 case NEON::BI__builtin_neon_vset_lane_i16:
5708 case NEON::BI__builtin_neon_vset_lane_i32:
5709 case NEON::BI__builtin_neon_vset_lane_i64:
5710 case NEON::BI__builtin_neon_vset_lane_bf16:
5711 case NEON::BI__builtin_neon_vset_lane_f32:
5712 case NEON::BI__builtin_neon_vsetq_lane_i8:
5713 case NEON::BI__builtin_neon_vsetq_lane_i16:
5714 case NEON::BI__builtin_neon_vsetq_lane_i32:
5715 case NEON::BI__builtin_neon_vsetq_lane_i64:
5716 case NEON::BI__builtin_neon_vsetq_lane_bf16:
5717 case NEON::BI__builtin_neon_vsetq_lane_f32:
5718 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
5719 case NEON::BI__builtin_neon_vset_lane_f64:
5720 // The vector type needs a cast for the v1f64 variant.
5721 Ops[1] =
5722 Builder.CreateBitCast(V: Ops[1], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
5723 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
5724 case NEON::BI__builtin_neon_vset_lane_mf8:
5725 case NEON::BI__builtin_neon_vsetq_lane_mf8:
5726 // The input vector type needs a cast to scalar type.
5727 Ops[0] =
5728 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::Type::getInt8Ty(C&: getLLVMContext()));
5729 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
5730 case NEON::BI__builtin_neon_vsetq_lane_f64:
5731 // The vector type needs a cast for the v2f64 variant.
5732 Ops[1] =
5733 Builder.CreateBitCast(V: Ops[1], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2));
5734 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
5735
5736 case NEON::BI__builtin_neon_vget_lane_i8:
5737 case NEON::BI__builtin_neon_vdupb_lane_i8:
5738 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
5739 case NEON::BI__builtin_neon_vgetq_lane_i8:
5740 case NEON::BI__builtin_neon_vdupb_laneq_i8:
5741 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vgetq_lane");
5742 case NEON::BI__builtin_neon_vget_lane_mf8:
5743 case NEON::BI__builtin_neon_vdupb_lane_mf8:
5744 case NEON::BI__builtin_neon_vgetq_lane_mf8:
5745 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
5746 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
5747 case NEON::BI__builtin_neon_vget_lane_i16:
5748 case NEON::BI__builtin_neon_vduph_lane_i16:
5749 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
5750 case NEON::BI__builtin_neon_vgetq_lane_i16:
5751 case NEON::BI__builtin_neon_vduph_laneq_i16:
5752 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vgetq_lane");
5753 case NEON::BI__builtin_neon_vget_lane_i32:
5754 case NEON::BI__builtin_neon_vdups_lane_i32:
5755 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
5756 case NEON::BI__builtin_neon_vdups_lane_f32:
5757 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vdups_lane");
5758 case NEON::BI__builtin_neon_vgetq_lane_i32:
5759 case NEON::BI__builtin_neon_vdups_laneq_i32:
5760 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vgetq_lane");
5761 case NEON::BI__builtin_neon_vget_lane_i64:
5762 case NEON::BI__builtin_neon_vdupd_lane_i64:
5763 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
5764 case NEON::BI__builtin_neon_vdupd_lane_f64:
5765 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vdupd_lane");
5766 case NEON::BI__builtin_neon_vgetq_lane_i64:
5767 case NEON::BI__builtin_neon_vdupd_laneq_i64:
5768 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vgetq_lane");
5769 case NEON::BI__builtin_neon_vget_lane_f32:
5770 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
5771 case NEON::BI__builtin_neon_vget_lane_f64:
5772 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
5773 case NEON::BI__builtin_neon_vgetq_lane_f32:
5774 case NEON::BI__builtin_neon_vdups_laneq_f32:
5775 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vgetq_lane");
5776 case NEON::BI__builtin_neon_vgetq_lane_f64:
5777 case NEON::BI__builtin_neon_vdupd_laneq_f64:
5778 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vgetq_lane");
5779 case NEON::BI__builtin_neon_vaddh_f16:
5780 return Builder.CreateFAdd(L: Ops[0], R: Ops[1], Name: "vaddh");
5781 case NEON::BI__builtin_neon_vsubh_f16:
5782 return Builder.CreateFSub(L: Ops[0], R: Ops[1], Name: "vsubh");
5783 case NEON::BI__builtin_neon_vmulh_f16:
5784 return Builder.CreateFMul(L: Ops[0], R: Ops[1], Name: "vmulh");
5785 case NEON::BI__builtin_neon_vdivh_f16:
5786 return Builder.CreateFDiv(L: Ops[0], R: Ops[1], Name: "vdivh");
5787 case NEON::BI__builtin_neon_vfmah_f16:
5788 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5789 return emitCallMaybeConstrainedFPBuiltin(
5790 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty: HalfTy,
5791 Args: {Ops[1], Ops[2], Ops[0]});
5792 case NEON::BI__builtin_neon_vfmsh_f16: {
5793 Value *Neg = Builder.CreateFNeg(V: Ops[1], Name: "vsubh");
5794
5795 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5796 return emitCallMaybeConstrainedFPBuiltin(
5797 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty: HalfTy,
5798 Args: {Neg, Ops[2], Ops[0]});
5799 }
5800 case NEON::BI__builtin_neon_vaddd_s64:
5801 case NEON::BI__builtin_neon_vaddd_u64:
5802 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1], Name: "vaddd");
5803 case NEON::BI__builtin_neon_vsubd_s64:
5804 case NEON::BI__builtin_neon_vsubd_u64:
5805 return Builder.CreateSub(LHS: Ops[0], RHS: Ops[1], Name: "vsubd");
5806 case NEON::BI__builtin_neon_vqdmlalh_s16:
5807 case NEON::BI__builtin_neon_vqdmlslh_s16: {
5808 SmallVector<Value *, 2> ProductOps;
5809 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[1]));
5810 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[2]));
5811 auto *VTy = llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4);
5812 Ops[1] = EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqdmull, Tys: VTy),
5813 Ops&: ProductOps, name: "vqdmlXl");
5814 Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
5815 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: CI, Name: "lane0");
5816
5817 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5818 ? Intrinsic::aarch64_neon_sqadd
5819 : Intrinsic::aarch64_neon_sqsub;
5820 // Drop the 2nd multiplication argument before the accumulation
5821 Ops.pop_back();
5822 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccumInt, Tys: Int32Ty), Ops, name: "vqdmlXl");
5823 }
5824 case NEON::BI__builtin_neon_vqshlud_n_s64: {
5825 Ops[1] = Builder.CreateZExt(V: Ops[1], DestTy: Int64Ty);
5826 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqshlu, Tys: Int64Ty),
5827 Ops, name: "vqshlu_n");
5828 }
5829 case NEON::BI__builtin_neon_vqshld_n_u64:
5830 case NEON::BI__builtin_neon_vqshld_n_s64: {
5831 Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5832 ? Intrinsic::aarch64_neon_uqshl
5833 : Intrinsic::aarch64_neon_sqshl;
5834 Ops[1] = Builder.CreateZExt(V: Ops[1], DestTy: Int64Ty);
5835 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Int64Ty), Ops, name: "vqshl_n");
5836 }
5837 case NEON::BI__builtin_neon_vrshrd_n_u64:
5838 case NEON::BI__builtin_neon_vrshrd_n_s64: {
5839 Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5840 ? Intrinsic::aarch64_neon_urshl
5841 : Intrinsic::aarch64_neon_srshl;
5842 int SV = cast<ConstantInt>(Val: Ops[1])->getSExtValue();
5843 Ops[1] = ConstantInt::get(Ty: Int64Ty, V: -SV);
5844 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Int64Ty), Ops, name: "vrshr_n");
5845 }
5846 case NEON::BI__builtin_neon_vrsrad_n_u64:
5847 case NEON::BI__builtin_neon_vrsrad_n_s64: {
5848 Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5849 ? Intrinsic::aarch64_neon_urshl
5850 : Intrinsic::aarch64_neon_srshl;
5851 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
5852 Ops[2] = Builder.CreateNeg(V: Ops[2]);
5853 Ops[1] = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Int, Tys: Int64Ty),
5854 Args: {Ops[1], Builder.CreateSExt(V: Ops[2], DestTy: Int64Ty)});
5855 return Builder.CreateAdd(LHS: Ops[0], RHS: Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty));
5856 }
5857 case NEON::BI__builtin_neon_vshld_n_s64:
5858 case NEON::BI__builtin_neon_vshld_n_u64: {
5859 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: Ops[1]);
5860 return Builder.CreateShl(
5861 LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: Amt->getZExtValue()), Name: "shld_n");
5862 }
5863 case NEON::BI__builtin_neon_vshrd_n_s64: {
5864 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: Ops[1]);
5865 return Builder.CreateAShr(
5866 LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: std::min(a: static_cast<uint64_t>(63),
5867 b: Amt->getZExtValue())),
5868 Name: "shrd_n");
5869 }
5870 case NEON::BI__builtin_neon_vshrd_n_u64: {
5871 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: Ops[1]);
5872 uint64_t ShiftAmt = Amt->getZExtValue();
5873 // Right-shifting an unsigned value by its size yields 0.
5874 if (ShiftAmt == 64)
5875 return ConstantInt::get(Ty: Int64Ty, V: 0);
5876 return Builder.CreateLShr(LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: ShiftAmt),
5877 Name: "shrd_n");
5878 }
5879 case NEON::BI__builtin_neon_vsrad_n_s64: {
5880 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: Ops[2]);
5881 Ops[1] = Builder.CreateAShr(
5882 LHS: Ops[1], RHS: ConstantInt::get(Ty: Int64Ty, V: std::min(a: static_cast<uint64_t>(63),
5883 b: Amt->getZExtValue())),
5884 Name: "shrd_n");
5885 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
5886 }
5887 case NEON::BI__builtin_neon_vsrad_n_u64: {
5888 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: Ops[2]);
5889 uint64_t ShiftAmt = Amt->getZExtValue();
5890 // Right-shifting an unsigned value by its size yields 0.
5891 // As Op + 0 = Op, return Ops[0] directly.
5892 if (ShiftAmt == 64)
5893 return Ops[0];
5894 Ops[1] = Builder.CreateLShr(LHS: Ops[1], RHS: ConstantInt::get(Ty: Int64Ty, V: ShiftAmt),
5895 Name: "shrd_n");
5896 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
5897 }
5898 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5899 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5900 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5901 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5902 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "lane");
5903 SmallVector<Value *, 2> ProductOps;
5904 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[1]));
5905 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[2]));
5906 auto *VTy = llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4);
5907 Ops[1] = EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqdmull, Tys: VTy),
5908 Ops&: ProductOps, name: "vqdmlXl");
5909 Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
5910 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: CI, Name: "lane0");
5911 // Drop lane-selection and the corresponding vector argument (these have
5912 // already been used)
5913 Ops.pop_back_n(NumItems: 2);
5914
5915 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5916 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5917 ? Intrinsic::aarch64_neon_sqadd
5918 : Intrinsic::aarch64_neon_sqsub;
5919 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccInt, Tys: Int32Ty), Ops, name: "vqdmlXl");
5920 }
5921 case NEON::BI__builtin_neon_vqdmlals_s32:
5922 case NEON::BI__builtin_neon_vqdmlsls_s32: {
5923 SmallVector<Value *, 2> ProductOps;
5924 ProductOps.push_back(Elt: Ops[1]);
5925 ProductOps.push_back(Elt: Ops[2]);
5926 Ops[1] =
5927 EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqdmulls_scalar),
5928 Ops&: ProductOps, name: "vqdmlXl");
5929
5930 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5931 ? Intrinsic::aarch64_neon_sqadd
5932 : Intrinsic::aarch64_neon_sqsub;
5933 // Drop the 2nd multiplication argument before the accumulation
5934 Ops.pop_back();
5935 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccumInt, Tys: Int64Ty), Ops, name: "vqdmlXl");
5936 }
5937 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5938 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5939 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5940 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5941 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "lane");
5942 SmallVector<Value *, 2> ProductOps;
5943 ProductOps.push_back(Elt: Ops[1]);
5944 ProductOps.push_back(Elt: Ops[2]);
5945 Ops[1] =
5946 EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqdmulls_scalar),
5947 Ops&: ProductOps, name: "vqdmlXl");
5948 // Drop lane-selection and the corresponding vector argument (these have
5949 // already been used)
5950 Ops.pop_back_n(NumItems: 2);
5951
5952 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5953 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5954 ? Intrinsic::aarch64_neon_sqadd
5955 : Intrinsic::aarch64_neon_sqsub;
5956 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccInt, Tys: Int64Ty), Ops, name: "vqdmlXl");
5957 }
5958 case NEON::BI__builtin_neon_vget_lane_bf16:
5959 case NEON::BI__builtin_neon_vduph_lane_bf16:
5960 case NEON::BI__builtin_neon_vduph_lane_f16: {
5961 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
5962 }
5963 case NEON::BI__builtin_neon_vgetq_lane_bf16:
5964 case NEON::BI__builtin_neon_vduph_laneq_bf16:
5965 case NEON::BI__builtin_neon_vduph_laneq_f16: {
5966 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vgetq_lane");
5967 }
5968 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
5969 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
5970 llvm::Type *V4BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 4);
5971 return Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[0], DestTy: V4F32), DestTy: V4BF16);
5972 }
5973 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
5974 SmallVector<int, 16> ConcatMask(8);
5975 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
5976 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
5977 llvm::Type *V4BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 4);
5978 llvm::Value *Trunc =
5979 Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[0], DestTy: V4F32), DestTy: V4BF16);
5980 return Builder.CreateShuffleVector(
5981 V1: Trunc, V2: ConstantAggregateZero::get(Ty: V4BF16), Mask: ConcatMask);
5982 }
5983 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
5984 SmallVector<int, 16> ConcatMask(8);
5985 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
5986 SmallVector<int, 16> LoMask(4);
5987 std::iota(first: LoMask.begin(), last: LoMask.end(), value: 0);
5988 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
5989 llvm::Type *V4BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 4);
5990 llvm::Type *V8BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 8);
5991 llvm::Value *Inactive = Builder.CreateShuffleVector(
5992 V: Builder.CreateBitCast(V: Ops[0], DestTy: V8BF16), Mask: LoMask);
5993 llvm::Value *Trunc =
5994 Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[1], DestTy: V4F32), DestTy: V4BF16);
5995 return Builder.CreateShuffleVector(V1: Inactive, V2: Trunc, Mask: ConcatMask);
5996 }
5997 case NEON::BI__builtin_neon_vcvt_f16_f32: {
5998 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
5999 llvm::Type *V4F16 = FixedVectorType::get(ElementType: Builder.getHalfTy(), NumElts: 4);
6000 return Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[0], DestTy: V4F32), DestTy: V4F16);
6001 }
6002 case NEON::BI__builtin_neon_vcvt_f32_f16: {
6003 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
6004 llvm::Type *V4F16 = FixedVectorType::get(ElementType: Builder.getHalfTy(), NumElts: 4);
6005 return Builder.CreateFPExt(V: Builder.CreateBitCast(V: Ops[0], DestTy: V4F16), DestTy: V4F32);
6006 }
6007
6008 case clang::AArch64::BI_InterlockedAdd:
6009 case clang::AArch64::BI_InterlockedAdd_acq:
6010 case clang::AArch64::BI_InterlockedAdd_rel:
6011 case clang::AArch64::BI_InterlockedAdd_nf:
6012 case clang::AArch64::BI_InterlockedAdd64:
6013 case clang::AArch64::BI_InterlockedAdd64_acq:
6014 case clang::AArch64::BI_InterlockedAdd64_rel:
6015 case clang::AArch64::BI_InterlockedAdd64_nf: {
6016 Address DestAddr = CheckAtomicAlignment(CGF&: *this, E);
6017 Value *Val = Ops[1];
6018 llvm::AtomicOrdering Ordering;
6019 switch (BuiltinID) {
6020 case clang::AArch64::BI_InterlockedAdd:
6021 case clang::AArch64::BI_InterlockedAdd64:
6022 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6023 break;
6024 case clang::AArch64::BI_InterlockedAdd_acq:
6025 case clang::AArch64::BI_InterlockedAdd64_acq:
6026 Ordering = llvm::AtomicOrdering::Acquire;
6027 break;
6028 case clang::AArch64::BI_InterlockedAdd_rel:
6029 case clang::AArch64::BI_InterlockedAdd64_rel:
6030 Ordering = llvm::AtomicOrdering::Release;
6031 break;
6032 case clang::AArch64::BI_InterlockedAdd_nf:
6033 case clang::AArch64::BI_InterlockedAdd64_nf:
6034 Ordering = llvm::AtomicOrdering::Monotonic;
6035 break;
6036 default:
6037 llvm_unreachable("missing builtin ID in switch!");
6038 }
6039 AtomicRMWInst *RMWI =
6040 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Addr: DestAddr, Val, Ordering);
6041 return Builder.CreateAdd(LHS: RMWI, RHS: Val);
6042 }
6043 }
6044
6045 llvm::FixedVectorType *VTy = GetNeonType(CGF: this, TypeFlags: Type);
6046 llvm::Type *Ty = VTy;
6047 if (!Ty)
6048 return nullptr;
6049
6050 bool ExtractLow = false;
6051 bool ExtendLaneArg = false;
6052 switch (BuiltinID) {
6053 default: return nullptr;
6054 case NEON::BI__builtin_neon_vbsl_v:
6055 case NEON::BI__builtin_neon_vbslq_v: {
6056 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6057 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: BitTy, Name: "vbsl");
6058 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: BitTy, Name: "vbsl");
6059 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: BitTy, Name: "vbsl");
6060
6061 Ops[1] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1], Name: "vbsl");
6062 Ops[2] = Builder.CreateAnd(LHS: Builder.CreateNot(V: Ops[0]), RHS: Ops[2], Name: "vbsl");
6063 Ops[0] = Builder.CreateOr(LHS: Ops[1], RHS: Ops[2], Name: "vbsl");
6064 return Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6065 }
6066 case NEON::BI__builtin_neon_vfma_lane_v:
6067 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6068 // The ARM builtins (and instructions) have the addend as the first
6069 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6070 Value *Addend = Ops[0];
6071 Value *Multiplicand = Ops[1];
6072 Value *LaneSource = Ops[2];
6073 Ops[0] = Multiplicand;
6074 Ops[1] = LaneSource;
6075 Ops[2] = Addend;
6076
6077 // Now adjust things to handle the lane access.
6078 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6079 ? llvm::FixedVectorType::get(ElementType: VTy->getElementType(),
6080 NumElts: VTy->getNumElements() / 2)
6081 : VTy;
6082 llvm::Constant *cst = cast<Constant>(Val: Ops[3]);
6083 Value *SV = llvm::ConstantVector::getSplat(EC: VTy->getElementCount(), Elt: cst);
6084 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SourceTy);
6085 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV, Name: "lane");
6086
6087 Ops.pop_back();
6088 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6089 : Intrinsic::fma;
6090 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "fmla");
6091 }
6092 case NEON::BI__builtin_neon_vfma_laneq_v: {
6093 auto *VTy = cast<llvm::FixedVectorType>(Val: Ty);
6094 // v1f64 fma should be mapped to Neon scalar f64 fma
6095 if (VTy && VTy->getElementType() == DoubleTy) {
6096 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
6097 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: DoubleTy);
6098 llvm::FixedVectorType *VTy =
6099 GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6100 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: VTy);
6101 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "extract");
6102 Value *Result;
6103 Result = emitCallMaybeConstrainedFPBuiltin(
6104 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma,
6105 Ty: DoubleTy, Args: {Ops[1], Ops[2], Ops[0]});
6106 return Builder.CreateBitCast(V: Result, DestTy: Ty);
6107 }
6108 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6109 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6110
6111 auto *STy = llvm::FixedVectorType::get(ElementType: VTy->getElementType(),
6112 NumElts: VTy->getNumElements() * 2);
6113 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: STy);
6114 Value *SV = llvm::ConstantVector::getSplat(EC: VTy->getElementCount(),
6115 Elt: cast<ConstantInt>(Val: Ops[3]));
6116 Ops[2] = Builder.CreateShuffleVector(V1: Ops[2], V2: Ops[2], Mask: SV, Name: "lane");
6117
6118 return emitCallMaybeConstrainedFPBuiltin(
6119 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty,
6120 Args: {Ops[2], Ops[1], Ops[0]});
6121 }
6122 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6123 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6124 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6125
6126 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
6127 Ops[2] = EmitNeonSplat(V: Ops[2], C: cast<ConstantInt>(Val: Ops[3]));
6128 return emitCallMaybeConstrainedFPBuiltin(
6129 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty,
6130 Args: {Ops[2], Ops[1], Ops[0]});
6131 }
6132 case NEON::BI__builtin_neon_vfmah_lane_f16:
6133 case NEON::BI__builtin_neon_vfmas_lane_f32:
6134 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6135 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6136 case NEON::BI__builtin_neon_vfmad_lane_f64:
6137 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6138 llvm::Type *Ty = ConvertType(T: E->getCallReturnType(Ctx: getContext()));
6139 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "extract");
6140 return emitCallMaybeConstrainedFPBuiltin(
6141 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty,
6142 Args: {Ops[1], Ops[2], Ops[0]});
6143 }
6144 case NEON::BI__builtin_neon_vmull_v:
6145 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6146 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6147 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6148 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
6149 case NEON::BI__builtin_neon_vmax_v:
6150 case NEON::BI__builtin_neon_vmaxq_v:
6151 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6152 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6153 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6154 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmax");
6155 case NEON::BI__builtin_neon_vmaxh_f16: {
6156 Int = Intrinsic::aarch64_neon_fmax;
6157 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmax");
6158 }
6159 case NEON::BI__builtin_neon_vmin_v:
6160 case NEON::BI__builtin_neon_vminq_v:
6161 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6162 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6163 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6164 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmin");
6165 case NEON::BI__builtin_neon_vminh_f16: {
6166 Int = Intrinsic::aarch64_neon_fmin;
6167 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmin");
6168 }
6169 case NEON::BI__builtin_neon_vabd_v:
6170 case NEON::BI__builtin_neon_vabdq_v:
6171 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6172 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6173 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6174 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vabd");
6175 case NEON::BI__builtin_neon_vpadal_v:
6176 case NEON::BI__builtin_neon_vpadalq_v: {
6177 unsigned ArgElts = VTy->getNumElements();
6178 llvm::IntegerType *EltTy = cast<IntegerType>(Val: VTy->getElementType());
6179 unsigned BitWidth = EltTy->getBitWidth();
6180 auto *ArgTy = llvm::FixedVectorType::get(
6181 ElementType: llvm::IntegerType::get(C&: getLLVMContext(), NumBits: BitWidth / 2), NumElts: 2 * ArgElts);
6182 llvm::Type* Tys[2] = { VTy, ArgTy };
6183 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6184 SmallVector<llvm::Value*, 1> TmpOps;
6185 TmpOps.push_back(Elt: Ops[1]);
6186 Function *F = CGM.getIntrinsic(IID: Int, Tys);
6187 llvm::Value *tmp = EmitNeonCall(F, Ops&: TmpOps, name: "vpadal");
6188 llvm::Value *addend = Builder.CreateBitCast(V: Ops[0], DestTy: tmp->getType());
6189 return Builder.CreateAdd(LHS: tmp, RHS: addend);
6190 }
6191 case NEON::BI__builtin_neon_vpmin_v:
6192 case NEON::BI__builtin_neon_vpminq_v:
6193 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6194 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6195 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6196 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmin");
6197 case NEON::BI__builtin_neon_vpmax_v:
6198 case NEON::BI__builtin_neon_vpmaxq_v:
6199 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6200 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6201 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6202 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmax");
6203 case NEON::BI__builtin_neon_vminnm_v:
6204 case NEON::BI__builtin_neon_vminnmq_v:
6205 Int = Intrinsic::aarch64_neon_fminnm;
6206 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vminnm");
6207 case NEON::BI__builtin_neon_vminnmh_f16:
6208 Int = Intrinsic::aarch64_neon_fminnm;
6209 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vminnm");
6210 case NEON::BI__builtin_neon_vmaxnm_v:
6211 case NEON::BI__builtin_neon_vmaxnmq_v:
6212 Int = Intrinsic::aarch64_neon_fmaxnm;
6213 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmaxnm");
6214 case NEON::BI__builtin_neon_vmaxnmh_f16:
6215 Int = Intrinsic::aarch64_neon_fmaxnm;
6216 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmaxnm");
6217 case NEON::BI__builtin_neon_vrecpss_f32: {
6218 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_frecps, Tys: FloatTy),
6219 Ops, name: "vrecps");
6220 }
6221 case NEON::BI__builtin_neon_vrecpsd_f64:
6222 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_frecps, Tys: DoubleTy),
6223 Ops, name: "vrecps");
6224 case NEON::BI__builtin_neon_vrecpsh_f16:
6225 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_frecps, Tys: HalfTy),
6226 Ops, name: "vrecps");
6227 case NEON::BI__builtin_neon_vqshrun_n_v:
6228 Int = Intrinsic::aarch64_neon_sqshrun;
6229 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrun_n");
6230 case NEON::BI__builtin_neon_vqrshrun_n_v:
6231 Int = Intrinsic::aarch64_neon_sqrshrun;
6232 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrun_n");
6233 case NEON::BI__builtin_neon_vqshrn_n_v:
6234 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6235 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrn_n");
6236 case NEON::BI__builtin_neon_vrshrn_n_v:
6237 Int = Intrinsic::aarch64_neon_rshrn;
6238 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrshrn_n");
6239 case NEON::BI__builtin_neon_vqrshrn_n_v:
6240 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6241 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrn_n");
6242 case NEON::BI__builtin_neon_vrndah_f16: {
6243 Int = Builder.getIsFPConstrained()
6244 ? Intrinsic::experimental_constrained_round
6245 : Intrinsic::round;
6246 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrnda");
6247 }
6248 case NEON::BI__builtin_neon_vrnda_v:
6249 case NEON::BI__builtin_neon_vrndaq_v: {
6250 Int = Builder.getIsFPConstrained()
6251 ? Intrinsic::experimental_constrained_round
6252 : Intrinsic::round;
6253 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnda");
6254 }
6255 case NEON::BI__builtin_neon_vrndih_f16: {
6256 Int = Builder.getIsFPConstrained()
6257 ? Intrinsic::experimental_constrained_nearbyint
6258 : Intrinsic::nearbyint;
6259 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndi");
6260 }
6261 case NEON::BI__builtin_neon_vrndmh_f16: {
6262 Int = Builder.getIsFPConstrained()
6263 ? Intrinsic::experimental_constrained_floor
6264 : Intrinsic::floor;
6265 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndm");
6266 }
6267 case NEON::BI__builtin_neon_vrndm_v:
6268 case NEON::BI__builtin_neon_vrndmq_v: {
6269 Int = Builder.getIsFPConstrained()
6270 ? Intrinsic::experimental_constrained_floor
6271 : Intrinsic::floor;
6272 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndm");
6273 }
6274 case NEON::BI__builtin_neon_vrndnh_f16: {
6275 Int = Builder.getIsFPConstrained()
6276 ? Intrinsic::experimental_constrained_roundeven
6277 : Intrinsic::roundeven;
6278 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndn");
6279 }
6280 case NEON::BI__builtin_neon_vrndn_v:
6281 case NEON::BI__builtin_neon_vrndnq_v: {
6282 Int = Builder.getIsFPConstrained()
6283 ? Intrinsic::experimental_constrained_roundeven
6284 : Intrinsic::roundeven;
6285 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndn");
6286 }
6287 case NEON::BI__builtin_neon_vrndns_f32: {
6288 Int = Builder.getIsFPConstrained()
6289 ? Intrinsic::experimental_constrained_roundeven
6290 : Intrinsic::roundeven;
6291 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: FloatTy), Ops, name: "vrndn");
6292 }
6293 case NEON::BI__builtin_neon_vrndph_f16: {
6294 Int = Builder.getIsFPConstrained()
6295 ? Intrinsic::experimental_constrained_ceil
6296 : Intrinsic::ceil;
6297 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndp");
6298 }
6299 case NEON::BI__builtin_neon_vrndp_v:
6300 case NEON::BI__builtin_neon_vrndpq_v: {
6301 Int = Builder.getIsFPConstrained()
6302 ? Intrinsic::experimental_constrained_ceil
6303 : Intrinsic::ceil;
6304 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndp");
6305 }
6306 case NEON::BI__builtin_neon_vrndxh_f16: {
6307 Int = Builder.getIsFPConstrained()
6308 ? Intrinsic::experimental_constrained_rint
6309 : Intrinsic::rint;
6310 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndx");
6311 }
6312 case NEON::BI__builtin_neon_vrndx_v:
6313 case NEON::BI__builtin_neon_vrndxq_v: {
6314 Int = Builder.getIsFPConstrained()
6315 ? Intrinsic::experimental_constrained_rint
6316 : Intrinsic::rint;
6317 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndx");
6318 }
6319 case NEON::BI__builtin_neon_vrndh_f16: {
6320 Int = Builder.getIsFPConstrained()
6321 ? Intrinsic::experimental_constrained_trunc
6322 : Intrinsic::trunc;
6323 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndz");
6324 }
6325 case NEON::BI__builtin_neon_vrnd_v:
6326 case NEON::BI__builtin_neon_vrndq_v: {
6327 Int = Builder.getIsFPConstrained()
6328 ? Intrinsic::experimental_constrained_trunc
6329 : Intrinsic::trunc;
6330 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndz");
6331 }
6332 case NEON::BI__builtin_neon_vcvt_f64_v:
6333 case NEON::BI__builtin_neon_vcvtq_f64_v:
6334 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6335 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6336 return usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
6337 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
6338 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6339 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6340 "unexpected vcvt_f64_f32 builtin");
6341 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6342 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
6343
6344 return Builder.CreateFPExt(V: Ops[0], DestTy: Ty, Name: "vcvt");
6345 }
6346 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6347 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6348 "unexpected vcvt_f32_f64 builtin");
6349 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6350 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
6351
6352 return Builder.CreateFPTrunc(V: Ops[0], DestTy: Ty, Name: "vcvt");
6353 }
6354 case NEON::BI__builtin_neon_vcvta_s16_f16:
6355 case NEON::BI__builtin_neon_vcvta_u16_f16:
6356 case NEON::BI__builtin_neon_vcvta_s32_v:
6357 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6358 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6359 case NEON::BI__builtin_neon_vcvta_u32_v:
6360 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6361 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6362 case NEON::BI__builtin_neon_vcvta_s64_v:
6363 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6364 case NEON::BI__builtin_neon_vcvta_u64_v:
6365 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6366 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6367 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6368 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvta");
6369 }
6370 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6371 case NEON::BI__builtin_neon_vcvtm_s32_v:
6372 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6373 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6374 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6375 case NEON::BI__builtin_neon_vcvtm_u32_v:
6376 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6377 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6378 case NEON::BI__builtin_neon_vcvtm_s64_v:
6379 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6380 case NEON::BI__builtin_neon_vcvtm_u64_v:
6381 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6382 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6383 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6384 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtm");
6385 }
6386 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6387 case NEON::BI__builtin_neon_vcvtn_s32_v:
6388 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6389 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6390 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6391 case NEON::BI__builtin_neon_vcvtn_u32_v:
6392 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6393 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6394 case NEON::BI__builtin_neon_vcvtn_s64_v:
6395 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6396 case NEON::BI__builtin_neon_vcvtn_u64_v:
6397 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6398 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6399 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6400 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtn");
6401 }
6402 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6403 case NEON::BI__builtin_neon_vcvtp_s32_v:
6404 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6405 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6406 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6407 case NEON::BI__builtin_neon_vcvtp_u32_v:
6408 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6409 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6410 case NEON::BI__builtin_neon_vcvtp_s64_v:
6411 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6412 case NEON::BI__builtin_neon_vcvtp_u64_v:
6413 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6414 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6415 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6416 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtp");
6417 }
6418 case NEON::BI__builtin_neon_vmulx_v:
6419 case NEON::BI__builtin_neon_vmulxq_v: {
6420 Int = Intrinsic::aarch64_neon_fmulx;
6421 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmulx");
6422 }
6423 case NEON::BI__builtin_neon_vmulxh_lane_f16:
6424 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
6425 // vmulx_lane should be mapped to Neon scalar mulx after
6426 // extracting the scalar element
6427 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
6428 Ops.pop_back();
6429 Int = Intrinsic::aarch64_neon_fmulx;
6430 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmulx");
6431 }
6432 case NEON::BI__builtin_neon_vmul_lane_v:
6433 case NEON::BI__builtin_neon_vmul_laneq_v: {
6434 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6435 bool Quad = false;
6436 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6437 Quad = true;
6438 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
6439 llvm::FixedVectorType *VTy =
6440 GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6441 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
6442 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
6443 Value *Result = Builder.CreateFMul(L: Ops[0], R: Ops[1]);
6444 return Builder.CreateBitCast(V: Result, DestTy: Ty);
6445 }
6446 case NEON::BI__builtin_neon_vpmaxnm_v:
6447 case NEON::BI__builtin_neon_vpmaxnmq_v: {
6448 Int = Intrinsic::aarch64_neon_fmaxnmp;
6449 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmaxnm");
6450 }
6451 case NEON::BI__builtin_neon_vpminnm_v:
6452 case NEON::BI__builtin_neon_vpminnmq_v: {
6453 Int = Intrinsic::aarch64_neon_fminnmp;
6454 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpminnm");
6455 }
6456 case NEON::BI__builtin_neon_vsqrth_f16: {
6457 Int = Builder.getIsFPConstrained()
6458 ? Intrinsic::experimental_constrained_sqrt
6459 : Intrinsic::sqrt;
6460 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vsqrt");
6461 }
6462 case NEON::BI__builtin_neon_vsqrt_v:
6463 case NEON::BI__builtin_neon_vsqrtq_v: {
6464 Int = Builder.getIsFPConstrained()
6465 ? Intrinsic::experimental_constrained_sqrt
6466 : Intrinsic::sqrt;
6467 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6468 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vsqrt");
6469 }
6470 case NEON::BI__builtin_neon_vrbit_v:
6471 case NEON::BI__builtin_neon_vrbitq_v: {
6472 Int = Intrinsic::bitreverse;
6473 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrbit");
6474 }
6475 case NEON::BI__builtin_neon_vmaxv_f16: {
6476 Int = Intrinsic::aarch64_neon_fmaxv;
6477 Ty = HalfTy;
6478 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
6479 llvm::Type *Tys[2] = {Ty, VTy};
6480 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
6481 }
6482 case NEON::BI__builtin_neon_vmaxvq_f16: {
6483 Int = Intrinsic::aarch64_neon_fmaxv;
6484 Ty = HalfTy;
6485 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
6486 llvm::Type *Tys[2] = {Ty, VTy};
6487 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
6488 }
6489 case NEON::BI__builtin_neon_vminv_f16: {
6490 Int = Intrinsic::aarch64_neon_fminv;
6491 Ty = HalfTy;
6492 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
6493 llvm::Type *Tys[2] = {Ty, VTy};
6494 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
6495 }
6496 case NEON::BI__builtin_neon_vminvq_f16: {
6497 Int = Intrinsic::aarch64_neon_fminv;
6498 Ty = HalfTy;
6499 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
6500 llvm::Type *Tys[2] = {Ty, VTy};
6501 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
6502 }
6503 case NEON::BI__builtin_neon_vmaxnmv_f16: {
6504 Int = Intrinsic::aarch64_neon_fmaxnmv;
6505 Ty = HalfTy;
6506 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
6507 llvm::Type *Tys[2] = {Ty, VTy};
6508 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxnmv");
6509 }
6510 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
6511 Int = Intrinsic::aarch64_neon_fmaxnmv;
6512 Ty = HalfTy;
6513 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
6514 llvm::Type *Tys[2] = {Ty, VTy};
6515 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxnmv");
6516 }
6517 case NEON::BI__builtin_neon_vminnmv_f16: {
6518 Int = Intrinsic::aarch64_neon_fminnmv;
6519 Ty = HalfTy;
6520 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
6521 llvm::Type *Tys[2] = {Ty, VTy};
6522 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminnmv");
6523 }
6524 case NEON::BI__builtin_neon_vminnmvq_f16: {
6525 Int = Intrinsic::aarch64_neon_fminnmv;
6526 Ty = HalfTy;
6527 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
6528 llvm::Type *Tys[2] = {Ty, VTy};
6529 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminnmv");
6530 }
6531 case NEON::BI__builtin_neon_vmul_n_f64: {
6532 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
6533 Value *RHS = Builder.CreateBitCast(V: Ops[1], DestTy: DoubleTy);
6534 return Builder.CreateFMul(L: Ops[0], R: RHS);
6535 }
6536 case NEON::BI__builtin_neon_vaddlv_u8:
6537 case NEON::BI__builtin_neon_vaddlvq_u8:
6538 case NEON::BI__builtin_neon_vaddlv_u16:
6539 case NEON::BI__builtin_neon_vaddlvq_u16: {
6540 Int = Intrinsic::aarch64_neon_uaddlv;
6541 Ty = Int32Ty;
6542 VTy = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
6543 llvm::Type *Tys[2] = {Ty, VTy};
6544 Value *Result = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
6545 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6546 return Builder.CreateTrunc(V: Result, DestTy: Int16Ty);
6547 return Result;
6548 }
6549 case NEON::BI__builtin_neon_vaddlv_s8:
6550 case NEON::BI__builtin_neon_vaddlvq_s8:
6551 case NEON::BI__builtin_neon_vaddlv_s16:
6552 case NEON::BI__builtin_neon_vaddlvq_s16: {
6553 Int = Intrinsic::aarch64_neon_saddlv;
6554 Ty = Int32Ty;
6555 VTy = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
6556 llvm::Type *Tys[2] = {Ty, VTy};
6557 Value *Result = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
6558 if (VTy->getElementType()->getPrimitiveSizeInBits() == 8)
6559 return Builder.CreateTrunc(V: Result, DestTy: Int16Ty);
6560 return Result;
6561 }
6562 case NEON::BI__builtin_neon_vsri_n_v:
6563 case NEON::BI__builtin_neon_vsriq_n_v: {
6564 Int = Intrinsic::aarch64_neon_vsri;
6565 llvm::Function *Intrin = CGM.getIntrinsic(IID: Int, Tys: Ty);
6566 return EmitNeonCall(F: Intrin, Ops, name: "vsri_n");
6567 }
6568 case NEON::BI__builtin_neon_vsli_n_v:
6569 case NEON::BI__builtin_neon_vsliq_n_v: {
6570 Int = Intrinsic::aarch64_neon_vsli;
6571 llvm::Function *Intrin = CGM.getIntrinsic(IID: Int, Tys: Ty);
6572 return EmitNeonCall(F: Intrin, Ops, name: "vsli_n");
6573 }
6574 case NEON::BI__builtin_neon_vsra_n_v:
6575 case NEON::BI__builtin_neon_vsraq_n_v:
6576 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6577 Ops[1] = EmitNeonRShiftImm(Vec: Ops[1], Shift: Ops[2], Ty, usgn, name: "vsra_n");
6578 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
6579 case NEON::BI__builtin_neon_vrsra_n_v:
6580 case NEON::BI__builtin_neon_vrsraq_n_v: {
6581 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6582 SmallVector<llvm::Value*,2> TmpOps;
6583 TmpOps.push_back(Elt: Ops[1]);
6584 TmpOps.push_back(Elt: Ops[2]);
6585 Function* F = CGM.getIntrinsic(IID: Int, Tys: Ty);
6586 llvm::Value *tmp = EmitNeonCall(F, Ops&: TmpOps, name: "vrshr_n", shift: 1, rightshift: true);
6587 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
6588 return Builder.CreateAdd(LHS: Ops[0], RHS: tmp);
6589 }
6590 case NEON::BI__builtin_neon_vld1_v:
6591 case NEON::BI__builtin_neon_vld1q_v: {
6592 return Builder.CreateAlignedLoad(Ty: VTy, Addr: Ops[0], Align: PtrOp0.getAlignment());
6593 }
6594 case NEON::BI__builtin_neon_vst1_v:
6595 case NEON::BI__builtin_neon_vst1q_v:
6596 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
6597 return Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
6598 case NEON::BI__builtin_neon_vld1_lane_v:
6599 case NEON::BI__builtin_neon_vld1q_lane_v: {
6600 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6601 Ops[0] = Builder.CreateAlignedLoad(Ty: VTy->getElementType(), Addr: Ops[0],
6602 Align: PtrOp0.getAlignment());
6603 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vld1_lane");
6604 }
6605 case NEON::BI__builtin_neon_vldap1_lane_s64:
6606 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
6607 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6608 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
6609 Ty: VTy->getElementType(), Addr: Ops[0], Align: PtrOp0.getAlignment());
6610 LI->setAtomic(Ordering: llvm::AtomicOrdering::Acquire);
6611 Ops[0] = LI;
6612 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vldap1_lane");
6613 }
6614 case NEON::BI__builtin_neon_vld1_dup_v:
6615 case NEON::BI__builtin_neon_vld1q_dup_v: {
6616 Value *V = PoisonValue::get(T: Ty);
6617 Ops[0] = Builder.CreateAlignedLoad(Ty: VTy->getElementType(), Addr: Ops[0],
6618 Align: PtrOp0.getAlignment());
6619 llvm::Constant *CI = ConstantInt::get(Ty: Int32Ty, V: 0);
6620 Ops[0] = Builder.CreateInsertElement(Vec: V, NewElt: Ops[0], Idx: CI);
6621 return EmitNeonSplat(V: Ops[0], C: CI);
6622 }
6623 case NEON::BI__builtin_neon_vst1_lane_v:
6624 case NEON::BI__builtin_neon_vst1q_lane_v:
6625 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6626 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
6627 return Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
6628 case NEON::BI__builtin_neon_vstl1_lane_s64:
6629 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
6630 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6631 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
6632 llvm::StoreInst *SI =
6633 Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
6634 SI->setAtomic(Ordering: llvm::AtomicOrdering::Release);
6635 return SI;
6636 }
6637 case NEON::BI__builtin_neon_vld2_v:
6638 case NEON::BI__builtin_neon_vld2q_v: {
6639 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6640 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld2, Tys);
6641 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld2");
6642 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
6643 }
6644 case NEON::BI__builtin_neon_vld3_v:
6645 case NEON::BI__builtin_neon_vld3q_v: {
6646 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6647 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld3, Tys);
6648 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld3");
6649 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
6650 }
6651 case NEON::BI__builtin_neon_vld4_v:
6652 case NEON::BI__builtin_neon_vld4q_v: {
6653 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6654 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld4, Tys);
6655 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld4");
6656 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
6657 }
6658 case NEON::BI__builtin_neon_vld2_dup_v:
6659 case NEON::BI__builtin_neon_vld2q_dup_v: {
6660 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6661 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld2r, Tys);
6662 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld2");
6663 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
6664 }
6665 case NEON::BI__builtin_neon_vld3_dup_v:
6666 case NEON::BI__builtin_neon_vld3q_dup_v: {
6667 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6668 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld3r, Tys);
6669 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld3");
6670 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
6671 }
6672 case NEON::BI__builtin_neon_vld4_dup_v:
6673 case NEON::BI__builtin_neon_vld4q_dup_v: {
6674 llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
6675 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld4r, Tys);
6676 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld4");
6677 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
6678 }
6679 case NEON::BI__builtin_neon_vld2_lane_v:
6680 case NEON::BI__builtin_neon_vld2q_lane_v: {
6681 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6682 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld2lane, Tys);
6683 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
6684 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6685 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
6686 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
6687 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld2_lane");
6688 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
6689 }
6690 case NEON::BI__builtin_neon_vld3_lane_v:
6691 case NEON::BI__builtin_neon_vld3q_lane_v: {
6692 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6693 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld3lane, Tys);
6694 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
6695 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6696 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
6697 Ops[3] = Builder.CreateBitCast(V: Ops[3], DestTy: Ty);
6698 Ops[4] = Builder.CreateZExt(V: Ops[4], DestTy: Int64Ty);
6699 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld3_lane");
6700 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
6701 }
6702 case NEON::BI__builtin_neon_vld4_lane_v:
6703 case NEON::BI__builtin_neon_vld4q_lane_v: {
6704 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6705 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld4lane, Tys);
6706 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
6707 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6708 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
6709 Ops[3] = Builder.CreateBitCast(V: Ops[3], DestTy: Ty);
6710 Ops[4] = Builder.CreateBitCast(V: Ops[4], DestTy: Ty);
6711 Ops[5] = Builder.CreateZExt(V: Ops[5], DestTy: Int64Ty);
6712 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld4_lane");
6713 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
6714 }
6715 case NEON::BI__builtin_neon_vst2_v:
6716 case NEON::BI__builtin_neon_vst2q_v: {
6717 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
6718 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6719 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st2, Tys),
6720 Ops, name: "");
6721 }
6722 case NEON::BI__builtin_neon_vst2_lane_v:
6723 case NEON::BI__builtin_neon_vst2q_lane_v: {
6724 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
6725 Ops[2] = Builder.CreateZExt(V: Ops[2], DestTy: Int64Ty);
6726 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6727 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st2lane, Tys),
6728 Ops, name: "");
6729 }
6730 case NEON::BI__builtin_neon_vst3_v:
6731 case NEON::BI__builtin_neon_vst3q_v: {
6732 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
6733 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6734 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st3, Tys),
6735 Ops, name: "");
6736 }
6737 case NEON::BI__builtin_neon_vst3_lane_v:
6738 case NEON::BI__builtin_neon_vst3q_lane_v: {
6739 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
6740 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
6741 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6742 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st3lane, Tys),
6743 Ops, name: "");
6744 }
6745 case NEON::BI__builtin_neon_vst4_v:
6746 case NEON::BI__builtin_neon_vst4q_v: {
6747 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
6748 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6749 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st4, Tys),
6750 Ops, name: "");
6751 }
6752 case NEON::BI__builtin_neon_vst4_lane_v:
6753 case NEON::BI__builtin_neon_vst4q_lane_v: {
6754 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
6755 Ops[4] = Builder.CreateZExt(V: Ops[4], DestTy: Int64Ty);
6756 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6757 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st4lane, Tys),
6758 Ops, name: "");
6759 }
6760 case NEON::BI__builtin_neon_vtrn_v:
6761 case NEON::BI__builtin_neon_vtrnq_v: {
6762 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6763 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
6764 Value *SV = nullptr;
6765
6766 for (unsigned vi = 0; vi != 2; ++vi) {
6767 SmallVector<int, 16> Indices;
6768 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6769 Indices.push_back(Elt: i+vi);
6770 Indices.push_back(Elt: i+e+vi);
6771 }
6772 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
6773 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vtrn");
6774 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
6775 }
6776 return SV;
6777 }
6778 case NEON::BI__builtin_neon_vuzp_v:
6779 case NEON::BI__builtin_neon_vuzpq_v: {
6780 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6781 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
6782 Value *SV = nullptr;
6783
6784 for (unsigned vi = 0; vi != 2; ++vi) {
6785 SmallVector<int, 16> Indices;
6786 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6787 Indices.push_back(Elt: 2*i+vi);
6788
6789 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
6790 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vuzp");
6791 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
6792 }
6793 return SV;
6794 }
6795 case NEON::BI__builtin_neon_vzip_v:
6796 case NEON::BI__builtin_neon_vzipq_v: {
6797 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6798 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
6799 Value *SV = nullptr;
6800
6801 for (unsigned vi = 0; vi != 2; ++vi) {
6802 SmallVector<int, 16> Indices;
6803 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6804 Indices.push_back(Elt: (i + vi*e) >> 1);
6805 Indices.push_back(Elt: ((i + vi*e) >> 1)+e);
6806 }
6807 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
6808 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vzip");
6809 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
6810 }
6811 return SV;
6812 }
6813 case NEON::BI__builtin_neon_vqtbl1q_v: {
6814 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbl1, Tys: Ty),
6815 Ops, name: "vtbl1");
6816 }
6817 case NEON::BI__builtin_neon_vqtbl2q_v: {
6818 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbl2, Tys: Ty),
6819 Ops, name: "vtbl2");
6820 }
6821 case NEON::BI__builtin_neon_vqtbl3q_v: {
6822 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbl3, Tys: Ty),
6823 Ops, name: "vtbl3");
6824 }
6825 case NEON::BI__builtin_neon_vqtbl4q_v: {
6826 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbl4, Tys: Ty),
6827 Ops, name: "vtbl4");
6828 }
6829 case NEON::BI__builtin_neon_vqtbx1q_v: {
6830 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbx1, Tys: Ty),
6831 Ops, name: "vtbx1");
6832 }
6833 case NEON::BI__builtin_neon_vqtbx2q_v: {
6834 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbx2, Tys: Ty),
6835 Ops, name: "vtbx2");
6836 }
6837 case NEON::BI__builtin_neon_vqtbx3q_v: {
6838 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbx3, Tys: Ty),
6839 Ops, name: "vtbx3");
6840 }
6841 case NEON::BI__builtin_neon_vqtbx4q_v: {
6842 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbx4, Tys: Ty),
6843 Ops, name: "vtbx4");
6844 }
6845 case NEON::BI__builtin_neon_vsqadd_v:
6846 case NEON::BI__builtin_neon_vsqaddq_v: {
6847 Int = Intrinsic::aarch64_neon_usqadd;
6848 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vsqadd");
6849 }
6850 case NEON::BI__builtin_neon_vuqadd_v:
6851 case NEON::BI__builtin_neon_vuqaddq_v: {
6852 Int = Intrinsic::aarch64_neon_suqadd;
6853 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vuqadd");
6854 }
6855
6856 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
6857 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
6858 case NEON::BI__builtin_neon_vluti2_laneq_f16:
6859 case NEON::BI__builtin_neon_vluti2_laneq_p16:
6860 case NEON::BI__builtin_neon_vluti2_laneq_p8:
6861 case NEON::BI__builtin_neon_vluti2_laneq_s16:
6862 case NEON::BI__builtin_neon_vluti2_laneq_s8:
6863 case NEON::BI__builtin_neon_vluti2_laneq_u16:
6864 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
6865 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6866 llvm::Type *Tys[2];
6867 Tys[0] = Ty;
6868 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
6869 /*isQuad*/ false));
6870 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_laneq");
6871 }
6872 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
6873 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
6874 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
6875 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
6876 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
6877 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
6878 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
6879 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
6880 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
6881 Int = Intrinsic::aarch64_neon_vluti2_laneq;
6882 llvm::Type *Tys[2];
6883 Tys[0] = Ty;
6884 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
6885 /*isQuad*/ true));
6886 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_laneq");
6887 }
6888 case NEON::BI__builtin_neon_vluti2_lane_mf8:
6889 case NEON::BI__builtin_neon_vluti2_lane_bf16:
6890 case NEON::BI__builtin_neon_vluti2_lane_f16:
6891 case NEON::BI__builtin_neon_vluti2_lane_p16:
6892 case NEON::BI__builtin_neon_vluti2_lane_p8:
6893 case NEON::BI__builtin_neon_vluti2_lane_s16:
6894 case NEON::BI__builtin_neon_vluti2_lane_s8:
6895 case NEON::BI__builtin_neon_vluti2_lane_u16:
6896 case NEON::BI__builtin_neon_vluti2_lane_u8: {
6897 Int = Intrinsic::aarch64_neon_vluti2_lane;
6898 llvm::Type *Tys[2];
6899 Tys[0] = Ty;
6900 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
6901 /*isQuad*/ false));
6902 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_lane");
6903 }
6904 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
6905 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
6906 case NEON::BI__builtin_neon_vluti2q_lane_f16:
6907 case NEON::BI__builtin_neon_vluti2q_lane_p16:
6908 case NEON::BI__builtin_neon_vluti2q_lane_p8:
6909 case NEON::BI__builtin_neon_vluti2q_lane_s16:
6910 case NEON::BI__builtin_neon_vluti2q_lane_s8:
6911 case NEON::BI__builtin_neon_vluti2q_lane_u16:
6912 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
6913 Int = Intrinsic::aarch64_neon_vluti2_lane;
6914 llvm::Type *Tys[2];
6915 Tys[0] = Ty;
6916 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
6917 /*isQuad*/ true));
6918 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_lane");
6919 }
6920 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
6921 case NEON::BI__builtin_neon_vluti4q_lane_p8:
6922 case NEON::BI__builtin_neon_vluti4q_lane_s8:
6923 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
6924 Int = Intrinsic::aarch64_neon_vluti4q_lane;
6925 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_lane");
6926 }
6927 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
6928 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
6929 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
6930 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
6931 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
6932 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_laneq");
6933 }
6934 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
6935 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
6936 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
6937 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
6938 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
6939 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
6940 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_lane_x2");
6941 }
6942 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
6943 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
6944 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
6945 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
6946 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
6947 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
6948 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_laneq_x2");
6949 }
6950 case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
6951 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fmmla,
6952 Tys: {llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8),
6953 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16)},
6954 Ops, E, name: "fmmla");
6955 case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
6956 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fmmla,
6957 Tys: {llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4),
6958 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16)},
6959 Ops, E, name: "fmmla");
6960 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
6961 ExtractLow = true;
6962 [[fallthrough]];
6963 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
6964 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
6965 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_cvtl1,
6966 Ty0: llvm::FixedVectorType::get(ElementType: BFloatTy, NumElts: 8),
6967 Ty1: Ops[0]->getType(), Extract: ExtractLow, Ops, E, name: "vbfcvt1");
6968 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
6969 ExtractLow = true;
6970 [[fallthrough]];
6971 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
6972 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
6973 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_cvtl2,
6974 Ty0: llvm::FixedVectorType::get(ElementType: BFloatTy, NumElts: 8),
6975 Ty1: Ops[0]->getType(), Extract: ExtractLow, Ops, E, name: "vbfcvt2");
6976 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
6977 ExtractLow = true;
6978 [[fallthrough]];
6979 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
6980 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
6981 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_cvtl1,
6982 Ty0: llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8),
6983 Ty1: Ops[0]->getType(), Extract: ExtractLow, Ops, E, name: "vbfcvt1");
6984 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
6985 ExtractLow = true;
6986 [[fallthrough]];
6987 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
6988 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
6989 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_cvtl2,
6990 Ty0: llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8),
6991 Ty1: Ops[0]->getType(), Extract: ExtractLow, Ops, E, name: "vbfcvt2");
6992 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
6993 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_fcvtn,
6994 Ty0: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8),
6995 Ty1: Ops[0]->getType(), Extract: false, Ops, E, name: "vfcvtn");
6996 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
6997 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_fcvtn,
6998 Ty0: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8),
6999 Ty1: llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4), Extract: false, Ops,
7000 E, name: "vfcvtn");
7001 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7002 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_fcvtn,
7003 Ty0: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16),
7004 Ty1: llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8), Extract: false, Ops,
7005 E, name: "vfcvtn");
7006 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7007 llvm::Type *Ty = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7008 Ops[0] = Builder.CreateInsertVector(DstType: Ty, SrcVec: PoisonValue::get(T: Ty), SubVec: Ops[0],
7009 Idx: uint64_t(0));
7010 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_fcvtn2, Ty0: Ty,
7011 Ty1: Ops[1]->getType(), Extract: false, Ops, E, name: "vfcvtn2");
7012 }
7013
7014 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7015 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7016 return EmitFP8NeonFDOTCall(IID: Intrinsic::aarch64_neon_fp8_fdot2, ExtendLaneArg: false, RetTy: HalfTy,
7017 Ops, E, name: "fdot2");
7018 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7019 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7020 ExtendLaneArg = true;
7021 [[fallthrough]];
7022 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7023 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7024 return EmitFP8NeonFDOTCall(IID: Intrinsic::aarch64_neon_fp8_fdot2_lane,
7025 ExtendLaneArg, RetTy: HalfTy, Ops, E, name: "fdot2_lane");
7026 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7027 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7028 return EmitFP8NeonFDOTCall(IID: Intrinsic::aarch64_neon_fp8_fdot4, ExtendLaneArg: false,
7029 RetTy: FloatTy, Ops, E, name: "fdot4");
7030 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7031 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7032 ExtendLaneArg = true;
7033 [[fallthrough]];
7034 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7035 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7036 return EmitFP8NeonFDOTCall(IID: Intrinsic::aarch64_neon_fp8_fdot4_lane,
7037 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "fdot4_lane");
7038
7039 case NEON::BI__builtin_neon_vdot_f32_f16:
7040 case NEON::BI__builtin_neon_vdotq_f32_f16: {
7041 llvm::Type *InputTy =
7042 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
7043 llvm::Type *Tys[2] = {Ty, InputTy};
7044 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_fdot, Tys),
7045 Ops, name: "vdot");
7046 }
7047
7048 case NEON::BI__builtin_neon_vdot_lane_f32_f16:
7049 case NEON::BI__builtin_neon_vdot_laneq_f32_f16:
7050 case NEON::BI__builtin_neon_vdotq_lane_f32_f16:
7051 case NEON::BI__builtin_neon_vdotq_laneq_f32_f16: {
7052 llvm::FixedVectorType *InputTy =
7053 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
7054 llvm::FixedVectorType *LaneTy = llvm::FixedVectorType::get(
7055 ElementType: HalfTy, NumElts: Ops[2]->getType()->getPrimitiveSizeInBits() / 16);
7056 // Treat the lane argument as a splat and use non-lane version of the
7057 // intrinsic.
7058 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: LaneTy);
7059 Ops[2] = EmitNeonSplat(V: Ops[2], C: cast<ConstantInt>(Val: Ops[3]),
7060 Count: InputTy->getElementCount());
7061 llvm::Type *Tys[2] = {Ty, InputTy};
7062 Ops.pop_back();
7063 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_fdot, Tys),
7064 Ops, name: "vdot");
7065 }
7066
7067 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7068 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlalb,
7069 Tys: {llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8)}, Ops, E,
7070 name: "vmlal");
7071 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7072 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlalt,
7073 Tys: {llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8)}, Ops, E,
7074 name: "vmlal");
7075 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7076 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlallbb,
7077 Tys: {llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4)}, Ops, E,
7078 name: "vmlall");
7079 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7080 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlallbt,
7081 Tys: {llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4)}, Ops, E,
7082 name: "vmlall");
7083 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7084 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlalltb,
7085 Tys: {llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4)}, Ops, E,
7086 name: "vmlall");
7087 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7088 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlalltt,
7089 Tys: {llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4)}, Ops, E,
7090 name: "vmlall");
7091 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7092 ExtendLaneArg = true;
7093 [[fallthrough]];
7094 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7095 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7096 ExtendLaneArg, RetTy: HalfTy, Ops, E, name: "vmlal_lane");
7097 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7098 ExtendLaneArg = true;
7099 [[fallthrough]];
7100 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7101 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7102 ExtendLaneArg, RetTy: HalfTy, Ops, E, name: "vmlal_lane");
7103 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7104 ExtendLaneArg = true;
7105 [[fallthrough]];
7106 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7107 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7108 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "vmlall_lane");
7109 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7110 ExtendLaneArg = true;
7111 [[fallthrough]];
7112 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7113 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7114 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "vmlall_lane");
7115 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7116 ExtendLaneArg = true;
7117 [[fallthrough]];
7118 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7119 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7120 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "vmlall_lane");
7121 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7122 ExtendLaneArg = true;
7123 [[fallthrough]];
7124 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7125 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7126 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "vmlall_lane");
7127 case NEON::BI__builtin_neon_vamin_f16:
7128 case NEON::BI__builtin_neon_vaminq_f16:
7129 case NEON::BI__builtin_neon_vamin_f32:
7130 case NEON::BI__builtin_neon_vaminq_f32:
7131 case NEON::BI__builtin_neon_vaminq_f64: {
7132 Int = Intrinsic::aarch64_neon_famin;
7133 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "famin");
7134 }
7135 case NEON::BI__builtin_neon_vamax_f16:
7136 case NEON::BI__builtin_neon_vamaxq_f16:
7137 case NEON::BI__builtin_neon_vamax_f32:
7138 case NEON::BI__builtin_neon_vamaxq_f32:
7139 case NEON::BI__builtin_neon_vamaxq_f64: {
7140 Int = Intrinsic::aarch64_neon_famax;
7141 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "famax");
7142 }
7143 case NEON::BI__builtin_neon_vscale_f16:
7144 case NEON::BI__builtin_neon_vscaleq_f16:
7145 case NEON::BI__builtin_neon_vscale_f32:
7146 case NEON::BI__builtin_neon_vscaleq_f32:
7147 case NEON::BI__builtin_neon_vscaleq_f64: {
7148 Int = Intrinsic::aarch64_neon_fp8_fscale;
7149 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "fscale");
7150 }
7151 }
7152}
7153
7154Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7155 const CallExpr *E) {
7156 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7157 BuiltinID == BPF::BI__builtin_btf_type_id ||
7158 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7159 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7160 "unexpected BPF builtin");
7161
7162 // A sequence number, injected into IR builtin functions, to
7163 // prevent CSE given the only difference of the function
7164 // may just be the debuginfo metadata.
7165 static uint32_t BuiltinSeqNum;
7166
7167 switch (BuiltinID) {
7168 default:
7169 llvm_unreachable("Unexpected BPF builtin");
7170 case BPF::BI__builtin_preserve_field_info: {
7171 const Expr *Arg = E->getArg(Arg: 0);
7172 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7173
7174 if (!getDebugInfo()) {
7175 CGM.Error(loc: E->getExprLoc(),
7176 error: "using __builtin_preserve_field_info() without -g");
7177 return IsBitField ? EmitLValue(E: Arg).getRawBitFieldPointer(CGF&: *this)
7178 : EmitLValue(E: Arg).emitRawPointer(CGF&: *this);
7179 }
7180
7181 // Enable underlying preserve_*_access_index() generation.
7182 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7183 IsInPreservedAIRegion = true;
7184 Value *FieldAddr = IsBitField ? EmitLValue(E: Arg).getRawBitFieldPointer(CGF&: *this)
7185 : EmitLValue(E: Arg).emitRawPointer(CGF&: *this);
7186 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7187
7188 ConstantInt *C = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
7189 Value *InfoKind = ConstantInt::get(Ty: Int64Ty, V: C->getSExtValue());
7190
7191 // Built the IR for the preserve_field_info intrinsic.
7192 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7193 M: &CGM.getModule(), id: Intrinsic::bpf_preserve_field_info,
7194 OverloadTys: {FieldAddr->getType()});
7195 return Builder.CreateCall(Callee: FnGetFieldInfo, Args: {FieldAddr, InfoKind});
7196 }
7197 case BPF::BI__builtin_btf_type_id:
7198 case BPF::BI__builtin_preserve_type_info: {
7199 if (!getDebugInfo()) {
7200 CGM.Error(loc: E->getExprLoc(), error: "using builtin function without -g");
7201 return nullptr;
7202 }
7203
7204 const Expr *Arg0 = E->getArg(Arg: 0);
7205 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7206 Ty: Arg0->getType(), Loc: Arg0->getExprLoc());
7207
7208 ConstantInt *Flag = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
7209 Value *FlagValue = ConstantInt::get(Ty: Int64Ty, V: Flag->getSExtValue());
7210 Value *SeqNumVal = ConstantInt::get(Ty: Int32Ty, V: BuiltinSeqNum++);
7211
7212 llvm::Function *FnDecl;
7213 if (BuiltinID == BPF::BI__builtin_btf_type_id)
7214 FnDecl = Intrinsic::getOrInsertDeclaration(
7215 M: &CGM.getModule(), id: Intrinsic::bpf_btf_type_id, OverloadTys: {});
7216 else
7217 FnDecl = Intrinsic::getOrInsertDeclaration(
7218 M: &CGM.getModule(), id: Intrinsic::bpf_preserve_type_info, OverloadTys: {});
7219 CallInst *Fn = Builder.CreateCall(Callee: FnDecl, Args: {SeqNumVal, FlagValue});
7220 Fn->setMetadata(KindID: LLVMContext::MD_preserve_access_index, Node: DbgInfo);
7221 return Fn;
7222 }
7223 case BPF::BI__builtin_preserve_enum_value: {
7224 if (!getDebugInfo()) {
7225 CGM.Error(loc: E->getExprLoc(), error: "using builtin function without -g");
7226 return nullptr;
7227 }
7228
7229 const Expr *Arg0 = E->getArg(Arg: 0);
7230 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
7231 Ty: Arg0->getType(), Loc: Arg0->getExprLoc());
7232
7233 // Find enumerator
7234 const auto *UO = cast<UnaryOperator>(Val: Arg0->IgnoreParens());
7235 const auto *CE = cast<CStyleCastExpr>(Val: UO->getSubExpr());
7236 const auto *DR = cast<DeclRefExpr>(Val: CE->getSubExpr());
7237 const auto *Enumerator = cast<EnumConstantDecl>(Val: DR->getDecl());
7238
7239 auto InitVal = Enumerator->getInitVal();
7240 std::string InitValStr;
7241 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
7242 InitValStr = std::to_string(val: InitVal.getSExtValue());
7243 else
7244 InitValStr = std::to_string(val: InitVal.getZExtValue());
7245 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
7246 Value *EnumStrVal = Builder.CreateGlobalString(Str: EnumStr);
7247
7248 ConstantInt *Flag = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
7249 Value *FlagValue = ConstantInt::get(Ty: Int64Ty, V: Flag->getSExtValue());
7250 Value *SeqNumVal = ConstantInt::get(Ty: Int32Ty, V: BuiltinSeqNum++);
7251
7252 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
7253 M: &CGM.getModule(), id: Intrinsic::bpf_preserve_enum_value, OverloadTys: {});
7254 CallInst *Fn =
7255 Builder.CreateCall(Callee: IntrinsicFn, Args: {SeqNumVal, EnumStrVal, FlagValue});
7256 Fn->setMetadata(KindID: LLVMContext::MD_preserve_access_index, Node: DbgInfo);
7257 return Fn;
7258 }
7259 }
7260}
7261
7262llvm::Value *CodeGenFunction::
7263BuildVector(ArrayRef<llvm::Value*> Ops) {
7264 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7265 "Not a power-of-two sized vector!");
7266 bool AllConstants = true;
7267 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7268 AllConstants &= isa<Constant>(Val: Ops[i]);
7269
7270 // If this is a constant vector, create a ConstantVector.
7271 if (AllConstants) {
7272 SmallVector<llvm::Constant*, 16> CstOps;
7273 for (llvm::Value *Op : Ops)
7274 CstOps.push_back(Elt: cast<Constant>(Val: Op));
7275 return llvm::ConstantVector::get(V: CstOps);
7276 }
7277
7278 // Otherwise, insertelement the values to build the vector.
7279 Value *Result = llvm::PoisonValue::get(
7280 T: llvm::FixedVectorType::get(ElementType: Ops[0]->getType(), NumElts: Ops.size()));
7281
7282 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7283 Result = Builder.CreateInsertElement(Vec: Result, NewElt: Ops[i], Idx: Builder.getInt64(C: i));
7284
7285 return Result;
7286}
7287
7288Value *CodeGenFunction::EmitAArch64CpuInit() {
7289 llvm::FunctionType *FTy = llvm::FunctionType::get(Result: VoidTy, isVarArg: false);
7290 llvm::FunctionCallee Func =
7291 CGM.CreateRuntimeFunction(Ty: FTy, Name: "__init_cpu_features_resolver");
7292 cast<llvm::GlobalValue>(Val: Func.getCallee())->setDSOLocal(true);
7293 cast<llvm::GlobalValue>(Val: Func.getCallee())
7294 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
7295 return Builder.CreateCall(Callee: Func);
7296}
7297
7298Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
7299 const Expr *ArgExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
7300 StringRef ArgStr = cast<StringLiteral>(Val: ArgExpr)->getString();
7301 llvm::SmallVector<StringRef, 8> OrigFeatures;
7302 ArgStr.split(A&: OrigFeatures, Separator: "+");
7303 llvm::SmallVector<StringRef, 8> Features;
7304 for (StringRef Feature : OrigFeatures) {
7305 Feature = Feature.trim();
7306 if (!llvm::AArch64::parseFMVExtension(Extension: Feature))
7307 return Builder.getFalse();
7308 if (Feature != "default")
7309 Features.push_back(Elt: Feature);
7310 }
7311 return EmitAArch64CpuSupports(FeatureStrs: Features);
7312}
7313
7314llvm::Value *
7315CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
7316 llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(Features: FeaturesStrs);
7317 Value *Result = Builder.getTrue();
7318 if (FeaturesMask != 0) {
7319 // Get features from structure in runtime library
7320 // struct {
7321 // unsigned long long features;
7322 // } __aarch64_cpu_features;
7323 llvm::Type *STy = llvm::StructType::get(elt1: Int64Ty);
7324 llvm::Constant *AArch64CPUFeatures =
7325 CGM.CreateRuntimeVariable(Ty: STy, Name: "__aarch64_cpu_features");
7326 cast<llvm::GlobalValue>(Val: AArch64CPUFeatures)->setDSOLocal(true);
7327 llvm::Value *CpuFeatures = Builder.CreateGEP(
7328 Ty: STy, Ptr: AArch64CPUFeatures,
7329 IdxList: {ConstantInt::get(Ty: Int32Ty, V: 0), ConstantInt::get(Ty: Int32Ty, V: 0)});
7330 Value *Features = Builder.CreateAlignedLoad(Ty: Int64Ty, Addr: CpuFeatures,
7331 Align: CharUnits::fromQuantity(Quantity: 8));
7332 Value *Mask = Builder.getInt(AI: FeaturesMask.trunc(width: 64));
7333 Value *Bitset = Builder.CreateAnd(LHS: Features, RHS: Mask);
7334 Value *Cmp = Builder.CreateICmpEQ(LHS: Bitset, RHS: Mask);
7335 Result = Builder.CreateAnd(LHS: Result, RHS: Cmp);
7336 }
7337 return Result;
7338}
7339