//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit ARM and AArch64 Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGBuiltin.h"
#include "CGDebugInfo.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

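// Map an AArch64 MSVC-compatible builtin (the _BitScan* and _Interlocked*
// families) to the target-independent CodeGenFunction::MSVCIntrin value
// handled by the shared MSVC builtin emission code. Builtins with no MSVC
// mapping yield std::nullopt.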
static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::AArch64::BI_BitScanForward:
  case clang::AArch64::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::AArch64::BI_BitScanReverse:
  case clang::AArch64::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::AArch64::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::AArch64::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::AArch64::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::AArch64::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::AArch64::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::AArch64::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::AArch64::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::AArch64::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::AArch64::BI_InterlockedExchange8_acq:
  case clang::AArch64::BI_InterlockedExchange16_acq:
  case clang::AArch64::BI_InterlockedExchange_acq:
  case clang::AArch64::BI_InterlockedExchange64_acq:
  case clang::AArch64::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::AArch64::BI_InterlockedExchange8_rel:
  case clang::AArch64::BI_InterlockedExchange16_rel:
  case clang::AArch64::BI_InterlockedExchange_rel:
  case clang::AArch64::BI_InterlockedExchange64_rel:
  case clang::AArch64::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::AArch64::BI_InterlockedExchange8_nf:
  case clang::AArch64::BI_InterlockedExchange16_nf:
  case clang::AArch64::BI_InterlockedExchange_nf:
  case clang::AArch64::BI_InterlockedExchange64_nf:
  case clang::AArch64::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
  case clang::AArch64::BI_InterlockedOr8_acq:
  case clang::AArch64::BI_InterlockedOr16_acq:
  case clang::AArch64::BI_InterlockedOr_acq:
  case clang::AArch64::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::AArch64::BI_InterlockedOr8_rel:
  case clang::AArch64::BI_InterlockedOr16_rel:
  case clang::AArch64::BI_InterlockedOr_rel:
  case clang::AArch64::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::AArch64::BI_InterlockedOr8_nf:
  case clang::AArch64::BI_InterlockedOr16_nf:
  case clang::AArch64::BI_InterlockedOr_nf:
  case clang::AArch64::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::AArch64::BI_InterlockedXor8_acq:
  case clang::AArch64::BI_InterlockedXor16_acq:
  case clang::AArch64::BI_InterlockedXor_acq:
  case clang::AArch64::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::AArch64::BI_InterlockedXor8_rel:
  case clang::AArch64::BI_InterlockedXor16_rel:
  case clang::AArch64::BI_InterlockedXor_rel:
  case clang::AArch64::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::AArch64::BI_InterlockedXor8_nf:
  case clang::AArch64::BI_InterlockedXor16_nf:
  case clang::AArch64::BI_InterlockedXor_nf:
  case clang::AArch64::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::AArch64::BI_InterlockedAnd8_acq:
  case clang::AArch64::BI_InterlockedAnd16_acq:
  case clang::AArch64::BI_InterlockedAnd_acq:
  case clang::AArch64::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::AArch64::BI_InterlockedAnd8_rel:
  case clang::AArch64::BI_InterlockedAnd16_rel:
  case clang::AArch64::BI_InterlockedAnd_rel:
  case clang::AArch64::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::AArch64::BI_InterlockedAnd8_nf:
  case clang::AArch64::BI_InterlockedAnd16_nf:
  case clang::AArch64::BI_InterlockedAnd_nf:
  case clang::AArch64::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::AArch64::BI_InterlockedIncrement16_acq:
  case clang::AArch64::BI_InterlockedIncrement_acq:
  case clang::AArch64::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::AArch64::BI_InterlockedIncrement16_rel:
  case clang::AArch64::BI_InterlockedIncrement_rel:
  case clang::AArch64::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::AArch64::BI_InterlockedIncrement16_nf:
  case clang::AArch64::BI_InterlockedIncrement_nf:
  case clang::AArch64::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::AArch64::BI_InterlockedDecrement16_acq:
  case clang::AArch64::BI_InterlockedDecrement_acq:
  case clang::AArch64::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::AArch64::BI_InterlockedDecrement16_rel:
  case clang::AArch64::BI_InterlockedDecrement_rel:
  case clang::AArch64::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::AArch64::BI_InterlockedDecrement16_nf:
  case clang::AArch64::BI_InterlockedDecrement_nf:
  case clang::AArch64::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

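// The 32-bit ARM counterpart of the AArch64 mapping above.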
static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
  case clang::ARM::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
  case clang::ARM::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
  case clang::ARM::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
  case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
  case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
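//
// For example, given (Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt)
// this emits a plain llvm.sqrt call in the default FP environment, but with a
// constrained-FP builder (e.g. under -ffp-model=strict) it instead emits
// llvm.experimental.constrained.sqrt, with CreateConstrainedFPCall supplying
// the rounding-mode and exception-behavior operands.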
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);

  return CGF.Builder.CreateCall(F, Args);
}

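// Map a NeonTypeFlags descriptor to the corresponding fixed LLVM vector type.
// The quad bit selects between 64-bit and 128-bit vectors: e.g. Float32 maps
// to <2 x float>, or <4 x float> when the quad bit is set. V1Ty forces a
// single-element vector, and half/bfloat elements degrade to i16 vectors when
// the target lacks native support for them.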
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasFastHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
  case NeonTypeFlags::MFloat8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasFastHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
    // much of the i128/f128 API is missing, so we use v16i8 to represent
    // poly128 and rely on pattern matching.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}

static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

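// Splat the lane selected by the constant C across all lanes of the result,
// implemented as a shuffle of V with a constant splat mask.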
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}

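// Emit a call to the intrinsic F, bitcasting each operand to the parameter
// type the intrinsic expects. If `shift` is nonzero, the operand at that index
// is an immediate shift amount and is instead materialized as a splat constant
// (negated when `rightshift` is set).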
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }

  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  return Builder.CreateCall(F, Ops, name);
}

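// Emit an FP8 Neon intrinsic call. The last operand carries the value for the
// FPMR register; it is popped off Ops and written via llvm.aarch64.set.fpmr
// before the intrinsic itself is emitted.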
Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
                                        ArrayRef<llvm::Type *> Tys,
                                        SmallVectorImpl<Value *> &Ops,
                                        const CallExpr *E, const char *name) {
  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
                     Ops.pop_back_val());
  return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
                       Ops[1]->getType()};
  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
                         Ops, E, name);
}

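// Turn the constant shift amount V into a vector splat of Ty's element type,
// negating it first when emitting a right shift.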
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::getSigned(Ty, neg ? -SV : SV);
}

Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
                                           llvm::Type *Ty1, bool Extract,
                                           SmallVectorImpl<llvm::Value *> &Ops,
                                           const CallExpr *E,
                                           const char *name) {
  llvm::Type *Tys[] = {Ty0, Ty1};
  if (Extract) {
    // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
    // the vector.
    Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
    Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  return Builder.CreateAShr(Vec, Shift, name);
}

//===----------------------------------------------------------------------===//
// Intrinsics maps
//
// Maps that help automate code-generation.
//===----------------------------------------------------------------------===//
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};

namespace {
struct ARMVectorIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
  unsigned AltLLVMIntrinsic;
  uint64_t TypeModifier;

  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace

#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    TypeModifier }
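
// For instance, NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 },
// i.e. an ARMVectorIntrinsicInfo with no alternate intrinsic and no
// TypeModifier flags.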

// clang-format off
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_f16),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_f16),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_f16),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_f16),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, trunc, Add1ArgType),
  NEONMAP1(vrnda_v, round, Add1ArgType),
  NEONMAP1(vrndaq_v, round, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP1(vrndm_v, floor, Add1ArgType),
  NEONMAP1(vrndmq_v, floor, Add1ArgType),
  NEONMAP1(vrndn_v, roundeven, Add1ArgType),
  NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
  NEONMAP1(vrndp_v, ceil, Add1ArgType),
  NEONMAP1(vrndpq_v, ceil, Add1ArgType),
  NEONMAP1(vrndq_v, trunc, Add1ArgType),
  NEONMAP1(vrndx_v, rint, Add1ArgType),
  NEONMAP1(vrndxq_v, rint, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

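// The AArch64 counterpart of the ARM map above. Entries are kept sorted by
// builtin ID so that lookups can binary-search the table (see the operator<
// overloads on ARMVectorIntrinsicInfo).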
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
  NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
  NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
  NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
};

// Single-Instruction-Single-Data (SISD) intrinsics.
//
// The name is somewhat misleading: not all intrinsics in this table are
// strictly SISD. While many builtins operate on scalars,
// * some take vector operands (e.g. reduction builtins such as
//   `vminvq_u16` and `vaddvq_s32`), and
// * some take both scalar and vector operands (e.g. crypto builtins
//   such as `vsha1cq_u32`).
//
// TODO: Either rename this table to better reflect its contents, or
// restrict it to true SISD intrinsics only.
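//
// For example, `vaddvq_s32` consumes a full `int32x4_t` vector and reduces it
// to a single scalar `int32_t`, so only its result is truly scalar.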
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP0(vcvth_bf16_f32),
  NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
  // FP16 scalar intrinsics go here.
  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};
// clang-format on

// Some intrinsics are equivalent for codegen.
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
  // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
  // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
  // arbitrary one to be handled as the canonical variation.
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
};

#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
  {                                                                            \
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
        TypeModifier                                                           \
  }

#define SVEMAP2(NameBase, TypeModifier)                                        \
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

#undef SVEMAP1
#undef SVEMAP2

#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
  {                                                                            \
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
        TypeModifier                                                           \
  }

#define SMEMAP2(NameBase, TypeModifier)                                        \
  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
#define GET_SME_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sme_builtin_cg.inc"
#undef GET_SME_LLVM_INTRINSIC_MAP
};

#undef SMEMAP1
#undef SMEMAP2

static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
static bool AArch64SVEIntrinsicsProvenSorted = false;
static bool AArch64SMEIntrinsicsProvenSorted = false;

// Check whether builtin `BuiltinID` is present in `IntrinsicMap`; if so,
// return the corresponding info struct.
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
                            unsigned BuiltinID, bool &MapProvenSorted) {

#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(llvm::is_sorted(IntrinsicMap));
    MapProvenSorted = true;
  }
#endif

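  // The sortedness check above (run once per table, in asserts builds only)
  // is what makes the binary search below valid.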
  const ARMVectorIntrinsicInfo *Builtin =
      llvm::lower_bound(IntrinsicMap, BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}

Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;
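  // When the modifier requests vectorization below, VectorSize fixes the total
  // width; e.g. a 64-bit vector of i16 elements becomes <4 x i16>
  // (64 / 16 == 4 lanes).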

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}

//===----------------------------------------------------------------------===//
// Emit-helpers
//===----------------------------------------------------------------------===//
static Value *EmitCommonNeonSISDBuiltinExpr(
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  assert(SISDInfo.LLVMIntrinsic && "Generic code assumes a valid intrinsic");

  switch (SISDInfo.BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exists: cmle is really cmge
    // with swapped operands. The table gives us the right intrinsic, but we
    // still need to do the swap.
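    // E.g. vcaled_f64(a, b) maps to aarch64.neon.facge in the table and is
    // emitted here as facge(b, a).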
    std::swap(Ops[0], Ops[1]);
    break;
  }

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  llvm::Type *ArgTy = CGF.ConvertType(E->getArg(0)->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(SISDInfo.LLVMIntrinsic,
                                            SISDInfo.TypeModifier, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;
    assert(
        ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy() &&
        "Expecting vector LLVM intrinsic type and scalar Clang builtin type!");

    // The constant argument to an _n_ intrinsic always has Int32Ty, so
    // truncate it before inserting.
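    // E.g. an i32 shift amount destined for an <8 x i8> operand is truncated
    // to i8 before being placed in lane 0 of a poison vector.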
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, SISDInfo.NameHint);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, SISDInfo.NameHint);
}

Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
    llvm::Triple::ArchType Arch) {

  // Extract the trailing immediate argument that encodes the type
  // discriminator for this overloaded intrinsic.
  // TODO: Move to the parent code that takes care of argument processing.
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> NeonTypeConst =
      Arg->getIntegerConstantExpr(getContext());
  if (!NeonTypeConst)
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
  const bool Usgn = Type.isUnsigned();
  const bool Quad = Type.isQuad();
  const bool Floating = Type.isFloatingPoint();
  const bool HasFastHalfType = getTarget().hasFastHalfType();
  const bool AllowBFloatArgsAndRet =
      getTargetHooks().getABIInfo().allowBFloatArgsAndRet();

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  auto getAlignmentValue32 = [&](Address addr) -> Value * {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_splat_lane_v:
  case NEON::BI__builtin_neon_splat_laneq_v:
  case NEON::BI__builtin_neon_splatq_lane_v:
  case NEON::BI__builtin_neon_splatq_laneq_v: {
    auto NumElements = VTy->getElementCount();
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
      NumElements = NumElements * 2;
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
      NumElements = NumElements.divideCoefficientBy(2);
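    // splatq_lane produces a 128-bit result from a 64-bit input, so the lane
    // count doubles; splat_laneq is the inverse and halves it.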

    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
  }
  case NEON::BI__builtin_neon_vpadd_v:
  case NEON::BI__builtin_neon_vpaddq_v:
    // We don't allow fp/int overloading of intrinsics.
    if (VTy->getElementType()->isFloatingPointTy() &&
        Int == Intrinsic::aarch64_neon_addp)
      Int = Intrinsic::aarch64_neon_faddp;
    break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vadd_v:
  case NEON::BI__builtin_neon_vaddq_v: {
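    // "Addition" of polynomial types (the vadd_p* builtins that reach this
    // common path) is carry-less, i.e. a lanewise XOR over the raw bytes.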
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    llvm::Type *Ty;
    switch (VTy->getScalarSizeInBits()) {
    default: llvm_unreachable("unexpected type");
    case 32:
      Ty = FloatTy;
      break;
    case 64:
      Ty = DoubleTy;
      break;
    case 16:
      Ty = HalfTy;
      break;
    }
    auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
        "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
        "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
        "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
        "vcltz");
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate a target-independent intrinsic, which needs a second
    // argument saying whether or not clz of zero is undefined; on ARM it
    // isn't.
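    // (llvm.ctlz takes (value, is_zero_poison); ARM defines CLZ of 0 to
    // return the bit width, so isCLZForZeroUndef() is false there.)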
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f16_s16:
  case NEON::BI__builtin_neon_vcvt_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_f16_u16:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<int, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
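    // vfma(a, b, c) computes a + b * c, whereas llvm.fma(x, y, z) computes
    // x * y + z; hence the operand rotation below.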
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld1_x2_v:
  case NEON::BI__builtin_neon_vld1q_x2_v:
  case NEON::BI__builtin_neon_vld1_x3_v:
  case NEON::BI__builtin_neon_vld1q_x3_v:
  case NEON::BI__builtin_neon_vld1_x4_v:
  case NEON::BI__builtin_neon_vld1q_x4_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v:
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
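    // Load a single scalar element, insert it into lane 0, and splat it
    // across the remaining lanes.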
    Value *V = PoisonValue::get(Ty);
    PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    llvm::FixedVectorType *DTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    llvm::FixedVectorType *QTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along and can
    // see through such movement, this leads to bad CodeGen. So we need an
    // intrinsic for now.
2171 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2172 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2173 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
2174 case NEON::BI__builtin_neon_vpadal_v:
2175 case NEON::BI__builtin_neon_vpadalq_v: {
2176 // The source operand type has twice as many elements of half the size.
2177 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2178 llvm::Type *EltTy =
2179 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
2180 auto *NarrowTy =
2181 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
2182 llvm::Type *Tys[2] = { Ty, NarrowTy };
2183 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
2184 }
2185 case NEON::BI__builtin_neon_vpaddl_v:
2186 case NEON::BI__builtin_neon_vpaddlq_v: {
2187 // The source operand type has twice as many elements of half the size.
2188 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2189 llvm::Type *EltTy = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
2190 auto *NarrowTy =
2191 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
2192 llvm::Type *Tys[2] = { Ty, NarrowTy };
2193 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vpaddl");
2194 }
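  // For example, for vpaddl with a <4 x i32> result, NarrowTy above is
  // <8 x i16>: each pair of adjacent 16-bit source lanes is summed into one
  // widened 32-bit result lane.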
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqdmulh_lane_v:
  case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
    auto *RTy = cast<llvm::FixedVectorType>(Ty);
    if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
        BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
      RTy = llvm::FixedVectorType::get(RTy->getElementType(),
                                       RTy->getNumElements() * 2);
    llvm::Type *Tys[2] = {
        RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false))};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqdmulh_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
    llvm::Type *Tys[2] = {
        Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                            /*isQuad*/ true))};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrndi_v:
  case NEON::BI__builtin_neon_vrndiq_v:
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_nearbyint
              : Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
  case NEON::BI__builtin_neon_vsha512hq_u64:
  case NEON::BI__builtin_neon_vsha512h2q_u64:
  case NEON::BI__builtin_neon_vsha512su0q_u64:
  case NEON::BI__builtin_neon_vsha512su1q_u64: {
    Function *F = CGM.getIntrinsic(Int);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
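  // E.g. vshrn_n_s32 with a shift of 5 is emitted roughly as
  //   %sh = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
  //   %r  = trunc <4 x i32> %sh to <4 x i16>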
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsm3partw1q_u32:
  case NEON::BI__builtin_neon_vsm3partw2q_u32:
  case NEON::BI__builtin_neon_vsm3ss1q_u32:
  case NEON::BI__builtin_neon_vsm4ekeyq_u32:
  case NEON::BI__builtin_neon_vsm4eq_u32: {
    Function *F = CGM.getIntrinsic(Int);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vsm3tt1aq_u32:
  case NEON::BI__builtin_neon_vsm3tt1bq_u32:
  case NEON::BI__builtin_neon_vsm3tt2aq_u32:
  case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
    Function *F = CGM.getIntrinsic(Int);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vst1_x2_v:
  case NEON::BI__builtin_neon_vst1q_x2_v:
  case NEON::BI__builtin_neon_vst1_x3_v:
  case NEON::BI__builtin_neon_vst1q_x3_v:
  case NEON::BI__builtin_neon_vst1_x4_v:
  case NEON::BI__builtin_neon_vst1q_x4_v: {
    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
    // in AArch64 it comes last. We may want to standardize on one or the
    // other.
    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
        Arch == llvm::Triple::aarch64_32) {
      llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
      return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
    }
    llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vxarq_u64: {
    Function *F = CGM.getIntrinsic(Int);
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
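  // For a 4-element vector the vzip masks computed above are <0,4,1,5> for
  // the low half (vi == 0) and <2,6,3,7> for the high half (vi == 1), the
  // classic two-register interleave; vtrn and vuzp build their transpose and
  // de-interleave masks the same way.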
  case NEON::BI__builtin_neon_vdot_s32:
  case NEON::BI__builtin_neon_vdot_u32:
  case NEON::BI__builtin_neon_vdotq_s32:
  case NEON::BI__builtin_neon_vdotq_u32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
  }
  case NEON::BI__builtin_neon_vfmlal_low_f16:
  case NEON::BI__builtin_neon_vfmlalq_low_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
  }
  case NEON::BI__builtin_neon_vfmlsl_low_f16:
  case NEON::BI__builtin_neon_vfmlslq_low_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
  }
  case NEON::BI__builtin_neon_vfmlal_high_f16:
  case NEON::BI__builtin_neon_vfmlalq_high_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
  }
  case NEON::BI__builtin_neon_vfmlsl_high_f16:
  case NEON::BI__builtin_neon_vfmlslq_high_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
  }
  case NEON::BI__builtin_neon_vmmlaq_s32:
  case NEON::BI__builtin_neon_vmmlaq_u32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
  }
  case NEON::BI__builtin_neon_vusmmlaq_s32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
  }
  case NEON::BI__builtin_neon_vusdot_s32:
  case NEON::BI__builtin_neon_vusdotq_s32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
  }
  case NEON::BI__builtin_neon_vbfdot_f32:
  case NEON::BI__builtin_neon_vbfdotq_f32: {
    llvm::Type *InputTy =
        llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
  }
  case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
    llvm::Type *Tys[1] = { Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvtfp2bf");
  }
  }

  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // Cast the one-element vector result of an AArch64 intrinsic back to the
  // scalar type expected by the builtin.
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}

Value *
CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
                                               const CmpInst::Predicate Pred,
                                               const Twine &Name) {

  if (isa<FixedVectorType>(Ty)) {
    // Vector types are cast to i8 vectors. Recover the original type.
    Op = Builder.CreateBitCast(Op, Ty);
  }

  Constant *zero = Constant::getNullValue(Op->getType());

  if (CmpInst::isFPPredicate(Pred)) {
    if (Pred == CmpInst::FCMP_OEQ)
      Op = Builder.CreateFCmp(Pred, Op, zero);
    else
      Op = Builder.CreateFCmpS(Pred, Op, zero);
  } else {
    Op = Builder.CreateICmp(Pred, Op, zero);
  }

  llvm::Type *ResTy = Ty;
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
    ResTy = FixedVectorType::get(
        IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
        VTy->getNumElements());
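
  // Sign-extend the i1 compare results so that each lane of the returned mask
  // is all-ones (compare true) or all-zeros (compare false).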
  return Builder.CreateSExt(Op, ResTy, Name);
}

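// Helper for the NEON tbl/tbx codegen: packs the 64-bit table registers into
// the 128-bit tables the tbl/tbx intrinsics expect, concatenating adjacent
// pairs with a shufflevector (mask <0,1,...,15> for two <8 x i8> halves) and
// zero-padding a trailing odd register.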
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Value *ExtOp, Value *IndexOp,
                                 llvm::Type *ResTy, unsigned IntID,
                                 const char *Name) {
  SmallVector<Value *, 2> TblOps;
  if (ExtOp)
    TblOps.push_back(ExtOp);

  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
  SmallVector<int, 16> Indices;
  auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
    Indices.push_back(2*i);
    Indices.push_back(2*i+1);
  }

  int PairPos = 0, End = Ops.size() - 1;
  while (PairPos < End) {
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     Ops[PairPos+1], Indices,
                                                     Name));
    PairPos += 2;
  }

  // If there's an odd number of 64-bit lookup table registers, fill the high
  // 64 bits of the final 128-bit lookup table with zeroes.
  if (PairPos == End) {
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     ZeroTbl, Indices, Name));
  }

  Function *TblF;
  TblOps.push_back(IndexOp);
  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);

  return CGF.EmitNeonCall(TblF, TblOps, Name);
}

Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
  unsigned Value;
  switch (BuiltinID) {
  default:
    return nullptr;
  case clang::ARM::BI__builtin_arm_nop:
    Value = 0;
    break;
  case clang::ARM::BI__builtin_arm_yield:
  case clang::ARM::BI__yield:
    Value = 1;
    break;
  case clang::ARM::BI__builtin_arm_wfe:
  case clang::ARM::BI__wfe:
    Value = 2;
    break;
  case clang::ARM::BI__builtin_arm_wfi:
  case clang::ARM::BI__wfi:
    Value = 3;
    break;
  case clang::ARM::BI__builtin_arm_sev:
  case clang::ARM::BI__sev:
    Value = 4;
    break;
  case clang::ARM::BI__builtin_arm_sevl:
  case clang::ARM::BI__sevl:
    Value = 5;
    break;
  }

  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                            llvm::ConstantInt::get(Int32Ty, Value));
}

enum SpecialRegisterAccessKind {
  NormalRead,
  VolatileRead,
  Write,
};

// Generates the IR for a read/write special register builtin. ValueType is
// the type of the value that is to be written or read, and RegisterType is
// the type of the register being written to or read from.
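// For example, __builtin_arm_rsr("cpsr") is emitted roughly as
//   %0 = call i32 @llvm.read_volatile_register.i32(metadata !0)
// where !0 = !{!"cpsr"}; the write builtins use @llvm.write_register with the
// same metadata operand naming the register.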
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
                                         const CallExpr *E,
                                         llvm::Type *RegisterType,
                                         llvm::Type *ValueType,
                                         SpecialRegisterAccessKind AccessKind,
                                         StringRef SysReg = "") {
  // The read and write register intrinsics only support 32-, 64- and 128-bit
  // operations.
  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
          RegisterType->isIntegerTy(128)) &&
         "Unsupported size for register.");

  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;
  LLVMContext &Context = CGM.getLLVMContext();

  if (SysReg.empty()) {
    const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
    SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
  }

  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

  llvm::Type *Types[] = { RegisterType };

  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
         && "Can't fit 64-bit value in 32-bit register");

  if (AccessKind != Write) {
    assert(AccessKind == NormalRead || AccessKind == VolatileRead);
    llvm::Function *F = CGM.getIntrinsic(
        AccessKind == VolatileRead ? Intrinsic::read_volatile_register
                                   : Intrinsic::read_register,
        Types);
    llvm::Value *Call = Builder.CreateCall(F, Metadata);

    if (MixedTypes)
      // Read into a 64-bit register and then truncate the result to 32 bits.
      return Builder.CreateTrunc(Call, ValueType);

    if (ValueType->isPointerTy())
      // Have an i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
      return Builder.CreateIntToPtr(Call, ValueType);

    return Call;
  }

  llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
  if (MixedTypes) {
    // Extend the 32-bit write value to 64 bits to pass to the write intrinsic.
    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }

  if (ValueType->isPointerTy()) {
    // Have a VoidPtrTy ArgValue but need to pass an i32/i64.
    ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }

  return Builder.CreateCall(F, { Metadata, ArgValue });
}

static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
                                       const CallExpr *E) {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;
  SmallVector<llvm::Value *, 4> Ops;

  auto getIntArg = [&](unsigned ArgNo) {
    Expr::EvalResult Result;
    if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Expected constant argument to range prefetch.");
    return Result.Val.getInt().getExtValue();
  };

  Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
  Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
  Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/

  if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
    auto Length = getIntArg(3);
    auto Count = getIntArg(4) - 1;
    auto Stride = getIntArg(5);
    auto Distance = getIntArg(6);

    // Map ReuseDistance, given in bytes, to four bits representing decreasing
    // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
    // are rounded up to the nearest power of two, starting at 32KiB. Any
    // value over the maximum is represented by 0 (distance not known).
    if (Distance > 0) {
      Distance = llvm::Log2_32_Ceil(Distance);
      if (Distance < 15)
        Distance = 15;
      else if (Distance > 29)
        Distance = 0;
      else
        Distance = 30 - Distance;
    }
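    // For example, a ReuseDistance of 1MiB gives Log2_32_Ceil(2^20) == 20,
    // encoded as 30 - 20 == 10; values at or below 32KiB clamp to the
    // encoding 15, and values above 512MiB map to 0.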

    uint64_t Mask22 = (1ULL << 22) - 1;
    uint64_t Mask16 = (1ULL << 16) - 1;
    uint64_t Metadata = ((uint64_t)Distance << 60) |
                        ((Stride & Mask22) << 38) |
                        ((Count & Mask16) << 22) | (Length & Mask22);
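    // The packed i64 operand is thus laid out as:
    //   Distance[63:60] | Stride[59:38] | Count[37:22] | Length[21:0]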

    Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
  } else
    Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));

  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
                            Ops);
}

/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
/// argument that specifies the vector type. The additional argument is meant
/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
/// should be kept consistent with the logic in Sema.
/// TODO: Make this return false for SISD builtins.
static bool HasExtraNeonArgument(unsigned BuiltinID) {
  // Required by the .inc files included below, but not used in this
  // particular function.
  [[maybe_unused]] int PtrArgNum = -1;
  [[maybe_unused]] bool HasConstPtr = false;

  // The mask encodes the type. We don't care about the actual value; we just
  // check whether it has been set.
  uint64_t mask = 0;
  switch (BuiltinID) {
#define GET_NEON_OVERLOAD_CHECK
#include "clang/Basic/arm_fp16.inc"
#include "clang/Basic/arm_neon.inc"
#undef GET_NEON_OVERLOAD_CHECK
  // Non-NEON builtins for controlling VFP that take an extra argument
  // discriminating the type.
  case ARM::BI__builtin_arm_vcvtr_f:
  case ARM::BI__builtin_arm_vcvtr_d:
    mask = 1;
  }

  return mask != 0;
}

Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E,
                                           ReturnValueSlot ReturnValue,
                                           llvm::Triple::ArchType Arch) {
  if (auto Hint = GetValueForARMHint(BuiltinID))
    return Hint;

  if (BuiltinID == clang::ARM::BI__emit) {
    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);

    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();

    llvm::InlineAsm *Emit =
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
                                 /*hasSideEffects=*/true)
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
                                 /*hasSideEffects=*/true);

    return Builder.CreateCall(Emit);
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
    Value *Option = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = EmitScalarExpr(E->getArg(1));
    Value *IsData = EmitScalarExpr(E->getArg(2));

    // Locality is not supported on the ARM target.
    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);

    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
      BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
    if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
    return Res;
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
  }
  if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
                              "cls");
  }

  if (BuiltinID == clang::ARM::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
      BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mcrr:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
      break;
    case clang::ARM::BI__builtin_arm_mcrr2:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
      break;
    }

    // The MCRR{2} instruction has 5 operands, but the intrinsic has only 4:
    // Rt and Rt2 are represented as a single unsigned 64-bit integer in the
    // intrinsic definition, even though the instruction transfers them as two
    // 32-bit registers, so split the value here.
    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
    Value *CRm = EmitScalarExpr(E->getArg(3));

    Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
    Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
    Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
    Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);

    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
      BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mrrc:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
      break;
    case clang::ARM::BI__builtin_arm_mrrc2:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
      break;
    }

    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *CRm = EmitScalarExpr(E->getArg(2));
    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});

    // The intrinsic returns an unsigned 64-bit integer, represented as two
    // 32-bit integers, so reassemble them here.
    Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
    Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
    Rt = Builder.CreateZExt(Rt, Int64Ty);
    Rt1 = Builder.CreateZExt(Rt1, Int64Ty);

    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);

    return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
       getContext().getTypeSize(E->getType()) == 64) ||
      BuiltinID == clang::ARM::BI__ldrexd) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_ldaex:
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
      break;
    case clang::ARM::BI__builtin_arm_ldrexd:
    case clang::ARM::BI__builtin_arm_ldrex:
    case clang::ARM::BI__ldrexd:
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
      break;
    }

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");

    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
    Val1 = Builder.CreateZExt(Val1, Int64Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
      BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
                                                       : Intrinsic::arm_ldrex,
        DefaultPtrTy);
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
    Val->addParamAttr(
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);
    else {
      llvm::Type *IntResTy = llvm::IntegerType::get(
          getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
      return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                   RealResTy);
    }
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
        BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
                                                       : Intrinsic::arm_strexd);
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);

    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Value *Val = EmitScalarExpr(E->getArg(0));
    Builder.CreateStore(Val, Tmp);

    Address LdPtr = Tmp.withElementType(STy);
    Val = Builder.CreateLoad(LdPtr);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
      BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
    }

    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
                                                       : Intrinsic::arm_strex,
        StoreAddr->getType());

    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
    return Builder.CreateCall(F);
  }

  // CRC32
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::ARM::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  case clang::ARM::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  case clang::ARM::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  case clang::ARM::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  case clang::ARM::BI__builtin_arm_crc32w:
  case clang::ARM::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  case clang::ARM::BI__builtin_arm_crc32cw:
  case clang::ARM::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));

    // The crc32{c,}d intrinsics are implemented as two calls to the
    // crc32{c,}w intrinsics, so they need different codegen.
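    // That is, crc32d(a, b) == crc32w(crc32w(a, lo32(b)), hi32(b)), and
    // likewise for crc32cd with crc32cw.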
    if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
        BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
      return Builder.CreateCall(F, {Res, Arg1b});
    } else {
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      return Builder.CreateCall(F, {Arg0, Arg1});
    }
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {

    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::ARM::BI__builtin_arm_wsrp;

    bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
                   BuiltinID == clang::ARM::BI__builtin_arm_wsr64;

    llvm::Type *ValueType;
    llvm::Type *RegisterType;
    if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
      RegisterType = Int32Ty;
    } else if (Is64Bit) {
      ValueType = RegisterType = Int64Ty;
    } else {
      ValueType = RegisterType = Int32Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }

  if (BuiltinID == ARM::BI__builtin_sponentry) {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  // Deal with MVE builtins.
  if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
    return Result;
  // Handle CDE builtins.
  if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
    return Result;

  // Some intrinsics are equivalent; if so, map to the base intrinsic ID.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  Address PtrOp0 = Address::invalid();
  Address PtrOp1 = Address::invalid();
  SmallVector<Value*, 4> Ops;
  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst2_v:
      case NEON::BI__builtin_neon_vst2q_v:
      case NEON::BI__builtin_neon_vst2_lane_v:
      case NEON::BI__builtin_neon_vst2q_lane_v:
      case NEON::BI__builtin_neon_vst3_v:
      case NEON::BI__builtin_neon_vst3q_v:
      case NEON::BI__builtin_neon_vst3_lane_v:
      case NEON::BI__builtin_neon_vst3q_lane_v:
      case NEON::BI__builtin_neon_vst4_v:
      case NEON::BI__builtin_neon_vst4q_v:
      case NEON::BI__builtin_neon_vst4_lane_v:
      case NEON::BI__builtin_neon_vst4q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.emitRawPointer(*this));
        continue;
      }
    }
    if (i == 1) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_v:
      case NEON::BI__builtin_neon_vld2q_v:
      case NEON::BI__builtin_neon_vld3_v:
      case NEON::BI__builtin_neon_vld3q_v:
      case NEON::BI__builtin_neon_vld4_v:
      case NEON::BI__builtin_neon_vld4q_v:
      case NEON::BI__builtin_neon_vld2_lane_v:
      case NEON::BI__builtin_neon_vld2q_lane_v:
      case NEON::BI__builtin_neon_vld3_lane_v:
      case NEON::BI__builtin_neon_vld3q_lane_v:
      case NEON::BI__builtin_neon_vld4_lane_v:
      case NEON::BI__builtin_neon_vld4q_lane_v:
      case NEON::BI__builtin_neon_vld2_dup_v:
      case NEON::BI__builtin_neon_vld2q_dup_v:
      case NEON::BI__builtin_neon_vld3_dup_v:
      case NEON::BI__builtin_neon_vld3q_dup_v:
      case NEON::BI__builtin_neon_vld4_dup_v:
      case NEON::BI__builtin_neon_vld4q_dup_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
        Ops.push_back(PtrOp1.emitRawPointer(*this));
        continue;
      }
    }

    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
  }

  switch (BuiltinID) {
  default: break;

  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");

  case NEON::BI__builtin_neon_vrndns_f32: {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Tys[] = {Arg->getType()};
    Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
    return Builder.CreateCall(F, {Arg}, "vrndn");
  }

  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  case NEON::BI__builtin_neon_vsha1h_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1cq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1pq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1mq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                        "vsha1h");

  case NEON::BI__builtin_neon_vcvth_bf16_f32: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
                        "vcvtbfp2bf");
  }

  // The ARM _MoveToCoprocessor builtins put the input register value as
  // the first argument, but the LLVM intrinsic expects it as the third one.
  case clang::ARM::BI_MoveToCoprocessor:
  case clang::ARM::BI_MoveToCoprocessor2: {
    Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
                                       ? Intrinsic::arm_mcr
                                       : Intrinsic::arm_mcr2);
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
                                  Ops[3], Ops[4], Ops[5]});
  }
  }

  // Get the last argument, which specifies the vector type.
  assert(HasExtraArg);
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  std::optional<llvm::APSInt> Result =
      Arg->getIntegerConstantExpr(getContext());
  if (!Result)
    return nullptr;

  if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    llvm::Type *Ty;
    if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
      Ty = FloatTy;
    else
      Ty = DoubleTy;

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result->getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, Ty);
    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type = Result->getZExtValue();
  bool usgn = Type.isUnsigned();
  bool rightShift = false;

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
                  getTarget().hasBFloat16Type());
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Many NEON builtins have identical semantics and uses in ARM and
  // AArch64. Emit these in a single function.
  auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);

  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vld1q_lane_v:
    // Handle 64-bit integer elements as a special case. Use shuffles of
    // one-element vectors to avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      // Extract the other lane.
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // Load the value as a one-element vector.
      Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
      Value *Align = getAlignmentValue32(PtrOp0);
      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
      // Combine them.
      int Indices[] = {1 - Lane, Lane};
      return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
    }
    [[fallthrough]];
  case NEON::BI__builtin_neon_vld1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
    Value *Ld = Builder.CreateLoad(PtrOp0);
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int =
        usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 1, true);
  case NEON::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
                        Ops, "vrecpe");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v:
    rightShift = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
                        Ops, "vsli_n");
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vst1q_lane_v:
    // Handle 64-bit integer elements as a special case. Use a shuffle to get
    // a one-element vector and avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      Ops[2] = getAlignmentValue32(PtrOp0);
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
                                                 Tys), Ops);
    }
    [[fallthrough]];
  case NEON::BI__builtin_neon_vst1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    return Builder.CreateStore(Ops[1],
                               PtrOp0.withElementType(Ops[1]->getType()));
  }
  case NEON::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case NEON::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case NEON::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case NEON::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case NEON::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case NEON::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case NEON::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case NEON::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  }
}

template<typename Integer>
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
  return E->getIntegerConstantExpr(Context)->getExtValue();
}

static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
                                     llvm::Type *T, bool Unsigned) {
  // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
  // which finds it convenient to specify signed/unsigned as a boolean flag.
  return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
}

3463static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3464 uint32_t Shift, bool Unsigned) {
3465 // MVE helper function for integer shift right. This must handle signed vs
3466 // unsigned, and also deal specially with the case where the shift count is
  // equal to the lane size. In LLVM IR, a shift by the full lane width would
  // yield a poison value, but in MVE it's legal, so we must convert it to
  // code that is well defined in IR.
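  // For example, with 16-bit lanes and Shift == 16, the unsigned case folds
  // to a zero vector, and the signed case becomes an AShr by 15, which fills
  // each lane with copies of its sign bit, as MVE requires.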
3470 unsigned LaneBits = cast<llvm::VectorType>(Val: V->getType())
3471 ->getElementType()
3472 ->getPrimitiveSizeInBits();
3473 if (Shift == LaneBits) {
3474 // An unsigned shift of the full lane size always generates zero, so we can
3475 // simply emit a zero vector. A signed shift of the full lane size does the
3476 // same thing as shifting by one bit fewer.
3477 if (Unsigned)
3478 return llvm::Constant::getNullValue(Ty: V->getType());
3479 else
3480 --Shift;
3481 }
3482 return Unsigned ? Builder.CreateLShr(LHS: V, RHS: Shift) : Builder.CreateAShr(LHS: V, RHS: Shift);
3483}
3484
3485static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3486 // MVE-specific helper function for a vector splat, which infers the element
3487 // count of the output vector by knowing that MVE vectors are all 128 bits
3488 // wide.
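  // For example, a 32-bit scalar is broadcast to all four lanes of a
  // <4 x i32> vector.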
3489 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3490 return Builder.CreateVectorSplat(NumElts: Elements, V);
3491}
3492
3493static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3494 CodeGenFunction *CGF,
3495 llvm::Value *V,
3496 llvm::Type *DestType) {
3497 // Convert one MVE vector type into another by reinterpreting its in-register
3498 // format.
3499 //
  // On little-endian targets, this is identical to a bitcast (which
  // reinterprets the memory format). On big-endian targets the two are not
  // necessarily the same, because the register and memory formats map to
  // each other differently depending on the lane size.
3504 //
3505 // We generate a bitcast whenever we can (if we're little-endian, or if the
3506 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3507 // that performs the different kind of reinterpretation.
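  // For example, reinterpreting <4 x i32> as <8 x i16> on a big-endian
  // target must use the arm.mve.vreinterpretq intrinsic: the in-register
  // layouts of 16-bit and 32-bit lanes differ there, whereas a plain bitcast
  // models the memory layout.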
3508 if (CGF->getTarget().isBigEndian() &&
3509 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3510 return Builder.CreateCall(
3511 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vreinterpretq,
3512 Tys: {DestType, V->getType()}),
3513 Args: V);
3514 } else {
3515 return Builder.CreateBitCast(V, DestTy: DestType);
3516 }
3517}
3518
3519static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3520 // Make a shufflevector that extracts every other element of a vector (evens
3521 // or odds, as desired).
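  // For example, an 8-element input selects lanes [0,2,4,6] when Odd is
  // false, or [1,3,5,7] when Odd is true.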
3522 SmallVector<int, 16> Indices;
3523 unsigned InputElements =
3524 cast<llvm::FixedVectorType>(Val: V->getType())->getNumElements();
3525 for (unsigned i = 0; i < InputElements; i += 2)
3526 Indices.push_back(Elt: i + Odd);
3527 return Builder.CreateShuffleVector(V, Mask: Indices);
3528}
3529
3530static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3531 llvm::Value *V1) {
3532 // Make a shufflevector that interleaves two vectors element by element.
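  // For example, zipping two 4-element vectors uses the shuffle mask
  // [0,4,1,5,2,6,3,7], alternating the lanes of V0 and V1.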
3533 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3534 SmallVector<int, 16> Indices;
3535 unsigned InputElements =
3536 cast<llvm::FixedVectorType>(Val: V0->getType())->getNumElements();
3537 for (unsigned i = 0; i < InputElements; i++) {
3538 Indices.push_back(Elt: i);
3539 Indices.push_back(Elt: i + InputElements);
3540 }
3541 return Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: Indices);
3542}
3543
3544template<unsigned HighBit, unsigned OtherBits>
3545static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3546 // MVE-specific helper function to make a vector splat of a constant such as
3547 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
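  // For example, over 16-bit lanes, <HighBit=1, OtherBits=0> yields 0x8000
  // (INT16_MIN), <1, 1> yields 0xFFFF (UINT16_MAX), and <0, 1> yields
  // 0x7FFF (INT16_MAX).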
3548 llvm::Type *T = cast<llvm::VectorType>(Val: VT)->getElementType();
3549 unsigned LaneBits = T->getPrimitiveSizeInBits();
3550 uint32_t Value = HighBit << (LaneBits - 1);
3551 if (OtherBits)
3552 Value |= (1UL << (LaneBits - 1)) - 1;
3553 llvm::Value *Lane = llvm::ConstantInt::get(Ty: T, V: Value);
3554 return ARMMVEVectorSplat(Builder, V: Lane);
3555}
3556
3557static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3558 llvm::Value *V,
3559 unsigned ReverseWidth) {
3560 // MVE-specific helper function which reverses the elements of a
3561 // vector within every (ReverseWidth)-bit collection of lanes.
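  // For example, ReverseWidth == 32 on a 16 x i8 vector gives Mask == 3 and
  // the index sequence 3,2,1,0, 7,6,5,4, ..., reversing the bytes within
  // each 32-bit word.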
3562 SmallVector<int, 16> Indices;
3563 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3564 unsigned Elements = 128 / LaneSize;
3565 unsigned Mask = ReverseWidth / LaneSize - 1;
3566 for (unsigned i = 0; i < Elements; i++)
3567 Indices.push_back(Elt: i ^ Mask);
3568 return Builder.CreateShuffleVector(V, Mask: Indices);
3569}
3570
3571static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
3572 CodeGenFunction *CGF, llvm::Value *V,
3573 llvm::Type *Ty) {
3574 return Builder.CreateCall(
3575 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_fp_int, Tys: {Ty, V->getType()}),
3576 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0)});
3577}
3578
3579static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
3580 CodeGenFunction *CGF, llvm::Value *V,
3581 llvm::Type *Ty) {
3582 return Builder.CreateCall(
3583 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_fp_int, Tys: {Ty, V->getType()}),
3584 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 1)});
3585}
3586
3587static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
3588 CodeGenFunction *CGF, llvm::Value *V,
3589 llvm::Type *Ty) {
3590 return Builder.CreateCall(
3591 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_int_fp, Tys: {Ty, V->getType()}),
3592 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0)});
3593}
3594
3595static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
3596 CodeGenFunction *CGF, llvm::Value *V,
3597 llvm::Type *Ty) {
3598 return Builder.CreateCall(
3599 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_int_fp, Tys: {Ty, V->getType()}),
3600 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 1)});
3601}
3602
3603Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3604 const CallExpr *E,
3605 ReturnValueSlot ReturnValue,
3606 llvm::Triple::ArchType Arch) {
3607 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3608 Intrinsic::ID IRIntr;
3609 unsigned NumVectors;
3610
3611 // Code autogenerated by Tablegen will handle all the simple builtins.
3612 switch (BuiltinID) {
3613 #include "clang/Basic/arm_mve_builtin_cg.inc"
3614
3615 // If we didn't match an MVE builtin id at all, go back to the
3616 // main EmitARMBuiltinExpr.
3617 default:
3618 return nullptr;
3619 }
3620
3621 // Anything that breaks from that switch is an MVE builtin that
3622 // needs handwritten code to generate.
3623
3624 switch (CustomCodeGenType) {
3625
3626 case CustomCodeGen::VLD24: {
3627 llvm::SmallVector<Value *, 4> Ops;
3628 llvm::SmallVector<llvm::Type *, 4> Tys;
3629
3630 auto MvecCType = E->getType();
3631 auto MvecLType = ConvertType(T: MvecCType);
3632 assert(MvecLType->isStructTy() &&
3633 "Return type for vld[24]q should be a struct");
3634 assert(MvecLType->getStructNumElements() == 1 &&
3635 "Return-type struct for vld[24]q should have one element");
3636 auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
3637 assert(MvecLTypeInner->isArrayTy() &&
3638 "Return-type struct for vld[24]q should contain an array");
3639 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3640 "Array member of return-type struct vld[24]q has wrong length");
3641 auto VecLType = MvecLTypeInner->getArrayElementType();
3642
3643 Tys.push_back(Elt: VecLType);
3644
3645 auto Addr = E->getArg(Arg: 0);
3646 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
3647 Tys.push_back(Elt: ConvertType(T: Addr->getType()));
3648
3649 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
3650 Value *LoadResult = Builder.CreateCall(Callee: F, Args: Ops);
3651 Value *MvecOut = PoisonValue::get(T: MvecLType);
3652 for (unsigned i = 0; i < NumVectors; ++i) {
3653 Value *Vec = Builder.CreateExtractValue(Agg: LoadResult, Idxs: i);
3654 MvecOut = Builder.CreateInsertValue(Agg: MvecOut, Val: Vec, Idxs: {0, i});
3655 }
3656
3657 if (ReturnValue.isNull())
3658 return MvecOut;
3659 else
3660 return Builder.CreateStore(Val: MvecOut, Addr: ReturnValue.getAddress());
3661 }
3662
3663 case CustomCodeGen::VST24: {
3664 llvm::SmallVector<Value *, 4> Ops;
3665 llvm::SmallVector<llvm::Type *, 4> Tys;
3666
3667 auto Addr = E->getArg(Arg: 0);
3668 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
3669 Tys.push_back(Elt: ConvertType(T: Addr->getType()));
3670
3671 auto MvecCType = E->getArg(Arg: 1)->getType();
3672 auto MvecLType = ConvertType(T: MvecCType);
    assert(MvecLType->isStructTy() &&
           "Data type for vst[24]q should be a struct");
    assert(MvecLType->getStructNumElements() == 1 &&
           "Data-type struct for vst[24]q should have one element");
    auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
    assert(MvecLTypeInner->isArrayTy() &&
           "Data-type struct for vst[24]q should contain an array");
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of data-type struct for vst[24]q has wrong length");
3681 auto VecLType = MvecLTypeInner->getArrayElementType();
3682
3683 Tys.push_back(Elt: VecLType);
3684
3685 AggValueSlot MvecSlot = CreateAggTemp(T: MvecCType);
3686 EmitAggExpr(E: E->getArg(Arg: 1), AS: MvecSlot);
3687 auto Mvec = Builder.CreateLoad(Addr: MvecSlot.getAddress());
3688 for (unsigned i = 0; i < NumVectors; i++)
3689 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Mvec, Idxs: {0, i}));
3690
3691 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
3692 Value *ToReturn = nullptr;
3693 for (unsigned i = 0; i < NumVectors; i++) {
3694 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: i));
3695 ToReturn = Builder.CreateCall(Callee: F, Args: Ops);
3696 Ops.pop_back();
3697 }
3698 return ToReturn;
3699 }
3700 }
3701 llvm_unreachable("unknown custom codegen type.");
3702}
3703
3704Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3705 const CallExpr *E,
3706 ReturnValueSlot ReturnValue,
3707 llvm::Triple::ArchType Arch) {
3708 switch (BuiltinID) {
3709 default:
3710 return nullptr;
3711#include "clang/Basic/arm_cde_builtin_cg.inc"
3712 }
3713}
3714
3715static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3716 const CallExpr *E,
3717 SmallVectorImpl<Value *> &Ops,
3718 llvm::Triple::ArchType Arch) {
3719 unsigned int Int = 0;
3720 const char *s = nullptr;
3721
3722 switch (BuiltinID) {
3723 default:
3724 return nullptr;
3725 case NEON::BI__builtin_neon_vtbl1_v:
3726 case NEON::BI__builtin_neon_vqtbl1_v:
3727 case NEON::BI__builtin_neon_vqtbl1q_v:
3728 case NEON::BI__builtin_neon_vtbl2_v:
3729 case NEON::BI__builtin_neon_vqtbl2_v:
3730 case NEON::BI__builtin_neon_vqtbl2q_v:
3731 case NEON::BI__builtin_neon_vtbl3_v:
3732 case NEON::BI__builtin_neon_vqtbl3_v:
3733 case NEON::BI__builtin_neon_vqtbl3q_v:
3734 case NEON::BI__builtin_neon_vtbl4_v:
3735 case NEON::BI__builtin_neon_vqtbl4_v:
3736 case NEON::BI__builtin_neon_vqtbl4q_v:
3737 break;
3738 case NEON::BI__builtin_neon_vtbx1_v:
3739 case NEON::BI__builtin_neon_vqtbx1_v:
3740 case NEON::BI__builtin_neon_vqtbx1q_v:
3741 case NEON::BI__builtin_neon_vtbx2_v:
3742 case NEON::BI__builtin_neon_vqtbx2_v:
3743 case NEON::BI__builtin_neon_vqtbx2q_v:
3744 case NEON::BI__builtin_neon_vtbx3_v:
3745 case NEON::BI__builtin_neon_vqtbx3_v:
3746 case NEON::BI__builtin_neon_vqtbx3q_v:
3747 case NEON::BI__builtin_neon_vtbx4_v:
3748 case NEON::BI__builtin_neon_vqtbx4_v:
3749 case NEON::BI__builtin_neon_vqtbx4q_v:
3750 break;
3751 }
3752
3753 assert(E->getNumArgs() >= 3);
3754
3755 // Get the last argument, which specifies the vector type.
3756 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
3757 std::optional<llvm::APSInt> Result =
3758 Arg->getIntegerConstantExpr(Ctx: CGF.getContext());
3759 if (!Result)
3760 return nullptr;
3761
3762 // Determine the type of this overloaded NEON intrinsic.
3763 NeonTypeFlags Type = Result->getZExtValue();
3764 llvm::FixedVectorType *Ty = GetNeonType(CGF: &CGF, TypeFlags: Type);
3765 if (!Ty)
3766 return nullptr;
3767
3768 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3769
  // AArch64 scalar builtins are not overloaded: they do not have an extra
  // argument that specifies the vector type, so each case must be handled
  // individually.
3772 switch (BuiltinID) {
3773 case NEON::BI__builtin_neon_vtbl1_v: {
3774 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 1), ExtOp: nullptr, IndexOp: Ops[1],
3775 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3776 }
3777 case NEON::BI__builtin_neon_vtbl2_v: {
3778 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 2), ExtOp: nullptr, IndexOp: Ops[2],
3779 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3780 }
3781 case NEON::BI__builtin_neon_vtbl3_v: {
3782 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 3), ExtOp: nullptr, IndexOp: Ops[3],
3783 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3784 }
3785 case NEON::BI__builtin_neon_vtbl4_v: {
3786 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 4), ExtOp: nullptr, IndexOp: Ops[4],
3787 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3788 }
3789 case NEON::BI__builtin_neon_vtbx1_v: {
3790 Value *TblRes =
3791 packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 1), ExtOp: nullptr, IndexOp: Ops[2], ResTy: Ty,
3792 IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3793
3794 llvm::Constant *EightV = ConstantInt::get(Ty, V: 8);
3795 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[2], RHS: EightV);
3796 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3797
3798 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3799 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3800 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3801 }
3802 case NEON::BI__builtin_neon_vtbx2_v: {
3803 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 2), ExtOp: Ops[0], IndexOp: Ops[3],
3804 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbx1, Name: "vtbx1");
3805 }
3806 case NEON::BI__builtin_neon_vtbx3_v: {
3807 Value *TblRes =
3808 packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 3), ExtOp: nullptr, IndexOp: Ops[4], ResTy: Ty,
3809 IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3810
3811 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, V: 24);
3812 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[4],
3813 RHS: TwentyFourV);
3814 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3815
3816 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3817 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3818 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3819 }
3820 case NEON::BI__builtin_neon_vtbx4_v: {
3821 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 4), ExtOp: Ops[0], IndexOp: Ops[5],
3822 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbx2, Name: "vtbx2");
3823 }
3824 case NEON::BI__builtin_neon_vqtbl1_v:
3825 case NEON::BI__builtin_neon_vqtbl1q_v:
3826 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3827 case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
3829 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3830 case NEON::BI__builtin_neon_vqtbl3_v:
3831 case NEON::BI__builtin_neon_vqtbl3q_v:
3832 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3833 case NEON::BI__builtin_neon_vqtbl4_v:
3834 case NEON::BI__builtin_neon_vqtbl4q_v:
3835 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3836 case NEON::BI__builtin_neon_vqtbx1_v:
3837 case NEON::BI__builtin_neon_vqtbx1q_v:
3838 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3839 case NEON::BI__builtin_neon_vqtbx2_v:
3840 case NEON::BI__builtin_neon_vqtbx2q_v:
3841 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3842 case NEON::BI__builtin_neon_vqtbx3_v:
3843 case NEON::BI__builtin_neon_vqtbx3q_v:
3844 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3845 case NEON::BI__builtin_neon_vqtbx4_v:
3846 case NEON::BI__builtin_neon_vqtbx4q_v:
3847 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
  }
3850
3851 if (!Int)
3852 return nullptr;
3853
3854 Function *F = CGF.CGM.getIntrinsic(IID: Int, Tys: Ty);
3855 return CGF.EmitNeonCall(F, Ops, name: s);
3856}
3857
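// Place a 16-bit scalar in lane 0 of a <4 x i16> vector, so that a scalar
// operation can be carried out with the corresponding vector instruction.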
3858Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3859 auto *VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
3860 Op = Builder.CreateBitCast(V: Op, DestTy: Int16Ty);
3861 Value *V = PoisonValue::get(T: VTy);
3862 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
3863 Op = Builder.CreateInsertElement(Vec: V, NewElt: Op, Idx: CI);
3864 return Op;
3865}
3866
3867/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3868/// access builtin. Only required if it can't be inferred from the base pointer
3869/// operand.
3870llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3871 switch (TypeFlags.getMemEltType()) {
3872 case SVETypeFlags::MemEltTyDefault:
3873 return getEltType(TypeFlags);
3874 case SVETypeFlags::MemEltTyInt8:
3875 return Builder.getInt8Ty();
3876 case SVETypeFlags::MemEltTyInt16:
3877 return Builder.getInt16Ty();
3878 case SVETypeFlags::MemEltTyInt32:
3879 return Builder.getInt32Ty();
3880 case SVETypeFlags::MemEltTyInt64:
3881 return Builder.getInt64Ty();
3882 }
3883 llvm_unreachable("Unknown MemEltType");
3884}
3885
3886llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3887 switch (TypeFlags.getEltType()) {
3888 default:
3889 llvm_unreachable("Invalid SVETypeFlag!");
3890
3891 case SVETypeFlags::EltTyMFloat8:
3892 case SVETypeFlags::EltTyInt8:
3893 return Builder.getInt8Ty();
3894 case SVETypeFlags::EltTyInt16:
3895 return Builder.getInt16Ty();
3896 case SVETypeFlags::EltTyInt32:
3897 return Builder.getInt32Ty();
3898 case SVETypeFlags::EltTyInt64:
3899 return Builder.getInt64Ty();
3900 case SVETypeFlags::EltTyInt128:
3901 return Builder.getInt128Ty();
3902
3903 case SVETypeFlags::EltTyFloat16:
3904 return Builder.getHalfTy();
3905 case SVETypeFlags::EltTyFloat32:
3906 return Builder.getFloatTy();
3907 case SVETypeFlags::EltTyFloat64:
3908 return Builder.getDoubleTy();
3909
3910 case SVETypeFlags::EltTyBFloat16:
3911 return Builder.getBFloatTy();
3912
3913 case SVETypeFlags::EltTyBool8:
3914 case SVETypeFlags::EltTyBool16:
3915 case SVETypeFlags::EltTyBool32:
3916 case SVETypeFlags::EltTyBool64:
3917 return Builder.getInt1Ty();
3918 }
3919}
3920
// Return the LLVM predicate vector type corresponding to the element type
// described by TypeFlags.
3923llvm::ScalableVectorType *
3924CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3925 switch (TypeFlags.getEltType()) {
3926 default: llvm_unreachable("Unhandled SVETypeFlag!");
3927
3928 case SVETypeFlags::EltTyInt8:
3929 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3930 case SVETypeFlags::EltTyInt16:
3931 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3932 case SVETypeFlags::EltTyInt32:
3933 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3934 case SVETypeFlags::EltTyInt64:
3935 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3936
3937 case SVETypeFlags::EltTyBFloat16:
3938 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3939 case SVETypeFlags::EltTyFloat16:
3940 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3941 case SVETypeFlags::EltTyFloat32:
3942 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3943 case SVETypeFlags::EltTyFloat64:
3944 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3945
3946 case SVETypeFlags::EltTyBool8:
3947 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3948 case SVETypeFlags::EltTyBool16:
3949 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3950 case SVETypeFlags::EltTyBool32:
3951 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3952 case SVETypeFlags::EltTyBool64:
3953 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3954 }
3955}
3956
// Return the LLVM vector type corresponding to the element type described by
// TypeFlags.
3958llvm::ScalableVectorType *
3959CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3960 switch (TypeFlags.getEltType()) {
3961 default:
3962 llvm_unreachable("Invalid SVETypeFlag!");
3963
3964 case SVETypeFlags::EltTyInt8:
3965 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3966 case SVETypeFlags::EltTyInt16:
3967 return llvm::ScalableVectorType::get(ElementType: Builder.getInt16Ty(), MinNumElts: 8);
3968 case SVETypeFlags::EltTyInt32:
3969 return llvm::ScalableVectorType::get(ElementType: Builder.getInt32Ty(), MinNumElts: 4);
3970 case SVETypeFlags::EltTyInt64:
3971 return llvm::ScalableVectorType::get(ElementType: Builder.getInt64Ty(), MinNumElts: 2);
3972
3973 case SVETypeFlags::EltTyMFloat8:
3974 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3975 case SVETypeFlags::EltTyFloat16:
3976 return llvm::ScalableVectorType::get(ElementType: Builder.getHalfTy(), MinNumElts: 8);
3977 case SVETypeFlags::EltTyBFloat16:
3978 return llvm::ScalableVectorType::get(ElementType: Builder.getBFloatTy(), MinNumElts: 8);
3979 case SVETypeFlags::EltTyFloat32:
3980 return llvm::ScalableVectorType::get(ElementType: Builder.getFloatTy(), MinNumElts: 4);
3981 case SVETypeFlags::EltTyFloat64:
3982 return llvm::ScalableVectorType::get(ElementType: Builder.getDoubleTy(), MinNumElts: 2);
3983
3984 case SVETypeFlags::EltTyBool8:
3985 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3986 case SVETypeFlags::EltTyBool16:
3987 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3988 case SVETypeFlags::EltTyBool32:
3989 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3990 case SVETypeFlags::EltTyBool64:
3991 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3992 }
3993}
3994
3995llvm::Value *
3996CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3997 Function *Ptrue =
3998 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_ptrue, Tys: getSVEPredType(TypeFlags));
3999 return Builder.CreateCall(Callee: Ptrue, Args: {Builder.getInt32(/*SV_ALL*/ C: 31)});
4000}
4001
4002constexpr unsigned SVEBitsPerBlock = 128;
4003
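// Return the full-width scalable vector whose lanes of type EltTy fill one
// 128-bit SVE granule; e.g. i16 yields <vscale x 8 x i16> and double yields
// <vscale x 2 x double>.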
4004static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
4005 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
4006 return llvm::ScalableVectorType::get(ElementType: EltTy, MinNumElts: NumElts);
4007}
4008
4009// Reinterpret the input predicate so that it can be used to correctly isolate
4010// the elements of the specified datatype.
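// For example, an svbool_t predicate (<vscale x 16 x i1>) guarding 64-bit
// elements is narrowed to <vscale x 2 x i1> with
// aarch64.sve.convert.from.svbool; widening back uses
// aarch64.sve.convert.to.svbool.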
4011Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
4012 llvm::ScalableVectorType *VTy) {
4013
4014 if (isa<TargetExtType>(Val: Pred->getType()) &&
4015 cast<TargetExtType>(Val: Pred->getType())->getName() == "aarch64.svcount")
4016 return Pred;
4017
4018 auto *RTy = llvm::VectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), Other: VTy);
4019 if (Pred->getType() == RTy)
4020 return Pred;
4021
4022 unsigned IntID;
4023 llvm::Type *IntrinsicTy;
4024 switch (VTy->getMinNumElements()) {
4025 default:
4026 llvm_unreachable("unsupported element count!");
4027 case 1:
4028 case 2:
4029 case 4:
4030 case 8:
4031 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
4032 IntrinsicTy = RTy;
4033 break;
4034 case 16:
4035 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
4036 IntrinsicTy = Pred->getType();
4037 break;
4038 }
4039
4040 Function *F = CGM.getIntrinsic(IID: IntID, Tys: IntrinsicTy);
4041 Value *C = Builder.CreateCall(Callee: F, Args: Pred);
4042 assert(C->getType() == RTy && "Unexpected return type!");
4043 return C;
4044}
4045
4046Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
4047 llvm::StructType *Ty) {
4048 if (PredTuple->getType() == Ty)
4049 return PredTuple;
4050
4051 Value *Ret = llvm::PoisonValue::get(T: Ty);
4052 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
4053 Value *Pred = Builder.CreateExtractValue(Agg: PredTuple, Idxs: I);
4054 Pred = EmitSVEPredicateCast(
4055 Pred, VTy: cast<llvm::ScalableVectorType>(Val: Ty->getTypeAtIndex(N: I)));
4056 Ret = Builder.CreateInsertValue(Agg: Ret, Val: Pred, Idxs: I);
4057 }
4058
4059 return Ret;
4060}
4061
4062Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
4063 SmallVectorImpl<Value *> &Ops,
4064 unsigned IntID) {
4065 auto *ResultTy = getSVEType(TypeFlags);
4066 auto *OverloadedTy =
4067 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: ResultTy);
4068
4069 Function *F = nullptr;
4070 if (Ops[1]->getType()->isVectorTy())
4071 // This is the "vector base, scalar offset" case. In order to uniquely
4072 // map this built-in to an LLVM IR intrinsic, we need both the return type
4073 // and the type of the vector base.
4074 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[1]->getType()});
4075 else
    // This is the "scalar base, vector offset" case. The type of the offset
4077 // is encoded in the name of the intrinsic. We only need to specify the
4078 // return type in order to uniquely map this built-in to an LLVM IR
4079 // intrinsic.
4080 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4081
4082 // At the ACLE level there's only one predicate type, svbool_t, which is
4083 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4084 // actual type being loaded. For example, when loading doubles (i64) the
4085 // predicate should be <n x 2 x i1> instead. At the IR level the type of
4086 // the predicate and the data being loaded must match. Cast to the type
4087 // expected by the intrinsic. The intrinsic itself should be defined in
  // a way that enforces relations between parameter types.
4089 Ops[0] = EmitSVEPredicateCast(
4090 Pred: Ops[0], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 0)->getType()));
4091
4092 // Pass 0 when the offset is missing. This can only be applied when using
4093 // the "vector base" addressing mode for which ACLE allows no offset. The
4094 // corresponding LLVM IR always requires an offset.
4095 if (Ops.size() == 2) {
4096 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4097 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4098 }
4099
4100 // For "vector base, scalar index" scale the index so that it becomes a
4101 // scalar offset.
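  // For example, with 32-bit elements (BytesPerElt == 4) an index i becomes
  // the byte offset i << 2.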
4102 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
4103 unsigned BytesPerElt =
4104 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4105 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
4106 }
4107
4108 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
4109
4110 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
4111 // other cases it's folded into a nop.
4112 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(V: Call, DestTy: ResultTy)
4113 : Builder.CreateSExt(V: Call, DestTy: ResultTy);
4114}
4115
4116Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
4117 SmallVectorImpl<Value *> &Ops,
4118 unsigned IntID) {
4119 auto *SrcDataTy = getSVEType(TypeFlags);
4120 auto *OverloadedTy =
4121 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: SrcDataTy);
4122
4123 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
4124 // it's the first argument. Move it accordingly.
4125 Ops.insert(I: Ops.begin(), Elt: Ops.pop_back_val());
4126
4127 Function *F = nullptr;
4128 if (Ops[2]->getType()->isVectorTy())
4129 // This is the "vector base, scalar offset" case. In order to uniquely
4130 // map this built-in to an LLVM IR intrinsic, we need both the return type
4131 // and the type of the vector base.
4132 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[2]->getType()});
4133 else
    // This is the "scalar base, vector offset" case. The type of the offset
4135 // is encoded in the name of the intrinsic. We only need to specify the
4136 // return type in order to uniquely map this built-in to an LLVM IR
4137 // intrinsic.
4138 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4139
4140 // Pass 0 when the offset is missing. This can only be applied when using
4141 // the "vector base" addressing mode for which ACLE allows no offset. The
4142 // corresponding LLVM IR always requires an offset.
4143 if (Ops.size() == 3) {
4144 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4145 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4146 }
4147
4148 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4149 // folded into a nop.
4150 Ops[0] = Builder.CreateTrunc(V: Ops[0], DestTy: OverloadedTy);
4151
4152 // At the ACLE level there's only one predicate type, svbool_t, which is
4153 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4154 // actual type being stored. For example, when storing doubles (i64) the
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
4156 // the predicate and the data being stored must match. Cast to the type
4157 // expected by the intrinsic. The intrinsic itself should be defined in
4158 // a way that enforces relations between parameter types.
4159 Ops[1] = EmitSVEPredicateCast(
4160 Pred: Ops[1], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 1)->getType()));
4161
4162 // For "vector base, scalar index" scale the index so that it becomes a
4163 // scalar offset.
4164 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4165 unsigned BytesPerElt =
4166 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4167 Ops[3] = Builder.CreateShl(LHS: Ops[3], RHS: Log2_32(Value: BytesPerElt));
4168 }
4169
4170 return Builder.CreateCall(Callee: F, Args: Ops);
4171}
4172
4173Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4174 SmallVectorImpl<Value *> &Ops,
4175 unsigned IntID) {
4176 // The gather prefetches are overloaded on the vector input - this can either
4177 // be the vector of base addresses or vector of offsets.
4178 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Val: Ops[1]->getType());
4179 if (!OverloadedTy)
4180 OverloadedTy = cast<llvm::ScalableVectorType>(Val: Ops[2]->getType());
4181
4182 // Cast the predicate from svbool_t to the right number of elements.
4183 Ops[0] = EmitSVEPredicateCast(Pred: Ops[0], VTy: OverloadedTy);
4184
4185 // vector + imm addressing modes
4186 if (Ops[1]->getType()->isVectorTy()) {
4187 if (Ops.size() == 3) {
4188 // Pass 0 for 'vector+imm' when the index is omitted.
4189 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4190
4191 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4192 std::swap(a&: Ops[2], b&: Ops[3]);
4193 } else {
      // The index needs to be passed as a scaled offset.
4195 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4196 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4197 if (BytesPerElt > 1)
4198 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
4199 }
4200 }
4201
4202 Function *F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4203 return Builder.CreateCall(Callee: F, Args: Ops);
4204}
4205
4206Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4207 SmallVectorImpl<Value*> &Ops,
4208 unsigned IntID) {
4209 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4210 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
4211 Value *BasePtr = Ops[1];
4212
4213 // Does the load have an offset?
4214 if (Ops.size() > 2)
4215 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
4216
4217 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {VTy});
4218 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr});
4219}
4220
4221Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4222 SmallVectorImpl<Value*> &Ops,
4223 unsigned IntID) {
4224 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4225
4226 unsigned N;
4227 switch (IntID) {
4228 case Intrinsic::aarch64_sve_st2:
4229 case Intrinsic::aarch64_sve_st1_pn_x2:
4230 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4231 case Intrinsic::aarch64_sve_st2q:
4232 N = 2;
4233 break;
4234 case Intrinsic::aarch64_sve_st3:
4235 case Intrinsic::aarch64_sve_st3q:
4236 N = 3;
4237 break;
4238 case Intrinsic::aarch64_sve_st4:
4239 case Intrinsic::aarch64_sve_st1_pn_x4:
4240 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4241 case Intrinsic::aarch64_sve_st4q:
4242 N = 4;
4243 break;
4244 default:
4245 llvm_unreachable("unknown intrinsic!");
4246 }
4247
4248 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
4249 Value *BasePtr = Ops[1];
4250
4251 // Does the store have an offset?
4252 if (Ops.size() > (2 + N))
4253 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
4254
4255 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4256 // need to break up the tuple vector.
4257 SmallVector<llvm::Value*, 5> Operands;
4258 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4259 Operands.push_back(Elt: Ops[I]);
4260 Operands.append(IL: {Predicate, BasePtr});
4261 Function *F = CGM.getIntrinsic(IID: IntID, Tys: { VTy });
4262
4263 return Builder.CreateCall(Callee: F, Args: Operands);
4264}
4265
4266// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4267// svpmullt_pair intrinsics, with the exception that their results are bitcast
4268// to a wider type.
4269Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4270 SmallVectorImpl<Value *> &Ops,
4271 unsigned BuiltinID) {
4272 // Splat scalar operand to vector (intrinsics with _n infix)
4273 if (TypeFlags.hasSplatOperand()) {
4274 unsigned OpNo = TypeFlags.getSplatOperand();
4275 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
4276 }
4277
4278 // The pair-wise function has a narrower overloaded type.
4279 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Ops[0]->getType());
4280 Value *Call = Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1]});
4281
4282 // Now bitcast to the wider result type.
4283 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4284 return EmitSVEReinterpret(Val: Call, Ty);
4285}
4286
4287Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4288 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4289 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4290 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: OverloadedTy);
4291 return Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt32(C: 0)});
4292}
4293
4294Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4295 SmallVectorImpl<Value *> &Ops,
4296 unsigned BuiltinID) {
4297 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4298 auto *VectorTy = getSVEVectorForElementType(EltTy: MemEltTy);
4299 auto *MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4300
4301 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: MemoryTy);
4302 Value *BasePtr = Ops[1];
4303
  // Apply the index operand to the base pointer if it was not omitted.
4305 if (Ops.size() > 3)
4306 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4307
4308 Value *PrfOp = Ops.back();
4309
4310 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Predicate->getType());
4311 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr, PrfOp});
4312}
4313
4314Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4315 llvm::Type *ReturnTy,
4316 SmallVectorImpl<Value *> &Ops,
4317 unsigned IntrinsicID,
4318 bool IsZExtReturn) {
4319 QualType LangPTy = E->getArg(Arg: 1)->getType();
4320 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4321 T: LangPTy->castAs<PointerType>()->getPointeeType());
4322
  // The Mfloat8 type is stored as a vector, so extra work is needed to
  // extract the scalar element type.
4325 if (MemEltTy->isVectorTy()) {
4326 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4327 "Only <1 x i8> expected");
4328 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
4329 }
4330
4331 // The vector type that is returned may be different from the
4332 // eventual type loaded from memory.
4333 auto VectorTy = cast<llvm::ScalableVectorType>(Val: ReturnTy);
4334 llvm::ScalableVectorType *MemoryTy = nullptr;
4335 llvm::ScalableVectorType *PredTy = nullptr;
4336 bool IsQuadLoad = false;
4337 switch (IntrinsicID) {
4338 case Intrinsic::aarch64_sve_ld1uwq:
4339 case Intrinsic::aarch64_sve_ld1udq:
4340 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
4341 PredTy = llvm::ScalableVectorType::get(
4342 ElementType: llvm::Type::getInt1Ty(C&: getLLVMContext()), MinNumElts: 1);
4343 IsQuadLoad = true;
4344 break;
4345 default:
4346 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4347 PredTy = MemoryTy;
4348 break;
4349 }
4350
4351 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
4352 Value *BasePtr = Ops[1];
4353
4354 // Does the load have an offset?
4355 if (Ops.size() > 2)
4356 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4357
4358 Function *F = CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadLoad ? VectorTy : MemoryTy);
4359 auto *Load =
4360 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr}));
4361 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
4362 CGM.DecorateInstructionWithTBAA(Inst: Load, TBAAInfo);
4363
4364 if (IsQuadLoad)
4365 return Load;
4366
4367 return IsZExtReturn ? Builder.CreateZExt(V: Load, DestTy: VectorTy)
4368 : Builder.CreateSExt(V: Load, DestTy: VectorTy);
4369}
4370
4371Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4372 SmallVectorImpl<Value *> &Ops,
4373 unsigned IntrinsicID) {
4374 QualType LangPTy = E->getArg(Arg: 1)->getType();
4375 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4376 T: LangPTy->castAs<PointerType>()->getPointeeType());
4377
  // The Mfloat8 type is stored as a vector, so extra work is needed to
  // extract the scalar element type.
4380 if (MemEltTy->isVectorTy()) {
4381 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4382 "Only <1 x i8> expected");
4383 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
4384 }
4385
4386 // The vector type that is stored may be different from the
4387 // eventual type stored to memory.
4388 auto VectorTy = cast<llvm::ScalableVectorType>(Val: Ops.back()->getType());
4389 auto MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4390
4391 auto PredTy = MemoryTy;
4392 auto AddrMemoryTy = MemoryTy;
4393 bool IsQuadStore = false;
4394
4395 switch (IntrinsicID) {
4396 case Intrinsic::aarch64_sve_st1wq:
4397 case Intrinsic::aarch64_sve_st1dq:
4398 AddrMemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
4399 PredTy =
4400 llvm::ScalableVectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), MinNumElts: 1);
4401 IsQuadStore = true;
4402 break;
4403 default:
4404 break;
4405 }
4406 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
4407 Value *BasePtr = Ops[1];
4408
4409 // Does the store have an offset?
4410 if (Ops.size() == 4)
4411 BasePtr = Builder.CreateGEP(Ty: AddrMemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4412
4413 // Last value is always the data
4414 Value *Val =
4415 IsQuadStore ? Ops.back() : Builder.CreateTrunc(V: Ops.back(), DestTy: MemoryTy);
4416
4417 Function *F =
4418 CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadStore ? VectorTy : MemoryTy);
4419 auto *Store =
4420 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Val, Predicate, BasePtr}));
4421 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
4422 CGM.DecorateInstructionWithTBAA(Inst: Store, TBAAInfo);
4423 return Store;
4424}
4425
4426Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4427 SmallVectorImpl<Value *> &Ops,
4428 unsigned IntID) {
4429 Ops[2] = EmitSVEPredicateCast(
4430 Pred: Ops[2], VTy: getSVEVectorForElementType(EltTy: SVEBuiltinMemEltTy(TypeFlags)));
4431
4432 SmallVector<Value *> NewOps;
4433 NewOps.push_back(Elt: Ops[2]);
4434
4435 llvm::Value *BasePtr = Ops[3];
4436 llvm::Value *RealSlice = Ops[1];
  // If the intrinsic contains the vnum parameter, multiply it by the vector
  // size in bytes.
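  // (aarch64.sme.cntsd returns the number of 64-bit elements in a streaming
  // vector, so multiplying it by 8 gives the streaming vector length in
  // bytes.)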
4439 if (Ops.size() == 5) {
4440 Function *StreamingVectorLength =
4441 CGM.getIntrinsic(IID: Intrinsic::aarch64_sme_cntsd);
4442 llvm::Value *StreamingVectorLengthCall =
4443 Builder.CreateMul(LHS: Builder.CreateCall(Callee: StreamingVectorLength),
4444 RHS: llvm::ConstantInt::get(Ty: Int64Ty, V: 8), Name: "svl",
4445 /* HasNUW */ true, /* HasNSW */ true);
4446 llvm::Value *Mulvl =
4447 Builder.CreateMul(LHS: StreamingVectorLengthCall, RHS: Ops[4], Name: "mulvl");
4448 // The type of the ptr parameter is void *, so use Int8Ty here.
4449 BasePtr = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[3], IdxList: Mulvl);
4450 RealSlice = Builder.CreateZExt(V: RealSlice, DestTy: Int64Ty);
4451 RealSlice = Builder.CreateAdd(LHS: RealSlice, RHS: Ops[4]);
4452 RealSlice = Builder.CreateTrunc(V: RealSlice, DestTy: Int32Ty);
4453 }
4454 NewOps.push_back(Elt: BasePtr);
4455 NewOps.push_back(Elt: Ops[0]);
4456 NewOps.push_back(Elt: RealSlice);
4457 Function *F = CGM.getIntrinsic(IID: IntID);
4458 return Builder.CreateCall(Callee: F, Args: NewOps);
4459}
4460
4461Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4462 SmallVectorImpl<Value *> &Ops,
4463 unsigned IntID) {
4464 auto *VecTy = getSVEType(TypeFlags);
4465 Function *F = CGM.getIntrinsic(IID: IntID, Tys: VecTy);
4466 if (TypeFlags.isReadZA())
4467 Ops[1] = EmitSVEPredicateCast(Pred: Ops[1], VTy: VecTy);
4468 else if (TypeFlags.isWriteZA())
4469 Ops[2] = EmitSVEPredicateCast(Pred: Ops[2], VTy: VecTy);
4470 return Builder.CreateCall(Callee: F, Args: Ops);
4471}
4472
4473Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4474 SmallVectorImpl<Value *> &Ops,
4475 unsigned IntID) {
  // The svzero_za() intrinsic zeros the entire ZA array and has no
  // parameters.
4477 if (Ops.size() == 0)
4478 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: 255));
4479 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
4480 return Builder.CreateCall(Callee: F, Args: Ops);
4481}
4482
4483Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4484 SmallVectorImpl<Value *> &Ops,
4485 unsigned IntID) {
4486 if (Ops.size() == 2)
4487 Ops.push_back(Elt: Builder.getInt32(C: 0));
4488 else
4489 Ops[2] = Builder.CreateIntCast(V: Ops[2], DestTy: Int32Ty, isSigned: true);
4490 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
4491 return Builder.CreateCall(Callee: F, Args: Ops);
4492}
4493
// Splat a scalar across all lanes of the given scalable vector type via
// IRBuilder::CreateVectorSplat.
4496Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4497 return Builder.CreateVectorSplat(
4498 EC: cast<llvm::VectorType>(Val: Ty)->getElementCount(), V: Scalar);
4499}
4500
4501Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4502 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4503#ifndef NDEBUG
4504 auto *VecTy = cast<llvm::VectorType>(Ty);
4505 ElementCount EC = VecTy->getElementCount();
4506 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4507 "Only <1 x i8> expected");
4508#endif
4509 Scalar = Builder.CreateExtractElement(Vec: Scalar, Idx: uint64_t(0));
4510 }
4511 return EmitSVEDupX(Scalar, Ty: getSVEVectorForElementType(EltTy: Scalar->getType()));
4512}
4513
4514Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4515 // FIXME: For big endian this needs an additional REV, or needs a separate
4516 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4517 // instruction is defined as 'bitwise' equivalent from memory point of
4518 // view (when storing/reloading), whereas the svreinterpret builtin
4519 // implements bitwise equivalent cast from register point of view.
4520 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4521
4522 if (auto *StructTy = dyn_cast<StructType>(Val: Ty)) {
4523 Value *Tuple = llvm::PoisonValue::get(T: Ty);
4524
4525 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4526 Value *In = Builder.CreateExtractValue(Agg: Val, Idxs: I);
4527 Value *Out = Builder.CreateBitCast(V: In, DestTy: StructTy->getTypeAtIndex(N: I));
4528 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Out, Idxs: I);
4529 }
4530
4531 return Tuple;
4532 }
4533
4534 return Builder.CreateBitCast(V: Val, DestTy: Ty);
4535}
4536
4537static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4538 SmallVectorImpl<Value *> &Ops) {
4539 auto *SplatZero = Constant::getNullValue(Ty);
4540 Ops.insert(I: Ops.begin(), Elt: SplatZero);
4541}
4542
4543static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4544 SmallVectorImpl<Value *> &Ops) {
4545 auto *SplatUndef = UndefValue::get(T: Ty);
4546 Ops.insert(I: Ops.begin(), Elt: SplatUndef);
4547}
4548
4549SmallVector<llvm::Type *, 2>
4550CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4551 llvm::Type *ResultType,
4552 ArrayRef<Value *> Ops) {
4553 if (TypeFlags.isOverloadNone())
4554 return {};
4555
4556 llvm::Type *DefaultType = getSVEType(TypeFlags);
4557
4558 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4559 return {DefaultType, Ops[1]->getType()};
4560
4561 if (TypeFlags.isOverloadWhileRW())
4562 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4563
4564 if (TypeFlags.isOverloadFirstandLast())
4565 return {Ops[0]->getType(), Ops.back()->getType()};
4566
4567 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4568 ResultType->isVectorTy())
4569 return {ResultType, Ops[1]->getType()};
4570
4571 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4572 return {DefaultType};
4573}
4574
4575Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4576 ArrayRef<Value *> Ops) {
4577 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4578 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
4579 unsigned Idx = cast<ConstantInt>(Val: Ops[1])->getZExtValue();
4580
4581 if (TypeFlags.isTupleSet())
4582 return Builder.CreateInsertValue(Agg: Ops[0], Val: Ops[2], Idxs: Idx);
4583 return Builder.CreateExtractValue(Agg: Ops[0], Idxs: Idx);
4584}
4585
4586Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4587 llvm::Type *Ty,
4588 ArrayRef<Value *> Ops) {
  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4590
4591 Value *Tuple = llvm::PoisonValue::get(T: Ty);
4592 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4593 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Ops[Idx], Idxs: Idx);
4594
4595 return Tuple;
4596}
4597
4598void CodeGenFunction::GetAArch64SVEProcessedOperands(
4599 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4600 SVETypeFlags TypeFlags) {
4601 // Find out if any arguments are required to be integer constant expressions.
4602 unsigned ICEArguments = 0;
4603 ASTContext::GetBuiltinTypeError Error;
4604 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
4605 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4606
  // Tuple set/get only requires one insert/extract vector, which is handled
  // by EmitSVETupleSetOrGet.
4609 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4610
4611 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4612 bool IsICE = ICEArguments & (1 << i);
4613 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: i));
4614
4615 if (IsICE) {
4616 // If this is required to be a constant, constant fold it so that we know
4617 // that the generated intrinsic gets a ConstantInt.
4618 std::optional<llvm::APSInt> Result =
4619 E->getArg(Arg: i)->getIntegerConstantExpr(Ctx: getContext());
4620 assert(Result && "Expected argument to be a constant");
4621
      // Immediates for SVE LLVM intrinsics are always 32-bit. We can safely
4623 // truncate because the immediate has been range checked and no valid
4624 // immediate requires more than a handful of bits.
4625 *Result = Result->extOrTrunc(width: 32);
4626 Ops.push_back(Elt: llvm::ConstantInt::get(Context&: getLLVMContext(), V: *Result));
4627 continue;
4628 }
4629
4630 if (isa<StructType>(Val: Arg->getType()) && !IsTupleGetOrSet) {
4631 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4632 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Arg, Idxs: I));
4633
4634 continue;
4635 }
4636
4637 Ops.push_back(Elt: Arg);
4638 }
4639}
4640
4641Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4642 const CallExpr *E) {
4643 llvm::Type *Ty = ConvertType(T: E->getType());
4644 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4645 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4646 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
4647 return EmitSVEReinterpret(Val, Ty);
4648 }
4649
4650 auto *Builtin = findARMVectorIntrinsicInMap(IntrinsicMap: AArch64SVEIntrinsicMap, BuiltinID,
4651 MapProvenSorted&: AArch64SVEIntrinsicsProvenSorted);
4652
4653 llvm::SmallVector<Value *, 4> Ops;
4654 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4655 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4656
4657 if (TypeFlags.isLoad())
4658 return EmitSVEMaskedLoad(E, ReturnTy: Ty, Ops, IntrinsicID: Builtin->LLVMIntrinsic,
4659 IsZExtReturn: TypeFlags.isZExtReturn());
4660 if (TypeFlags.isStore())
4661 return EmitSVEMaskedStore(E, Ops, IntrinsicID: Builtin->LLVMIntrinsic);
4662 if (TypeFlags.isGatherLoad())
4663 return EmitSVEGatherLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4664 if (TypeFlags.isScatterStore())
4665 return EmitSVEScatterStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4666 if (TypeFlags.isPrefetch())
4667 return EmitSVEPrefetchLoad(TypeFlags, Ops, BuiltinID: Builtin->LLVMIntrinsic);
4668 if (TypeFlags.isGatherPrefetch())
4669 return EmitSVEGatherPrefetch(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4670 if (TypeFlags.isStructLoad())
4671 return EmitSVEStructLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4672 if (TypeFlags.isStructStore())
4673 return EmitSVEStructStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4674 if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4675 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4676 if (TypeFlags.isTupleCreate())
4677 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4678 if (TypeFlags.isUndef())
4679 return UndefValue::get(T: Ty);
4680
4681 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
4682 // -------------------------------------------------------------------
4683 if (Builtin->LLVMIntrinsic != 0) {
4684 // Emit set FPMR for intrinsics that require it
4685 if (TypeFlags.setsFPMR())
4686 Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_set_fpmr),
4687 Args: Ops.pop_back_val());
4688 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4689 InsertExplicitZeroOperand(Builder, Ty, Ops);
4690
4691 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4692 InsertExplicitUndefOperand(Builder, Ty, Ops);
4693
4694 // Some ACLE builtins leave out the argument to specify the predicate
4695 // pattern, which is expected to be expanded to an SV_ALL pattern.
4696 if (TypeFlags.isAppendSVALL())
4697 Ops.push_back(Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4698 if (TypeFlags.isInsertOp1SVALL())
4699 Ops.insert(I: &Ops[1], Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4700
4701 // Predicates must match the main datatype.
4702 for (Value *&Op : Ops)
4703 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Op->getType()))
4704 if (PredTy->getElementType()->isIntegerTy(Bitwidth: 1))
4705 Op = EmitSVEPredicateCast(Pred: Op, VTy: getSVEType(TypeFlags));
4706
4707 // Splat scalar operand to vector (intrinsics with _n infix)
4708 if (TypeFlags.hasSplatOperand()) {
4709 unsigned OpNo = TypeFlags.getSplatOperand();
4710 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
4711 }
4712
4713 if (TypeFlags.isReverseCompare())
4714 std::swap(a&: Ops[1], b&: Ops[2]);
4715 else if (TypeFlags.isReverseUSDOT())
4716 std::swap(a&: Ops[1], b&: Ops[2]);
4717 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4718 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4719 std::swap(a&: Ops[1], b&: Ops[2]);
4720 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4721 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4722 std::swap(a&: Ops[1], b&: Ops[3]);
4723
4724 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4725 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4726 llvm::Type *OpndTy = Ops[1]->getType();
4727 auto *SplatZero = Constant::getNullValue(Ty: OpndTy);
4728 Ops[1] = Builder.CreateSelect(C: Ops[0], True: Ops[1], False: SplatZero);
4729 }
4730
4731 Function *F = CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic,
4732 Tys: getSVEOverloadTypes(TypeFlags, ResultType: Ty, Ops));
4733 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
4734
4735 if (Call->getType() == Ty)
4736 return Call;
4737
4738 // Predicate results must be converted to svbool_t.
4739 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Val: Ty))
4740 return EmitSVEPredicateCast(Pred: Call, VTy: PredTy);
4741 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Val: Ty))
4742 return EmitSVEPredicateTupleCast(PredTuple: Call, Ty: PredTupleTy);
4743
4744 llvm_unreachable("unsupported element count!");
4745 }
4746
4747 switch (BuiltinID) {
4748 default:
4749 return nullptr;
4750
  case SVE::BI__builtin_sve_svreinterpret_b: {
    auto SVCountTy =
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
    Function *CastFromSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
    return Builder.CreateCall(CastFromSVCountF, Ops[0]);
  }
  case SVE::BI__builtin_sve_svreinterpret_c: {
    auto SVCountTy =
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
    Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
    return Builder.CreateCall(CastToSVCountF, Ops[0]);
  }

  case SVE::BI__builtin_sve_svpsel_lane_b8:
  case SVE::BI__builtin_sve_svpsel_lane_b16:
  case SVE::BI__builtin_sve_svpsel_lane_b32:
  case SVE::BI__builtin_sve_svpsel_lane_b64:
  case SVE::BI__builtin_sve_svpsel_lane_c8:
  case SVE::BI__builtin_sve_svpsel_lane_c16:
  case SVE::BI__builtin_sve_svpsel_lane_c32:
  case SVE::BI__builtin_sve_svpsel_lane_c64: {
    bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
    assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
                               "aarch64.svcount")) &&
           "Unexpected TargetExtType");
    auto SVCountTy =
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
    Function *CastFromSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
    Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);

    auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
    llvm::Value *Ops0 =
        IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
    llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
    llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
    return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
  }
  case SVE::BI__builtin_sve_svmov_b_z: {
    // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    llvm::Type *OverloadedTy = getSVEType(TypeFlags);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
  }

  case SVE::BI__builtin_sve_svnot_b_z: {
    // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    llvm::Type *OverloadedTy = getSVEType(TypeFlags);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
  }

  case SVE::BI__builtin_sve_svmovlb_u16:
  case SVE::BI__builtin_sve_svmovlb_u32:
  case SVE::BI__builtin_sve_svmovlb_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);

  case SVE::BI__builtin_sve_svmovlb_s16:
  case SVE::BI__builtin_sve_svmovlb_s32:
  case SVE::BI__builtin_sve_svmovlb_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);

  case SVE::BI__builtin_sve_svmovlt_u16:
  case SVE::BI__builtin_sve_svmovlt_u32:
  case SVE::BI__builtin_sve_svmovlt_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);

  case SVE::BI__builtin_sve_svmovlt_s16:
  case SVE::BI__builtin_sve_svmovlt_s32:
  case SVE::BI__builtin_sve_svmovlt_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);

  case SVE::BI__builtin_sve_svpmullt_u16:
  case SVE::BI__builtin_sve_svpmullt_u64:
  case SVE::BI__builtin_sve_svpmullt_n_u16:
  case SVE::BI__builtin_sve_svpmullt_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);

  case SVE::BI__builtin_sve_svpmullb_u16:
  case SVE::BI__builtin_sve_svpmullb_u64:
  case SVE::BI__builtin_sve_svpmullb_n_u16:
  case SVE::BI__builtin_sve_svpmullb_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);

  case SVE::BI__builtin_sve_svdup_n_b8:
  case SVE::BI__builtin_sve_svdup_n_b16:
  case SVE::BI__builtin_sve_svdup_n_b32:
  case SVE::BI__builtin_sve_svdup_n_b64: {
    Value *CmpNE =
        Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
    llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
    Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
    return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
  }

  case SVE::BI__builtin_sve_svdupq_n_b8:
  case SVE::BI__builtin_sve_svdupq_n_b16:
  case SVE::BI__builtin_sve_svdupq_n_b32:
  case SVE::BI__builtin_sve_svdupq_n_b64:
  case SVE::BI__builtin_sve_svdupq_n_u8:
  case SVE::BI__builtin_sve_svdupq_n_s8:
  case SVE::BI__builtin_sve_svdupq_n_u64:
  case SVE::BI__builtin_sve_svdupq_n_f64:
  case SVE::BI__builtin_sve_svdupq_n_s64:
  case SVE::BI__builtin_sve_svdupq_n_u16:
  case SVE::BI__builtin_sve_svdupq_n_f16:
  case SVE::BI__builtin_sve_svdupq_n_bf16:
  case SVE::BI__builtin_sve_svdupq_n_s16:
  case SVE::BI__builtin_sve_svdupq_n_u32:
  case SVE::BI__builtin_sve_svdupq_n_f32:
  case SVE::BI__builtin_sve_svdupq_n_s32: {
    // These builtins are implemented by building a fixed-length vector from
    // the scalar arguments and splatting its 128-bit contents across the
    // scalable result with dupq_lane.
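    // For example, for svdupq_n_s32(a, b, c, d) the emitted sequence is
    // roughly (illustrative IR shape only, value names invented):
    //   %v     = <4 x i32> built from a, b, c, d
    //   %ins   = insertvector <vscale x 4 x i32> poison, %v, i64 0
    //   %splat = @llvm.aarch64.sve.dupq.lane(%ins, i64 0)
    // For the _b* predicate forms, the result is additionally compared
    // against zero below to produce an svbool_t.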
    unsigned NumOpnds = Ops.size();

    bool IsBoolTy =
        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);

    // For svdupq_n_b* the element type is an integer of width 128/numelts,
    // so that the compare can use the width that is natural for the expected
    // number of predicate lanes.
    llvm::Type *EltTy = Ops[0]->getType();
    if (IsBoolTy)
      EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);

    SmallVector<llvm::Value *, 16> VecOps;
    for (unsigned I = 0; I < NumOpnds; ++I)
      VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
    Value *Vec = BuildVector(VecOps);

    llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
    Value *InsertSubVec = Builder.CreateInsertVector(
        OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));

    Function *F =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
    Value *DupQLane =
        Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});

    if (!IsBoolTy)
      return DupQLane;

    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    Value *Pred = EmitSVEAllTruePred(TypeFlags);

    // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
    F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
                                       : Intrinsic::aarch64_sve_cmpne_wide,
                         OverloadedTy);
    Value *Call = Builder.CreateCall(
        F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
    return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
  }

  case SVE::BI__builtin_sve_svpfalse_b:
    return ConstantInt::getFalse(Ty);

  case SVE::BI__builtin_sve_svpfalse_c: {
    auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
    Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
    return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
  }

  case SVE::BI__builtin_sve_svlen_bf16:
  case SVE::BI__builtin_sve_svlen_f16:
  case SVE::BI__builtin_sve_svlen_f32:
  case SVE::BI__builtin_sve_svlen_f64:
  case SVE::BI__builtin_sve_svlen_s8:
  case SVE::BI__builtin_sve_svlen_s16:
  case SVE::BI__builtin_sve_svlen_s32:
  case SVE::BI__builtin_sve_svlen_s64:
  case SVE::BI__builtin_sve_svlen_u8:
  case SVE::BI__builtin_sve_svlen_u16:
  case SVE::BI__builtin_sve_svlen_u32:
  case SVE::BI__builtin_sve_svlen_u64: {
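    // svlen returns the number of elements in the given vector type, i.e.
    // vscale multiplied by the number of elements per 128-bit block.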
    SVETypeFlags TF(Builtin->TypeModifier);
    return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
  }

  case SVE::BI__builtin_sve_svtbl2_u8:
  case SVE::BI__builtin_sve_svtbl2_s8:
  case SVE::BI__builtin_sve_svtbl2_u16:
  case SVE::BI__builtin_sve_svtbl2_s16:
  case SVE::BI__builtin_sve_svtbl2_u32:
  case SVE::BI__builtin_sve_svtbl2_s32:
  case SVE::BI__builtin_sve_svtbl2_u64:
  case SVE::BI__builtin_sve_svtbl2_s64:
  case SVE::BI__builtin_sve_svtbl2_f16:
  case SVE::BI__builtin_sve_svtbl2_bf16:
  case SVE::BI__builtin_sve_svtbl2_f32:
  case SVE::BI__builtin_sve_svtbl2_f64: {
    SVETypeFlags TF(Builtin->TypeModifier);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
    return Builder.CreateCall(F, Ops);
  }

  case SVE::BI__builtin_sve_svset_neonq_s8:
  case SVE::BI__builtin_sve_svset_neonq_s16:
  case SVE::BI__builtin_sve_svset_neonq_s32:
  case SVE::BI__builtin_sve_svset_neonq_s64:
  case SVE::BI__builtin_sve_svset_neonq_u8:
  case SVE::BI__builtin_sve_svset_neonq_u16:
  case SVE::BI__builtin_sve_svset_neonq_u32:
  case SVE::BI__builtin_sve_svset_neonq_u64:
  case SVE::BI__builtin_sve_svset_neonq_f16:
  case SVE::BI__builtin_sve_svset_neonq_f32:
  case SVE::BI__builtin_sve_svset_neonq_f64:
  case SVE::BI__builtin_sve_svset_neonq_bf16: {
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
  }

  case SVE::BI__builtin_sve_svget_neonq_s8:
  case SVE::BI__builtin_sve_svget_neonq_s16:
  case SVE::BI__builtin_sve_svget_neonq_s32:
  case SVE::BI__builtin_sve_svget_neonq_s64:
  case SVE::BI__builtin_sve_svget_neonq_u8:
  case SVE::BI__builtin_sve_svget_neonq_u16:
  case SVE::BI__builtin_sve_svget_neonq_u32:
  case SVE::BI__builtin_sve_svget_neonq_u64:
  case SVE::BI__builtin_sve_svget_neonq_f16:
  case SVE::BI__builtin_sve_svget_neonq_f32:
  case SVE::BI__builtin_sve_svget_neonq_f64:
  case SVE::BI__builtin_sve_svget_neonq_bf16: {
    return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
  }

  case SVE::BI__builtin_sve_svdup_neonq_s8:
  case SVE::BI__builtin_sve_svdup_neonq_s16:
  case SVE::BI__builtin_sve_svdup_neonq_s32:
  case SVE::BI__builtin_sve_svdup_neonq_s64:
  case SVE::BI__builtin_sve_svdup_neonq_u8:
  case SVE::BI__builtin_sve_svdup_neonq_u16:
  case SVE::BI__builtin_sve_svdup_neonq_u32:
  case SVE::BI__builtin_sve_svdup_neonq_u64:
  case SVE::BI__builtin_sve_svdup_neonq_f16:
  case SVE::BI__builtin_sve_svdup_neonq_f32:
  case SVE::BI__builtin_sve_svdup_neonq_f64:
  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
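    // Insert the 128-bit NEON vector into lane 0 of a scalable vector and
    // then replicate that quadword across the whole vector with dupq_lane.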
    Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                               uint64_t(0));
    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
                                   {Insert, Builder.getInt64(0)});
  }
  }

  // Should not happen.
  return nullptr;
}

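// For the SME builtins handled below, e.g. svsudot (signed-by-unsigned dot
// product), the underlying LLVM intrinsic takes the multiplicands in the
// opposite order, so the two multi-vector multiplicand groups are swapped
// before emission. MultiVec is the number of vectors in each group.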
static void swapCommutativeSMEOperands(unsigned BuiltinID,
                                       SmallVectorImpl<Value *> &Ops) {
  unsigned MultiVec;
  switch (BuiltinID) {
  default:
    return;
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
    MultiVec = 1;
    break;
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
    MultiVec = 2;
    break;
  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
    MultiVec = 4;
    break;
  }

  if (MultiVec > 0)
    for (unsigned I = 0; I < MultiVec; ++I)
      std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
}

Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
                                                  const CallExpr *E) {
  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
                                              AArch64SMEIntrinsicsProvenSorted);

  llvm::SmallVector<Value *, 4> Ops;
  SVETypeFlags TypeFlags(Builtin->TypeModifier);
  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);

  if (TypeFlags.isLoad() || TypeFlags.isStore())
    return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
    return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
      BuiltinID == SME::BI__builtin_sme_svzero_za)
    return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
      BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
      BuiltinID == SME::BI__builtin_sme_svldr_za ||
      BuiltinID == SME::BI__builtin_sme_svstr_za)
    return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);

  // Emit a call to set FPMR for intrinsics that require it.
  if (TypeFlags.setsFPMR())
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
                       Ops.pop_back_val());
  // Handle builtins which require their multi-vector operands to be swapped.
  swapCommutativeSMEOperands(BuiltinID, Ops);

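  // svcntsb/svcntsh/svcntsw return the number of bytes/halfwords/words in a
  // streaming vector; they are emitted as cntsd (the doubleword count)
  // scaled by 8, 4 or 2 respectively.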
  auto isCntsBuiltin = [&]() {
    switch (BuiltinID) {
    default:
      return 0;
    case SME::BI__builtin_sme_svcntsb:
      return 8;
    case SME::BI__builtin_sme_svcntsh:
      return 4;
    case SME::BI__builtin_sme_svcntsw:
      return 2;
    }
  };

  if (auto Mul = isCntsBuiltin()) {
    llvm::Value *Cntd =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
    return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
                             "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
  }

  // Should not happen!
  if (Builtin->LLVMIntrinsic == 0)
    return nullptr;

  // Predicates must match the main datatype.
  for (Value *&Op : Ops)
    if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
      if (PredTy->getElementType()->isIntegerTy(1))
        Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));

  Function *F =
      TypeFlags.isOverloadNone()
          ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
          : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});

  return Builder.CreateCall(F, Ops);
}

/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
/// return it as an i8 pointer.
Value *readX18AsPtr(CodeGenFunction &CGF) {
  LLVMContext &Context = CGF.CGM.getLLVMContext();
  llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
  llvm::Function *F =
      CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
  llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
  return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
}

Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E,
                                               llvm::Triple::ArchType Arch) {
  if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
      BuiltinID <= clang::AArch64::LastSVEBuiltin)
    return EmitAArch64SVEBuiltinExpr(BuiltinID, E);

  if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
      BuiltinID <= clang::AArch64::LastSMEBuiltin)
    return EmitAArch64SMEBuiltinExpr(BuiltinID, E);

  if (BuiltinID == Builtin::BI__builtin_cpu_supports)
    return EmitAArch64CpuSupports(E);

  unsigned HintID = static_cast<unsigned>(-1);
  switch (BuiltinID) {
  default: break;
  case clang::AArch64::BI__builtin_arm_nop:
    HintID = 0;
    break;
  case clang::AArch64::BI__builtin_arm_yield:
  case clang::AArch64::BI__yield:
    HintID = 1;
    break;
  case clang::AArch64::BI__builtin_arm_wfe:
  case clang::AArch64::BI__wfe:
    HintID = 2;
    break;
  case clang::AArch64::BI__builtin_arm_wfi:
  case clang::AArch64::BI__wfi:
    HintID = 3;
    break;
  case clang::AArch64::BI__builtin_arm_sev:
  case clang::AArch64::BI__sev:
    HintID = 4;
    break;
  case clang::AArch64::BI__builtin_arm_sevl:
  case clang::AArch64::BI__sevl:
    HintID = 5;
    break;
  }

  if (HintID != static_cast<unsigned>(-1)) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
    // Create a call to __arm_sme_state and store the two results through the
    // two pointer arguments.
    CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
        llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
                                false),
        "__arm_sme_state"));
    auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
                                                "aarch64_pstate_sm_compatible");
    CI->setAttributes(Attrs);
    CI->setCallingConv(
        llvm::CallingConv::
            AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
    Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
                        EmitPointerWithAlignment(E->getArg(0)));
    return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
                               EmitPointerWithAlignment(E->getArg(1)));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
    assert((getContext().getTypeSize(E->getType()) == 64) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
      BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
    if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
    return Res;
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
                              "cls");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
                              "cls");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
                              Arg, "frint32z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
                              Arg, "frint64z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
                              Arg, "frint32x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
                              Arg, "frint64x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "__jcvt of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
    llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
    llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));

    if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
      // Load from the address via an LLVM intrinsic, receiving a
      // tuple of 8 i64 words, and store each one to ValPtr.
      Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
      llvm::Value *Val = Builder.CreateCall(F, MemAddr);
      llvm::Value *ToRet;
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
      }
      return ToRet;
    }

    // Load 8 i64 words from ValPtr, and store them to the address
    // via an LLVM intrinsic.
    SmallVector<llvm::Value *, 9> Args;
    Args.push_back(MemAddr);
    for (size_t i = 0; i < 8; i++) {
      llvm::Value *ValOffsetPtr =
          Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
      Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
      Args.push_back(Builder.CreateLoad(Addr));
    }

    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
                     ? Intrinsic::aarch64_st64b
                 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
                     ? Intrinsic::aarch64_st64bv
                     : Intrinsic::aarch64_st64bv0);
    Function *F = CGM.getIntrinsic(Intr);
    return Builder.CreateCall(F, Args);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_atomic_store_with_stshh) {
    Value *StoreAddr = EmitScalarExpr(E->getArg(0));
    Value *StoreValue = EmitScalarExpr(E->getArg(1));

    auto *OrderC = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    auto *PolicyC = cast<ConstantInt>(EmitScalarExpr(E->getArg(3)));

    // Compute the pointee bit-width from arg0 and emit it as an i32 constant.
    QualType ValQT =
        E->getArg(0)->getType()->castAs<PointerType>()->getPointeeType();
    unsigned SizeBits = getContext().getTypeSize(ValQT);
    auto *SizeC = llvm::ConstantInt::get(Int32Ty, SizeBits);

    Value *StoreValue64 = Builder.CreateIntCast(StoreValue, Int64Ty,
                                                ValQT->isSignedIntegerType());

    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stshh_atomic_store,
                                   {StoreAddr->getType()});

    // Emit a single intrinsic so the backend can expand it to an STSHH
    // followed by an atomic store, guaranteeing that the STSHH immediately
    // precedes the STR instruction.
    return Builder.CreateCall(
        F, {StoreAddr, StoreValue64,
            ConstantInt::get(Int32Ty, OrderC->getZExtValue()),
            ConstantInt::get(Int32Ty, PolicyC->getZExtValue()), SizeC});
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {

    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
                     ? Intrinsic::aarch64_rndr
                     : Intrinsic::aarch64_rndrrs);
    Function *F = CGM.getIntrinsic(Intr);
    llvm::Value *Val = Builder.CreateCall(F);
    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
    Value *Status = Builder.CreateExtractValue(Val, 1);

    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
    Builder.CreateStore(RandomValue, MemAddress);
    Status = Builder.CreateZExt(Status, Int32Ty);
    return Status;
  }

  if (BuiltinID == clang::AArch64::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
      getContext().getTypeSize(E->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxp
                             : Intrinsic::aarch64_ldxp);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");

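    // ldxp/ldaxp return the two 64-bit halves as a struct; combine them into
    // a single i128, with struct element 0 forming the low half and element 1
    // the high half.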
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
    Val1 = Builder.CreateZExt(Val1, Int128Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
             BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxr
                             : Intrinsic::aarch64_ldxr,
                         DefaultPtrTy);
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
    Val->addParamAttr(
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);

    llvm::Type *IntResTy = llvm::IntegerType::get(
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                 RealResTy);
  }

  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxp
                             : Intrinsic::aarch64_stxp);
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);

    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);

    Tmp = Tmp.withElementType(STy);
    llvm::Value *Val = Builder.CreateLoad(Tmp);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
      BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
    }

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxr
                             : Intrinsic::aarch64_stxr,
                         StoreAddr->getType());
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }

  if (BuiltinID == clang::AArch64::BI__getReg) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    LLVMContext &Context = CGM.getLLVMContext();
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);

    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
    return Builder.CreateCall(F, Metadata);
  }

  if (BuiltinID == clang::AArch64::BI__break) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);

  // CRC32
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
  case clang::AArch64::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
  case clang::AArch64::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
  case clang::AArch64::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
  case clang::AArch64::BI__builtin_arm_crc32w:
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
  case clang::AArch64::BI__builtin_arm_crc32cw:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
  case clang::AArch64::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
  case clang::AArch64::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);

    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);

    return Builder.CreateCall(F, {Arg0, Arg1});
  }

  // Memory Operations (MOPS)
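  // memset_tag stores the given byte value and simultaneously sets the MTE
  // allocation tags for the destination range.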
  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));
    Val = Builder.CreateTrunc(Val, Int8Ty);
    Size = Builder.CreateIntCast(Size, Int64Ty, false);
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
  }

  if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
      BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
    return EmitRangePrefetchBuiltin(*this, BuiltinID, E);

  // Memory Tagging Extension (MTE) intrinsics.
  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_irg:
    MTEIntrinsicID = Intrinsic::aarch64_irg; break;
  case clang::AArch64::BI__builtin_arm_addg:
    MTEIntrinsicID = Intrinsic::aarch64_addg; break;
  case clang::AArch64::BI__builtin_arm_gmi:
    MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
  case clang::AArch64::BI__builtin_arm_ldg:
    MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
  case clang::AArch64::BI__builtin_arm_stg:
    MTEIntrinsicID = Intrinsic::aarch64_stg; break;
  case clang::AArch64::BI__builtin_arm_subp:
    MTEIntrinsicID = Intrinsic::aarch64_subp; break;
  }

  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *Mask = EmitScalarExpr(E->getArg(1));

      Mask = Builder.CreateZExt(Mask, Int64Ty);
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, Mask});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *TagOffset = EmitScalarExpr(E->getArg(1));

      TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, TagOffset});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));

      ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
    }
    // Although it is possible to supply a different return address (the
    // first argument) to this intrinsic, for now we set the return address
    // to the input address.
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    // Although it is possible to supply a different tag (to set) to this
    // intrinsic (as the first argument), for now we supply the tag of the
    // input address argument, which is the common use case.
    if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
      Value *PointerA = EmitScalarExpr(E->getArg(0));
      Value *PointerB = EmitScalarExpr(E->getArg(1));
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
    }
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {

    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;

    bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
                   BuiltinID == clang::AArch64::BI__builtin_arm_wsr;

    bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
                    BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;

    llvm::Type *ValueType;
    llvm::Type *RegisterType = Int64Ty;
    if (Is32Bit) {
      ValueType = Int32Ty;
    } else if (Is128Bit) {
      llvm::Type *Int128Ty =
          llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
      ValueType = Int128Ty;
      RegisterType = Int128Ty;
    } else if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
    } else {
      ValueType = Int64Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }

  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
      BuiltinID == clang::AArch64::BI_WriteStatusReg ||
      BuiltinID == clang::AArch64::BI__sys) {
    LLVMContext &Context = CGM.getLLVMContext();

    unsigned SysReg =
        E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();

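    // Decode the system-register immediate into the "op0:op1:CRn:CRm:op2"
    // register name form understood by the read/write_register intrinsics.
    // For __sys, op0 is always 1; for _ReadStatusReg/_WriteStatusReg it is
    // 2 or 3 depending on bit 14 of the encoding.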
    std::string SysRegStr;
    unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
                          BuiltinID == clang::AArch64::BI_WriteStatusReg)
                             ? ((1 << 1) | ((SysReg >> 14) & 1))
                             : 1;
    llvm::raw_string_ostream(SysRegStr)
        << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
        << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
        << (SysReg & 7);

    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, SysRegStr)};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Type *RegisterType = Int64Ty;
    llvm::Type *Types[] = {RegisterType};

    if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);

      return Builder.CreateCall(F, Metadata);
    }

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
    llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
    if (BuiltinID == clang::AArch64::BI__sys) {
      // Return 0 for convenience, even though MSVC returns some other
      // undefined value.
      Result = ConstantInt::get(Builder.getInt32Ty(), 0);
    }
    return Result;
  }

  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__mulh ||
      BuiltinID == clang::AArch64::BI__umulh) {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

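    // Widen both operands to 128 bits, multiply, and take the high 64 bits:
    // __mulh(a, b) == (i64)(((i128)a * (i128)b) >> 64).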
    bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
    Value *LHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
    Value *RHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    return HigherBits;
  }

  if (BuiltinID == AArch64::BI__writex18byte ||
      BuiltinID == AArch64::BI__writex18word ||
      BuiltinID == AArch64::BI__writex18dword ||
      BuiltinID == AArch64::BI__writex18qword) {
    // Process the args first.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));
    Value *DataArg = EmitScalarExpr(E->getArg(1));

    // Read x18 as i8*.
    llvm::Value *X18 = readX18AsPtr(*this);

    // Store val at x18 + offset.
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    StoreInst *Store =
        Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
    return Store;
  }

  if (BuiltinID == AArch64::BI__readx18byte ||
      BuiltinID == AArch64::BI__readx18word ||
      BuiltinID == AArch64::BI__readx18dword ||
      BuiltinID == AArch64::BI__readx18qword) {
    // Process the args first.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));

    // Read x18 as i8*.
    llvm::Value *X18 = readX18AsPtr(*this);

    // Load x18 + offset.
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    llvm::Type *IntTy = ConvertType(E->getType());
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
    return Load;
  }

  if (BuiltinID == AArch64::BI__addx18byte ||
      BuiltinID == AArch64::BI__addx18word ||
      BuiltinID == AArch64::BI__addx18dword ||
      BuiltinID == AArch64::BI__addx18qword ||
      BuiltinID == AArch64::BI__incx18byte ||
      BuiltinID == AArch64::BI__incx18word ||
      BuiltinID == AArch64::BI__incx18dword ||
      BuiltinID == AArch64::BI__incx18qword) {
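    // __addx18* adds a value to, and __incx18* increments, the integer at
    // x18 + offset. Note that the load/add/store sequence emitted below is
    // not atomic.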
    llvm::Type *IntTy;
    bool isIncrement;
    switch (BuiltinID) {
    case AArch64::BI__incx18byte:
      IntTy = Int8Ty;
      isIncrement = true;
      break;
    case AArch64::BI__incx18word:
      IntTy = Int16Ty;
      isIncrement = true;
      break;
    case AArch64::BI__incx18dword:
      IntTy = Int32Ty;
      isIncrement = true;
      break;
    case AArch64::BI__incx18qword:
      IntTy = Int64Ty;
      isIncrement = true;
      break;
    default:
      IntTy = ConvertType(E->getArg(1)->getType());
      isIncrement = false;
      break;
    }
    // Process the args first.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));
    Value *ValToAdd =
        isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));

    // Read x18 as i8*.
    llvm::Value *X18 = readX18AsPtr(*this);

    // Load x18 + offset.
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());

    // Add the values.
    Value *AddResult = Builder.CreateAdd(Load, ValToAdd);

    // Store val at x18 + offset.
    StoreInst *Store =
        Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
    return Store;
  }

  if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
      BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
      BuiltinID == AArch64::BI_CopyInt32FromFloat ||
      BuiltinID == AArch64::BI_CopyInt64FromDouble) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *RetTy = ConvertType(E->getType());
    return Builder.CreateBitCast(Arg, RetTy);
  }

  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
      BuiltinID == AArch64::BI_CountLeadingOnes64 ||
      BuiltinID == AArch64::BI_CountLeadingZeros ||
      BuiltinID == AArch64::BI_CountLeadingZeros64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = Arg->getType();

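    // A leading-ones count is a leading-zeros count of the complement.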
    if (BuiltinID == AArch64::BI_CountLeadingOnes ||
        BuiltinID == AArch64::BI_CountLeadingOnes64)
      Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));

    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
    Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});

    if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
        BuiltinID == AArch64::BI_CountLeadingZeros64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI_CountLeadingSigns ||
      BuiltinID == AArch64::BI_CountLeadingSigns64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));

    Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
                      ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
                      : CGM.getIntrinsic(Intrinsic::aarch64_cls64);

    Value *Result = Builder.CreateCall(F, Arg, "cls");
    if (BuiltinID == AArch64::BI_CountLeadingSigns64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI_CountOneBits ||
      BuiltinID == AArch64::BI_CountOneBits64) {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    Value *Result = Builder.CreateCall(F, ArgValue);
    if (BuiltinID == AArch64::BI_CountOneBits64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI__prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
    Value *Locality = ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
  }

  if (BuiltinID == AArch64::BI__hlt) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
    Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});

    // Return 0 for convenience, even though MSVC returns some other undefined
    // value.
    return ConstantInt::get(Builder.getInt32Ty(), 0);
  }

  if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
    return Builder.CreateFPTrunc(
        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
                              Builder.getFloatTy()),
        Builder.getBFloatTy());

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId =
          translateAarch64ToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;

  // Check whether this is an SISD builtin.
  auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
  bool IsSISD = (Builtin != nullptr);

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::SmallVector<Value *, 4> Ops;
  Address PtrOp0 = Address::invalid();
  // Note the assumption that SISD intrinsics do not contain extra arguments.
  // TODO: Fold this into a single function call instead of, effectively, two
  // separate checks.
  bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vldap1_lane_s64:
      case NEON::BI__builtin_neon_vldap1q_lane_s64:
      case NEON::BI__builtin_neon_vstl1_lane_s64:
      case NEON::BI__builtin_neon_vstl1q_lane_s64:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.emitRawPointer(*this));
        continue;
      }
    }
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
  }

  if (Builtin) {
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
    assert(Result && "SISD intrinsic should have been handled");
    return Result;
  }

  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  NeonTypeFlags Type(0);
  if (std::optional<llvm::APSInt> Result =
          Arg->getIntegerConstantExpr(getContext()))
    // Determine the type of this overloaded NEON intrinsic.
    Type = NeonTypeFlags(Result->getZExtValue());

  bool usgn = Type.isUnsigned();
  bool quad = Type.isQuad();
  unsigned Int;

  // Not all intrinsics handled by the common case work for AArch64 yet, so
  // only defer to common code if the builtin has been added to our special
  // map.
  Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
                                        AArch64SIMDIntrinsicsProvenSorted);

  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops,
        /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);

  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
    return V;

  // Handle non-overloaded intrinsics first.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabsh_f16:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddq_p128: {
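    // Addition in GF(2^128) (poly128) is carry-less, i.e. a bitwise XOR.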
    llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    return Builder.CreateBitCast(Ops[0], Int128Ty);
  }
  case NEON::BI__builtin_neon_vldrq_p128: {
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    return Builder.CreateAlignedLoad(Int128Ty, Ops[0],
                                     CharUnits::fromQuantity(16));
  }
  case NEON::BI__builtin_neon_vstrq_p128: {
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vcvts_f32_u32:
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvts_f32_s32:
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvth_f16_u16:
  case NEON::BI__builtin_neon_vcvth_f16_u32:
  case NEON::BI__builtin_neon_vcvth_f16_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvth_f16_s16:
  case NEON::BI__builtin_neon_vcvth_f16_s32:
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy;
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
      InTy = Int64Ty;
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
      InTy = Int32Ty;
    else
      InTy = Int16Ty;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvtah_u16_f16:
  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  case NEON::BI__builtin_neon_vcvtph_u16_f16:
  case NEON::BI__builtin_neon_vcvth_u16_f16:
  case NEON::BI__builtin_neon_vcvtah_s16_f16:
  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  case NEON::BI__builtin_neon_vcvtph_s16_f16:
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
    llvm::Type *InTy = Int16Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvtah_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtau; break;
    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtmu; break;
    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtnu; break;
    case NEON::BI__builtin_neon_vcvtph_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtpu; break;
    case NEON::BI__builtin_neon_vcvth_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzu; break;
    case NEON::BI__builtin_neon_vcvtah_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtas; break;
    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtms; break;
    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtns; break;
    case NEON::BI__builtin_neon_vcvtph_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtps; break;
    case NEON::BI__builtin_neon_vcvth_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzs; break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
  }
  case NEON::BI__builtin_neon_vcaleh_f16:
  case NEON::BI__builtin_neon_vcalth_f16:
  case NEON::BI__builtin_neon_vcageh_f16:
  case NEON::BI__builtin_neon_vcagth_f16: {
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcageh_f16:
      Int = Intrinsic::aarch64_neon_facge; break;
    case NEON::BI__builtin_neon_vcagth_f16:
      Int = Intrinsic::aarch64_neon_facgt; break;
    case NEON::BI__builtin_neon_vcaleh_f16:
      Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
    case NEON::BI__builtin_neon_vcalth_f16:
      Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_s16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
    case NEON::BI__builtin_neon_vcvth_n_u16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *Tys[2] = {FTy, InTy};
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_f16_s16:
      Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
      Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
      break;
    case NEON::BI__builtin_neon_vcvth_n_f16_u16:
      Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
      Ops[0] = Builder.CreateZExt(Ops[0], InTy);
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  }
  case NEON::BI__builtin_neon_vpaddd_s64: {
    // TODO: Isn't this handled by EmitCommonNeonSISDBuiltinExpr?
    auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
    // The vector is v2i64, so make sure it's bitcast to that.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2i64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
  }
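  // As a sketch (names illustrative), vpaddd_s64(a) with a : <2 x i64> is:
  //   %lane0 = extractelement <2 x i64> %a, i64 0
  //   %lane1 = extractelement <2 x i64> %a, i64 1
  //   %vpaddd = add i64 %lane0, %lane1
  // The f64/f32 variants below follow the same pattern with fadd.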
  case NEON::BI__builtin_neon_vpaddd_f64: {
    auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
    // The vector is v2f64, so make sure it's bitcast to that.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
    // Pairwise addition of a v2f64 into a scalar f64.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpadds_f32: {
    auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
    // The vector is v2f32, so make sure it's bitcast to that.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f32");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
    // Pairwise addition of a v2f32 into a scalar f32.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vceqzd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vceqzd_f64:
  case NEON::BI__builtin_neon_vceqzs_f32:
  case NEON::BI__builtin_neon_vceqzh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OEQ, "vceqz");
  case NEON::BI__builtin_neon_vcgezd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vcgezd_f64:
  case NEON::BI__builtin_neon_vcgezs_f32:
  case NEON::BI__builtin_neon_vcgezh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGE, "vcgez");
  case NEON::BI__builtin_neon_vclezd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vclezd_f64:
  case NEON::BI__builtin_neon_vclezs_f32:
  case NEON::BI__builtin_neon_vclezh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLE, "vclez");
  case NEON::BI__builtin_neon_vcgtzd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcgtzd_f64:
  case NEON::BI__builtin_neon_vcgtzs_f32:
  case NEON::BI__builtin_neon_vcgtzh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltzd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SLT, "vcltz");

  case NEON::BI__builtin_neon_vcltzd_f64:
  case NEON::BI__builtin_neon_vcltzs_f32:
  case NEON::BI__builtin_neon_vcltzh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLT, "vcltz");

  case NEON::BI__builtin_neon_vceqzd_u64: {
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_EQ, "vceqzd");
  }
  case NEON::BI__builtin_neon_vceqd_f64:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcged_f64:
  case NEON::BI__builtin_neon_vcgtd_f64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
  }
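  // Equality uses a quiet compare (CreateFCmp), while the ordering predicates
  // use CreateFCmpS, which lowers to a signaling compare under strict FP so
  // that quiet NaN operands raise Invalid (the FCMP vs. FCMPE distinction on
  // AArch64). The i1 result is sign-extended into an all-ones/all-zeros mask,
  // e.g. for vcgtd_f64 (a sketch):
  //   %cmp = fcmp ogt double %a, %b
  //   %vcmpd = sext i1 %cmp to i64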
  case NEON::BI__builtin_neon_vceqs_f32:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcges_f32:
  case NEON::BI__builtin_neon_vcgts_f32: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqh_f16:
  case NEON::BI__builtin_neon_vcleh_f16:
  case NEON::BI__builtin_neon_vclth_f16:
  case NEON::BI__builtin_neon_vcgeh_f16:
  case NEON::BI__builtin_neon_vcgth_f16: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqd_s64:
  case NEON::BI__builtin_neon_vceqd_u64:
  case NEON::BI__builtin_neon_vcgtd_s64:
  case NEON::BI__builtin_neon_vcgtd_u64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vcged_u64:
  case NEON::BI__builtin_neon_vcged_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcled_s64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_s64:
    case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
    case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
    case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
    case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
    case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
    case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
    case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
    case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
    case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
    }
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  }
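  // For instance, vcgtd_u64(a, b) is emitted as (a sketch):
  //   %cmp = icmp ugt i64 %a, %b
  //   %vceqd = sext i1 %cmp to i64
  // yielding all-ones when the comparison holds and zero otherwise.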
  case NEON::BI__builtin_neon_vnegd_s64:
    return Builder.CreateNeg(Ops[0], "vnegd");
  case NEON::BI__builtin_neon_vnegh_f16:
    return Builder.CreateFNeg(Ops[0], "vnegh");
  case NEON::BI__builtin_neon_vtstd_s64:
  case NEON::BI__builtin_neon_vtstd_u64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  }
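  // vtstd computes (a & b) != 0 as a full-width mask, i.e. roughly:
  //   %and = and i64 %a, %b
  //   %cmp = icmp ne i64 %and, 0
  //   %vtstd = sext i1 %cmp to i64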
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_f64:
    // The vector type needs a cast for the v1f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_mf8:
  case NEON::BI__builtin_neon_vsetq_lane_mf8:
    // The input vector type needs a cast to scalar type.
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vsetq_lane_f64:
    // The vector type needs a cast for the v2f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vdupb_lane_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_mf8:
  case NEON::BI__builtin_neon_vdupb_lane_mf8:
  case NEON::BI__builtin_neon_vgetq_lane_mf8:
  case NEON::BI__builtin_neon_vdupb_laneq_mf8:
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vduph_lane_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vduph_laneq_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vdups_lane_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  case NEON::BI__builtin_neon_vdups_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vdups_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vdups_laneq_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vdupd_lane_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  case NEON::BI__builtin_neon_vdupd_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vdupd_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vdups_laneq_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f64:
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
  case NEON::BI__builtin_neon_vaddh_f16:
    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
  case NEON::BI__builtin_neon_vsubh_f16:
    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
  case NEON::BI__builtin_neon_vmulh_f16:
    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
  case NEON::BI__builtin_neon_vdivh_f16:
    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
  case NEON::BI__builtin_neon_vfmah_f16:
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {Ops[1], Ops[2], Ops[0]});
  case NEON::BI__builtin_neon_vfmsh_f16: {
    Value *Neg = Builder.CreateFNeg(Ops[1], "vsubh");

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {Neg, Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vaddd_s64:
  case NEON::BI__builtin_neon_vaddd_u64:
    return Builder.CreateAdd(Ops[0], Ops[1], "vaddd");
  case NEON::BI__builtin_neon_vsubd_s64:
  case NEON::BI__builtin_neon_vsubd_u64:
    return Builder.CreateSub(Ops[0], Ops[1], "vsubd");
  case NEON::BI__builtin_neon_vqdmlalh_s16:
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    // Drop the 2nd multiplication argument before the accumulation
    Ops.pop_back();
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  }
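  // There is no scalar i16 saturating-doubling-multiply intrinsic, so the i16
  // operands are widened into v4i16 vectors (vectorWrapScalar16), multiplied
  // with sqdmull into v4i32, and lane 0 of the product is then accumulated
  // into the i32 accumulator with a saturating sqadd/sqsub.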
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
                        Ops, "vqshlu_n");
  }
  case NEON::BI__builtin_neon_vqshld_n_u64:
  case NEON::BI__builtin_neon_vqshld_n_s64: {
    Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
              ? Intrinsic::aarch64_neon_uqshl
              : Intrinsic::aarch64_neon_sqshl;
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  }
  case NEON::BI__builtin_neon_vrshrd_n_u64:
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
    Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
              ? Intrinsic::aarch64_neon_urshl
              : Intrinsic::aarch64_neon_srshl;
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  }
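  // A rounding shift right by n is emitted as a rounding shift left
  // (urshl/srshl) by -n; e.g. vrshrd_n_s64(x, 3) becomes, roughly, a call to
  // llvm.aarch64.neon.srshl(x, -3).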
  case NEON::BI__builtin_neon_vrsrad_n_u64:
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
    Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
              ? Intrinsic::aarch64_neon_urshl
              : Intrinsic::aarch64_neon_srshl;
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[2] = Builder.CreateNeg(Ops[2]);
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  }
  case NEON::BI__builtin_neon_vshld_n_s64:
  case NEON::BI__builtin_neon_vshld_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
    return Builder.CreateShl(
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
    return Builder.CreateAShr(
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[1]);
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    if (ShiftAmt == 64)
      return ConstantInt::get(Int64Ty, 0);
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
                              "shrd_n");
  }
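  // The ShiftAmt == 64 case must be special-cased: an LLVM lshr by the full
  // bit width is poison, while the NEON builtin is defined to yield 0. The
  // signed variant above instead clamps the amount to 63, preserving the
  // arithmetic shift's sign-replication semantics.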
  case NEON::BI__builtin_neon_vsrad_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
    Ops[1] = Builder.CreateAShr(
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vsrad_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(Ops[2]);
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    // As Op + 0 = Op, return Ops[0] directly.
    if (ShiftAmt == 64)
      return Ops[0];
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
                                "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
    // Drop lane-selection and the corresponding vector argument (these have
    // already been used)
    Ops.pop_back_n(2);

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_s32:
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(Ops[2]);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    // Drop the 2nd multiplication argument before the accumulation
    Ops.pop_back();
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(Ops[2]);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");
    // Drop lane-selection and the corresponding vector argument (these have
    // already been used)
    Ops.pop_back_n(2);

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_f16: {
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  }
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_f16: {
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vgetq_lane");
  }
  case NEON::BI__builtin_neon_vcvt_bf16_f32: {
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
  }
  case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
    SmallVector<int, 16> ConcatMask(8);
    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    llvm::Value *Trunc =
        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
    return Builder.CreateShuffleVector(
        Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
  }
  case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
    SmallVector<int, 16> ConcatMask(8);
    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
    SmallVector<int, 16> LoMask(4);
    std::iota(LoMask.begin(), LoMask.end(), 0);
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
    llvm::Value *Inactive = Builder.CreateShuffleVector(
        Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
    llvm::Value *Trunc =
        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
    return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
  }
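  // vcvtq_high_bf16_f32 keeps the low four bf16 lanes of the existing vector
  // (Ops[0]) and concatenates the truncation of the four f32 lanes of Ops[1]
  // as the high half; the iota-filled masks are simply identity lane
  // numberings for the extract-low and concatenation shuffles.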

  case clang::AArch64::BI_InterlockedAdd:
  case clang::AArch64::BI_InterlockedAdd_acq:
  case clang::AArch64::BI_InterlockedAdd_rel:
  case clang::AArch64::BI_InterlockedAdd_nf:
  case clang::AArch64::BI_InterlockedAdd64:
  case clang::AArch64::BI_InterlockedAdd64_acq:
  case clang::AArch64::BI_InterlockedAdd64_rel:
  case clang::AArch64::BI_InterlockedAdd64_nf: {
    Address DestAddr = CheckAtomicAlignment(*this, E);
    Value *Val = Ops[1];
    llvm::AtomicOrdering Ordering;
    switch (BuiltinID) {
    case clang::AArch64::BI_InterlockedAdd:
    case clang::AArch64::BI_InterlockedAdd64:
      Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
      break;
    case clang::AArch64::BI_InterlockedAdd_acq:
    case clang::AArch64::BI_InterlockedAdd64_acq:
      Ordering = llvm::AtomicOrdering::Acquire;
      break;
    case clang::AArch64::BI_InterlockedAdd_rel:
    case clang::AArch64::BI_InterlockedAdd64_rel:
      Ordering = llvm::AtomicOrdering::Release;
      break;
    case clang::AArch64::BI_InterlockedAdd_nf:
    case clang::AArch64::BI_InterlockedAdd64_nf:
      Ordering = llvm::AtomicOrdering::Monotonic;
      break;
    default:
      llvm_unreachable("missing builtin ID in switch!");
    }
    AtomicRMWInst *RMWI =
        Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
    return Builder.CreateAdd(RMWI, Val);
  }
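  // atomicrmw returns the value the memory held *before* the operation, while
  // MSVC's _InterlockedAdd family returns the resulting sum, hence the
  // trailing add. Roughly:
  //   %old = atomicrmw add ptr %p, i32 %v seq_cst
  //   %res = add i32 %old, %v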
  }

  llvm::FixedVectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  bool ExtractLow = false;
  bool ExtendLaneArg = false;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vbsl_v:
  case NEON::BI__builtin_neon_vbslq_v: {
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");

    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
    return Builder.CreateBitCast(Ops[0], Ty);
  }
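  // vbsl is a bitwise select: each result bit is (sel & a) | (~sel & b), i.e.
  // taken from Ops[1] where the mask Ops[0] is set and from Ops[2] where it is
  // clear, computed in the integer domain after bitcasts.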
  case NEON::BI__builtin_neon_vfma_lane_v:
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
    // The ARM builtins (and instructions) have the addend as the first
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
    Value *Addend = Ops[0];
    Value *Multiplicand = Ops[1];
    Value *LaneSource = Ops[2];
    Ops[0] = Multiplicand;
    Ops[1] = LaneSource;
    Ops[2] = Addend;

    // Now adjust things to handle the lane access.
    auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
                         ? llvm::FixedVectorType::get(VTy->getElementType(),
                                                      VTy->getNumElements() / 2)
                         : VTy;
    llvm::Constant *cst = cast<Constant>(Ops[3]);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");

    Ops.pop_back();
    Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
                                       : Intrinsic::fma;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  }
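  // The lane is broadcast by shuffling the (possibly narrower) source vector
  // with a constant splat mask. A sketch for vfma_lane on v2f32 with lane 1:
  //   %lane = shufflevector <2 x float> %v, <2 x float> %v,
  //                         <2 x i32> <i32 1, i32 1>
  //   %fmla = call <2 x float> @llvm.fma.v2f32(<2 x float> %b,
  //                                            <2 x float> %lane,
  //                                            <2 x float> %a)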
  case NEON::BI__builtin_neon_vfma_laneq_v: {
    auto *VTy = cast<llvm::FixedVectorType>(Ty);
    // v1f64 fma should be mapped to Neon scalar f64 fma
    if (VTy && VTy->getElementType() == DoubleTy) {
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
      llvm::FixedVectorType *VTy =
          GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
      Value *Result;
      Result = emitCallMaybeConstrainedFPBuiltin(
          *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
          DoubleTy, {Ops[1], Ops[2], Ops[0]});
      return Builder.CreateBitCast(Result, Ty);
    }
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
                                           VTy->getNumElements() * 2);
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
                                               cast<ConstantInt>(Ops[3]));
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");

    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmah_lane_f16:
  case NEON::BI__builtin_neon_vfmas_lane_f32:
  case NEON::BI__builtin_neon_vfmah_laneq_f16:
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
  case NEON::BI__builtin_neon_vfmad_lane_f64:
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
    if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vmax_v:
  case NEON::BI__builtin_neon_vmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case NEON::BI__builtin_neon_vmaxh_f16: {
    Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
  }
  case NEON::BI__builtin_neon_vmin_v:
  case NEON::BI__builtin_neon_vminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case NEON::BI__builtin_neon_vminh_f16: {
    Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
  }
  case NEON::BI__builtin_neon_vabd_v:
  case NEON::BI__builtin_neon_vabdq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    unsigned ArgElts = VTy->getNumElements();
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
    unsigned BitWidth = EltTy->getBitWidth();
    auto *ArgTy = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
    llvm::Type *Tys[2] = {VTy, ArgTy};
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
    SmallVector<llvm::Value *, 1> TmpOps;
    TmpOps.push_back(Ops[1]);
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
    return Builder.CreateAdd(tmp, addend);
  }
6837 }
6838 case NEON::BI__builtin_neon_vpmin_v:
6839 case NEON::BI__builtin_neon_vpminq_v:
6840 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6841 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6842 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6843 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmin");
6844 case NEON::BI__builtin_neon_vpmax_v:
6845 case NEON::BI__builtin_neon_vpmaxq_v:
6846 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6847 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6848 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6849 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmax");
6850 case NEON::BI__builtin_neon_vminnm_v:
6851 case NEON::BI__builtin_neon_vminnmq_v:
6852 Int = Intrinsic::aarch64_neon_fminnm;
6853 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vminnm");
6854 case NEON::BI__builtin_neon_vminnmh_f16:
6855 Int = Intrinsic::aarch64_neon_fminnm;
6856 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vminnm");
6857 case NEON::BI__builtin_neon_vmaxnm_v:
6858 case NEON::BI__builtin_neon_vmaxnmq_v:
6859 Int = Intrinsic::aarch64_neon_fmaxnm;
6860 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmaxnm");
6861 case NEON::BI__builtin_neon_vmaxnmh_f16:
6862 Int = Intrinsic::aarch64_neon_fmaxnm;
6863 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmaxnm");
6864 case NEON::BI__builtin_neon_vrecpss_f32: {
6865 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_frecps, Tys: FloatTy),
6866 Ops, name: "vrecps");
6867 }
6868 case NEON::BI__builtin_neon_vrecpsd_f64:
6869 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_frecps, Tys: DoubleTy),
6870 Ops, name: "vrecps");
6871 case NEON::BI__builtin_neon_vrecpsh_f16:
6872 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_frecps, Tys: HalfTy),
6873 Ops, name: "vrecps");
6874 case NEON::BI__builtin_neon_vqshrun_n_v:
6875 Int = Intrinsic::aarch64_neon_sqshrun;
6876 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrun_n");
6877 case NEON::BI__builtin_neon_vqrshrun_n_v:
6878 Int = Intrinsic::aarch64_neon_sqrshrun;
6879 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrun_n");
6880 case NEON::BI__builtin_neon_vqshrn_n_v:
6881 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6882 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrn_n");
6883 case NEON::BI__builtin_neon_vrshrn_n_v:
6884 Int = Intrinsic::aarch64_neon_rshrn;
6885 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrshrn_n");
6886 case NEON::BI__builtin_neon_vqrshrn_n_v:
6887 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6888 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrn_n");
6889 case NEON::BI__builtin_neon_vrndah_f16: {
6890 Int = Builder.getIsFPConstrained()
6891 ? Intrinsic::experimental_constrained_round
6892 : Intrinsic::round;
6893 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrnda");
6894 }
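  // The vrnd* family maps onto the generic LLVM rounding intrinsics (or their
  // constrained counterparts under strict FP): vrnda -> round (ties away from
  // zero), vrndi -> nearbyint, vrndm -> floor, vrndn -> roundeven, vrndp ->
  // ceil, vrndx -> rint, and plain vrnd -> trunc.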
6895 case NEON::BI__builtin_neon_vrnda_v:
6896 case NEON::BI__builtin_neon_vrndaq_v: {
6897 Int = Builder.getIsFPConstrained()
6898 ? Intrinsic::experimental_constrained_round
6899 : Intrinsic::round;
6900 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnda");
6901 }
6902 case NEON::BI__builtin_neon_vrndih_f16: {
6903 Int = Builder.getIsFPConstrained()
6904 ? Intrinsic::experimental_constrained_nearbyint
6905 : Intrinsic::nearbyint;
6906 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndi");
6907 }
6908 case NEON::BI__builtin_neon_vrndmh_f16: {
6909 Int = Builder.getIsFPConstrained()
6910 ? Intrinsic::experimental_constrained_floor
6911 : Intrinsic::floor;
6912 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndm");
6913 }
6914 case NEON::BI__builtin_neon_vrndm_v:
6915 case NEON::BI__builtin_neon_vrndmq_v: {
6916 Int = Builder.getIsFPConstrained()
6917 ? Intrinsic::experimental_constrained_floor
6918 : Intrinsic::floor;
6919 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndm");
6920 }
6921 case NEON::BI__builtin_neon_vrndnh_f16: {
6922 Int = Builder.getIsFPConstrained()
6923 ? Intrinsic::experimental_constrained_roundeven
6924 : Intrinsic::roundeven;
6925 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndn");
6926 }
6927 case NEON::BI__builtin_neon_vrndn_v:
6928 case NEON::BI__builtin_neon_vrndnq_v: {
6929 Int = Builder.getIsFPConstrained()
6930 ? Intrinsic::experimental_constrained_roundeven
6931 : Intrinsic::roundeven;
6932 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndn");
6933 }
6934 case NEON::BI__builtin_neon_vrndns_f32: {
6935 Int = Builder.getIsFPConstrained()
6936 ? Intrinsic::experimental_constrained_roundeven
6937 : Intrinsic::roundeven;
6938 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: FloatTy), Ops, name: "vrndn");
6939 }
6940 case NEON::BI__builtin_neon_vrndph_f16: {
6941 Int = Builder.getIsFPConstrained()
6942 ? Intrinsic::experimental_constrained_ceil
6943 : Intrinsic::ceil;
6944 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndp");
6945 }
6946 case NEON::BI__builtin_neon_vrndp_v:
6947 case NEON::BI__builtin_neon_vrndpq_v: {
6948 Int = Builder.getIsFPConstrained()
6949 ? Intrinsic::experimental_constrained_ceil
6950 : Intrinsic::ceil;
6951 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndp");
6952 }
6953 case NEON::BI__builtin_neon_vrndxh_f16: {
6954 Int = Builder.getIsFPConstrained()
6955 ? Intrinsic::experimental_constrained_rint
6956 : Intrinsic::rint;
6957 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndx");
6958 }
6959 case NEON::BI__builtin_neon_vrndx_v:
6960 case NEON::BI__builtin_neon_vrndxq_v: {
6961 Int = Builder.getIsFPConstrained()
6962 ? Intrinsic::experimental_constrained_rint
6963 : Intrinsic::rint;
6964 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndx");
6965 }
6966 case NEON::BI__builtin_neon_vrndh_f16: {
6967 Int = Builder.getIsFPConstrained()
6968 ? Intrinsic::experimental_constrained_trunc
6969 : Intrinsic::trunc;
6970 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndz");
6971 }
6972 case NEON::BI__builtin_neon_vrnd32x_f32:
6973 case NEON::BI__builtin_neon_vrnd32xq_f32:
6974 case NEON::BI__builtin_neon_vrnd32x_f64:
6975 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6976 Int = Intrinsic::aarch64_neon_frint32x;
6977 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd32x");
6978 }
6979 case NEON::BI__builtin_neon_vrnd32z_f32:
6980 case NEON::BI__builtin_neon_vrnd32zq_f32:
6981 case NEON::BI__builtin_neon_vrnd32z_f64:
6982 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6983 Int = Intrinsic::aarch64_neon_frint32z;
6984 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd32z");
6985 }
6986 case NEON::BI__builtin_neon_vrnd64x_f32:
6987 case NEON::BI__builtin_neon_vrnd64xq_f32:
6988 case NEON::BI__builtin_neon_vrnd64x_f64:
6989 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6990 Int = Intrinsic::aarch64_neon_frint64x;
6991 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd64x");
6992 }
6993 case NEON::BI__builtin_neon_vrnd64z_f32:
6994 case NEON::BI__builtin_neon_vrnd64zq_f32:
6995 case NEON::BI__builtin_neon_vrnd64z_f64:
6996 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6997 Int = Intrinsic::aarch64_neon_frint64z;
6998 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd64z");
6999 }
7000 case NEON::BI__builtin_neon_vrnd_v:
7001 case NEON::BI__builtin_neon_vrndq_v: {
7002 Int = Builder.getIsFPConstrained()
7003 ? Intrinsic::experimental_constrained_trunc
7004 : Intrinsic::trunc;
7005 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndz");
7006 }
7007 case NEON::BI__builtin_neon_vcvt_f64_v:
7008 case NEON::BI__builtin_neon_vcvtq_f64_v:
7009 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7010 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
7011 return usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
7012 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
7013 case NEON::BI__builtin_neon_vcvt_f64_f32: {
7014 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
7015 "unexpected vcvt_f64_f32 builtin");
7016 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
7017 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
7018
7019 return Builder.CreateFPExt(V: Ops[0], DestTy: Ty, Name: "vcvt");
7020 }
7021 case NEON::BI__builtin_neon_vcvt_f32_f64: {
7022 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
7023 "unexpected vcvt_f32_f64 builtin");
7024 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
7025 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
7026
7027 return Builder.CreateFPTrunc(V: Ops[0], DestTy: Ty, Name: "vcvt");
7028 }
7029 case NEON::BI__builtin_neon_vcvt_s32_v:
7030 case NEON::BI__builtin_neon_vcvt_u32_v:
7031 case NEON::BI__builtin_neon_vcvt_s64_v:
7032 case NEON::BI__builtin_neon_vcvt_u64_v:
7033 case NEON::BI__builtin_neon_vcvt_s16_f16:
7034 case NEON::BI__builtin_neon_vcvt_u16_f16:
7035 case NEON::BI__builtin_neon_vcvtq_s32_v:
7036 case NEON::BI__builtin_neon_vcvtq_u32_v:
7037 case NEON::BI__builtin_neon_vcvtq_s64_v:
7038 case NEON::BI__builtin_neon_vcvtq_u64_v:
7039 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7040 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7041 Int =
7042 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
7043 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type)};
7044 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtz");
7045 }
7046 case NEON::BI__builtin_neon_vcvta_s16_f16:
7047 case NEON::BI__builtin_neon_vcvta_u16_f16:
7048 case NEON::BI__builtin_neon_vcvta_s32_v:
7049 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7050 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7051 case NEON::BI__builtin_neon_vcvta_u32_v:
7052 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7053 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7054 case NEON::BI__builtin_neon_vcvta_s64_v:
7055 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7056 case NEON::BI__builtin_neon_vcvta_u64_v:
7057 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
7058 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
7059 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
7060 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvta");
7061 }
7062 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7063 case NEON::BI__builtin_neon_vcvtm_s32_v:
7064 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7065 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7066 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7067 case NEON::BI__builtin_neon_vcvtm_u32_v:
7068 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7069 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7070 case NEON::BI__builtin_neon_vcvtm_s64_v:
7071 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7072 case NEON::BI__builtin_neon_vcvtm_u64_v:
7073 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7074 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
7075 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
7076 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtm");
7077 }
7078 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7079 case NEON::BI__builtin_neon_vcvtn_s32_v:
7080 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7081 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7082 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7083 case NEON::BI__builtin_neon_vcvtn_u32_v:
7084 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7085 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7086 case NEON::BI__builtin_neon_vcvtn_s64_v:
7087 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7088 case NEON::BI__builtin_neon_vcvtn_u64_v:
7089 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
7090 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
7091 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
7092 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtn");
7093 }
7094 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7095 case NEON::BI__builtin_neon_vcvtp_s32_v:
7096 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7097 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7098 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7099 case NEON::BI__builtin_neon_vcvtp_u32_v:
7100 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7101 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7102 case NEON::BI__builtin_neon_vcvtp_s64_v:
7103 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7104 case NEON::BI__builtin_neon_vcvtp_u64_v:
7105 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
7106 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
7107 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
7108 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtp");
7109 }
7110 case NEON::BI__builtin_neon_vmulx_v:
7111 case NEON::BI__builtin_neon_vmulxq_v: {
7112 Int = Intrinsic::aarch64_neon_fmulx;
7113 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmulx");
7114 }
7115 case NEON::BI__builtin_neon_vmulxh_lane_f16:
7116 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7117 // vmulx_lane should be mapped to Neon scalar mulx after
7118 // extracting the scalar element
7119 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
7120 Ops.pop_back();
7121 Int = Intrinsic::aarch64_neon_fmulx;
7122 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmulx");
7123 }
7124 case NEON::BI__builtin_neon_vmul_lane_v:
7125 case NEON::BI__builtin_neon_vmul_laneq_v: {
7126 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7127 bool Quad = false;
7128 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7129 Quad = true;
7130 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
7131 llvm::FixedVectorType *VTy =
7132 GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7133 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
7134 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
7135 Value *Result = Builder.CreateFMul(L: Ops[0], R: Ops[1]);
7136 return Builder.CreateBitCast(V: Result, DestTy: Ty);
7137 }
7138 case NEON::BI__builtin_neon_vpmaxnm_v:
7139 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7140 Int = Intrinsic::aarch64_neon_fmaxnmp;
7141 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmaxnm");
7142 }
7143 case NEON::BI__builtin_neon_vpminnm_v:
7144 case NEON::BI__builtin_neon_vpminnmq_v: {
7145 Int = Intrinsic::aarch64_neon_fminnmp;
7146 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpminnm");
7147 }
7148 case NEON::BI__builtin_neon_vsqrth_f16: {
7149 Int = Builder.getIsFPConstrained()
7150 ? Intrinsic::experimental_constrained_sqrt
7151 : Intrinsic::sqrt;
7152 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vsqrt");
7153 }
7154 case NEON::BI__builtin_neon_vsqrt_v:
7155 case NEON::BI__builtin_neon_vsqrtq_v: {
7156 Int = Builder.getIsFPConstrained()
7157 ? Intrinsic::experimental_constrained_sqrt
7158 : Intrinsic::sqrt;
7159 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7160 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vsqrt");
7161 }
7162 case NEON::BI__builtin_neon_vrbit_v:
7163 case NEON::BI__builtin_neon_vrbitq_v: {
7164 Int = Intrinsic::bitreverse;
7165 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrbit");
7166 }
  case NEON::BI__builtin_neon_vmaxv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminv_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminvq_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxnmv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminnmv_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmul_n_f64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Value *RHS = Builder.CreateBitCast(Ops[1], DoubleTy);
    return Builder.CreateFMul(Ops[0], RHS);
  }
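  // vaddlv/vaddlvq are widening reductions: 8- and 16-bit lanes are summed
  // into a 32-bit scalar via aarch64.neon.{u,s}addlv. For 8-bit sources the
  // ACLE intrinsic returns a 16-bit value, so the i32 intrinsic result is
  // truncated back down to i16.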
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
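  // vsri_n/vsli_n (shift right/left and insert) have no generic IR
  // equivalent, so they lower directly to the aarch64.neon.vsri/vsli
  // intrinsics, which map onto the SRI/SLI instructions.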
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value *, 2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function *F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
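  // vld1/vst1 need no intrinsic: they are ordinary vector loads and stores,
  // emitted with the alignment of the pointer operand (PtrOp0). E.g.
  // vld1q_s32 becomes roughly:
  //   %v = load <4 x i32>, ptr %p, align 4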
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
  }
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  }
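  // vldap1(q)_lane is the FEAT_LRCPC3 load-acquire counterpart of
  // vld1_lane: the scalar load is marked atomic with acquire ordering
  // before its result is inserted into the vector.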
  case NEON::BI__builtin_neon_vldap1_lane_s64:
  case NEON::BI__builtin_neon_vldap1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    llvm::LoadInst *LI = Builder.CreateAlignedLoad(
        VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
    LI->setAtomic(llvm::AtomicOrdering::Acquire);
    Ops[0] = LI;
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vstl1_lane_s64:
  case NEON::BI__builtin_neon_vstl1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    llvm::StoreInst *SI =
        Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
    SI->setAtomic(llvm::AtomicOrdering::Release);
    return SI;
  }
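  // The structured loads vld2/vld3/vld4 (and the _dup forms) call the
  // aarch64.neon.ldN(r) intrinsics, which return a literal struct of N
  // vectors; that struct is then stored through the sret pointer in Ops[0].
  // E.g. vld2q_s32 calls roughly:
  //   { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %p)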
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
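  // For the _lane variants the operands arrive as (retptr, srcptr,
  // vectors..., lane). std::rotate moves the source pointer to the end so
  // the operand order matches the ldNlane intrinsic signature
  // (vectors..., lane, pointer), and the lane index is zero-extended to the
  // i64 the intrinsic expects.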
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = {VTy, Ops[1]->getType()};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = {VTy, Ops[1]->getType()};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = {VTy, Ops[1]->getType()};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = {VTy, Ops[2]->getType()};
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    llvm::Type *Tys[2] = {VTy, Ops[3]->getType()};
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = {VTy, Ops[3]->getType()};
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    llvm::Type *Tys[2] = {VTy, Ops[4]->getType()};
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = {VTy, Ops[4]->getType()};
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    llvm::Type *Tys[2] = {VTy, Ops[5]->getType()};
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
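  // vtrn/vuzp/vzip return both halves of the permutation by storing them to
  // consecutive slots of the sret pointer; each half is one shufflevector
  // of the two inputs. For vtrn on 4 lanes the two masks work out to
  // <0, 4, 2, 6> and <1, 5, 3, 7>.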
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
7589
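  // FEAT_LUT: the vluti2/vluti4 builtins perform 2- and 4-bit indexed table
  // lookups (LUTI2/LUTI4). The aarch64.neon.vluti2.lane(q) intrinsics are
  // overloaded on the result type and on the table-operand type, which is
  // why each case passes a second, 64- or 128-bit, NEON type.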
  case NEON::BI__builtin_neon_vluti2_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2_laneq_f16:
  case NEON::BI__builtin_neon_vluti2_laneq_p16:
  case NEON::BI__builtin_neon_vluti2_laneq_p8:
  case NEON::BI__builtin_neon_vluti2_laneq_s16:
  case NEON::BI__builtin_neon_vluti2_laneq_s8:
  case NEON::BI__builtin_neon_vluti2_laneq_u16:
  case NEON::BI__builtin_neon_vluti2_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
  }
  case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2q_laneq_f16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p8:
  case NEON::BI__builtin_neon_vluti2q_laneq_s16:
  case NEON::BI__builtin_neon_vluti2q_laneq_s8:
  case NEON::BI__builtin_neon_vluti2q_laneq_u16:
  case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ true));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
  }
  case NEON::BI__builtin_neon_vluti2_lane_mf8:
  case NEON::BI__builtin_neon_vluti2_lane_bf16:
  case NEON::BI__builtin_neon_vluti2_lane_f16:
  case NEON::BI__builtin_neon_vluti2_lane_p16:
  case NEON::BI__builtin_neon_vluti2_lane_p8:
  case NEON::BI__builtin_neon_vluti2_lane_s16:
  case NEON::BI__builtin_neon_vluti2_lane_s8:
  case NEON::BI__builtin_neon_vluti2_lane_u16:
  case NEON::BI__builtin_neon_vluti2_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
  }
  case NEON::BI__builtin_neon_vluti2q_lane_mf8:
  case NEON::BI__builtin_neon_vluti2q_lane_bf16:
  case NEON::BI__builtin_neon_vluti2q_lane_f16:
  case NEON::BI__builtin_neon_vluti2q_lane_p16:
  case NEON::BI__builtin_neon_vluti2q_lane_p8:
  case NEON::BI__builtin_neon_vluti2q_lane_s16:
  case NEON::BI__builtin_neon_vluti2q_lane_s8:
  case NEON::BI__builtin_neon_vluti2q_lane_u16:
  case NEON::BI__builtin_neon_vluti2q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ true));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
  }
  case NEON::BI__builtin_neon_vluti4q_lane_mf8:
  case NEON::BI__builtin_neon_vluti4q_lane_p8:
  case NEON::BI__builtin_neon_vluti4q_lane_s8:
  case NEON::BI__builtin_neon_vluti4q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
  }
  case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti4q_laneq_p8:
  case NEON::BI__builtin_neon_vluti4q_laneq_s8:
  case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
  }
  case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
  }
  case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
  }
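  // The mf8 (FP8) builtins below carry a trailing fpm_t operand that
  // configures the FP8 format; the EmitFP8Neon* helpers lower it to a
  // write of the FPMR register before emitting the main intrinsic call.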
  case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
                           {llvm::FixedVectorType::get(HalfTy, 8),
                            llvm::FixedVectorType::get(Int8Ty, 16)},
                           Ops, E, "fmmla");
  case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
                           {llvm::FixedVectorType::get(FloatTy, 4),
                            llvm::FixedVectorType::get(Int8Ty, 16)},
                           Ops, E, "fmmla");
  case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E,
                              "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E,
                              "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E,
                              "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E,
                              "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              Ops[0]->getType(), false, Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              llvm::FixedVectorType::get(HalfTy, 4), false,
                              Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 16),
                              llvm::FixedVectorType::get(HalfTy, 8), false,
                              Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
    llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                        uint64_t(0));
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
                              Ops[1]->getType(), false, Ops, E, "vfcvtn2");
  }

  case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false,
                               HalfTy, Ops, E, "fdot2");
  case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
  case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
                               FloatTy, Ops, E, "fdot4");
  case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");

  case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
  case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
  case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vamin_f16:
  case NEON::BI__builtin_neon_vaminq_f16:
  case NEON::BI__builtin_neon_vamin_f32:
  case NEON::BI__builtin_neon_vaminq_f32:
  case NEON::BI__builtin_neon_vaminq_f64: {
    Int = Intrinsic::aarch64_neon_famin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
  }
  case NEON::BI__builtin_neon_vamax_f16:
  case NEON::BI__builtin_neon_vamaxq_f16:
  case NEON::BI__builtin_neon_vamax_f32:
  case NEON::BI__builtin_neon_vamaxq_f32:
  case NEON::BI__builtin_neon_vamaxq_f64: {
    Int = Intrinsic::aarch64_neon_famax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
  }
  case NEON::BI__builtin_neon_vscale_f16:
  case NEON::BI__builtin_neon_vscaleq_f16:
  case NEON::BI__builtin_neon_vscale_f32:
  case NEON::BI__builtin_neon_vscaleq_f32:
  case NEON::BI__builtin_neon_vscaleq_f64: {
    Int = Intrinsic::aarch64_neon_fp8_fscale;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
  }
  }
}

Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
          BuiltinID == BPF::BI__builtin_btf_type_id ||
          BuiltinID == BPF::BI__builtin_preserve_type_info ||
          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
         "unexpected BPF builtin");

  // A sequence number, injected into IR builtin calls, to prevent CSE when
  // the only difference between otherwise-identical calls is the debuginfo
  // metadata attached to them.
  static uint32_t BuiltinSeqNum;
7872
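  // These builtins implement BPF CO-RE relocations. A typical use, assuming
  // a kernel struct whose layout may differ on the running kernel, is:
  //   unsigned off =
  //       __builtin_preserve_field_info(task->pid, 0 /* byte offset */);
  // where the BPF loader later patches the recorded value for the target.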
  switch (BuiltinID) {
  default:
    llvm_unreachable("Unexpected BPF builtin");
  case BPF::BI__builtin_preserve_field_info: {
    const Expr *Arg = E->getArg(0);
    bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;

    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(),
                "using __builtin_preserve_field_info() without -g");
      return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                        : EmitLValue(Arg).emitRawPointer(*this);
    }

    // Enable underlying preserve_*_access_index() generation.
    bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
    IsInPreservedAIRegion = true;
    Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                                  : EmitLValue(Arg).emitRawPointer(*this);
    IsInPreservedAIRegion = OldIsInPreservedAIRegion;

    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());

    // Build the IR for the preserve_field_info intrinsic.
    llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
        &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
        {FieldAddr->getType()});
    return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
  }
  case BPF::BI__builtin_btf_type_id:
  case BPF::BI__builtin_preserve_type_info: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *FnDecl;
    if (BuiltinID == BPF::BI__builtin_btf_type_id)
      FnDecl = Intrinsic::getOrInsertDeclaration(
          &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
    else
      FnDecl = Intrinsic::getOrInsertDeclaration(
          &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
    CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  case BPF::BI__builtin_preserve_enum_value: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    // Find the enumerator; the argument has the form *(EnumType *)EnumVal.
    const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
    const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
    const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
    const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());

    auto InitVal = Enumerator->getInitVal();
    std::string InitValStr;
    if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
      InitValStr = std::to_string(InitVal.getSExtValue());
    else
      InitValStr = std::to_string(InitVal.getZExtValue());
    std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
    Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
        &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
    CallInst *Fn =
        Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  }
}

llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (llvm::Value *Op : Ops)
      CstOps.push_back(cast<Constant>(Op));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result = llvm::PoisonValue::get(
      llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));

  return Result;
}

Value *CodeGenFunction::EmitAArch64CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*isVarArg=*/false);
  llvm::FunctionCallee Func =
      CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
  cast<llvm::GlobalValue>(Func.getCallee())
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  return Builder.CreateCall(Func);
}
8003
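// On AArch64, __builtin_cpu_supports takes a '+'-separated FMV feature
// string, e.g. __builtin_cpu_supports("sve2+bf16"). Each name must parse
// as a valid FMV extension; otherwise the whole check folds to false.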
Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
  const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
  llvm::SmallVector<StringRef, 8> Features;
  ArgStr.split(Features, "+");
  for (auto &Feature : Features) {
    Feature = Feature.trim();
    if (!llvm::AArch64::parseFMVExtension(Feature))
      return Builder.getFalse();
    if (Feature != "default")
      Features.push_back(Feature);
  }
  return EmitAArch64CpuSupports(Features);
}

llvm::Value *
CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeatureStrs) {
  llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeatureStrs);
  Value *Result = Builder.getTrue();
  if (FeaturesMask != 0) {
    // Get the feature bits from the structure the runtime library defines:
    //   struct {
    //     unsigned long long features;
    //   } __aarch64_cpu_features;
    llvm::Type *STy = llvm::StructType::get(Int64Ty);
    llvm::Constant *AArch64CPUFeatures =
        CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
    cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
    llvm::Value *CpuFeatures = Builder.CreateGEP(
        STy, AArch64CPUFeatures,
        {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
    Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
                                                CharUnits::fromQuantity(8));
    Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
    Value *Bitset = Builder.CreateAnd(Features, Mask);
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
    Result = Builder.CreateAnd(Result, Cmp);
  }
  return Result;
}
8044