//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGBuiltin.h"
#include "CGDebugInfo.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

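// Map an AArch64-flavoured MSVC builtin (the _BitScan* and _Interlocked*
// families) onto the target-independent CodeGenFunction::MSVCIntrin enum so
// the shared MSVC builtin lowering can handle it. Returns std::nullopt for
// builtins that have no such mapping.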
static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::AArch64::BI_BitScanForward:
  case clang::AArch64::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::AArch64::BI_BitScanReverse:
  case clang::AArch64::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::AArch64::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::AArch64::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::AArch64::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::AArch64::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::AArch64::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::AArch64::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::AArch64::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::AArch64::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::AArch64::BI_InterlockedExchange8_acq:
  case clang::AArch64::BI_InterlockedExchange16_acq:
  case clang::AArch64::BI_InterlockedExchange_acq:
  case clang::AArch64::BI_InterlockedExchange64_acq:
  case clang::AArch64::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::AArch64::BI_InterlockedExchange8_rel:
  case clang::AArch64::BI_InterlockedExchange16_rel:
  case clang::AArch64::BI_InterlockedExchange_rel:
  case clang::AArch64::BI_InterlockedExchange64_rel:
  case clang::AArch64::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::AArch64::BI_InterlockedExchange8_nf:
  case clang::AArch64::BI_InterlockedExchange16_nf:
  case clang::AArch64::BI_InterlockedExchange_nf:
  case clang::AArch64::BI_InterlockedExchange64_nf:
  case clang::AArch64::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
  case clang::AArch64::BI_InterlockedOr8_acq:
  case clang::AArch64::BI_InterlockedOr16_acq:
  case clang::AArch64::BI_InterlockedOr_acq:
  case clang::AArch64::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::AArch64::BI_InterlockedOr8_rel:
  case clang::AArch64::BI_InterlockedOr16_rel:
  case clang::AArch64::BI_InterlockedOr_rel:
  case clang::AArch64::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::AArch64::BI_InterlockedOr8_nf:
  case clang::AArch64::BI_InterlockedOr16_nf:
  case clang::AArch64::BI_InterlockedOr_nf:
  case clang::AArch64::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::AArch64::BI_InterlockedXor8_acq:
  case clang::AArch64::BI_InterlockedXor16_acq:
  case clang::AArch64::BI_InterlockedXor_acq:
  case clang::AArch64::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::AArch64::BI_InterlockedXor8_rel:
  case clang::AArch64::BI_InterlockedXor16_rel:
  case clang::AArch64::BI_InterlockedXor_rel:
  case clang::AArch64::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::AArch64::BI_InterlockedXor8_nf:
  case clang::AArch64::BI_InterlockedXor16_nf:
  case clang::AArch64::BI_InterlockedXor_nf:
  case clang::AArch64::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::AArch64::BI_InterlockedAnd8_acq:
  case clang::AArch64::BI_InterlockedAnd16_acq:
  case clang::AArch64::BI_InterlockedAnd_acq:
  case clang::AArch64::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::AArch64::BI_InterlockedAnd8_rel:
  case clang::AArch64::BI_InterlockedAnd16_rel:
  case clang::AArch64::BI_InterlockedAnd_rel:
  case clang::AArch64::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::AArch64::BI_InterlockedAnd8_nf:
  case clang::AArch64::BI_InterlockedAnd16_nf:
  case clang::AArch64::BI_InterlockedAnd_nf:
  case clang::AArch64::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::AArch64::BI_InterlockedIncrement16_acq:
  case clang::AArch64::BI_InterlockedIncrement_acq:
  case clang::AArch64::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::AArch64::BI_InterlockedIncrement16_rel:
  case clang::AArch64::BI_InterlockedIncrement_rel:
  case clang::AArch64::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::AArch64::BI_InterlockedIncrement16_nf:
  case clang::AArch64::BI_InterlockedIncrement_nf:
  case clang::AArch64::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::AArch64::BI_InterlockedDecrement16_acq:
  case clang::AArch64::BI_InterlockedDecrement_acq:
  case clang::AArch64::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::AArch64::BI_InterlockedDecrement16_rel:
  case clang::AArch64::BI_InterlockedDecrement_rel:
  case clang::AArch64::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::AArch64::BI_InterlockedDecrement16_nf:
  case clang::AArch64::BI_InterlockedDecrement_nf:
  case clang::AArch64::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

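// The AArch32 counterpart of the mapping above: translate ARM-flavoured MSVC
// builtin IDs to the generic MSVCIntrin enumeration, or return std::nullopt if
// the builtin is not one of the interlocked/bit-scan intrinsics handled here.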
static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
  case clang::ARM::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
  case clang::ARM::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
  case clang::ARM::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
  case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
  case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
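// For illustration only (hypothetical call, not taken from this file), a
// caller wanting a square root that respects the constrained-FP mode could
// use it roughly as:
//   Value *R = emitCallMaybeConstrainedFPBuiltin(
//       CGF, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt,
//       Arg->getType(), {Arg});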
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  else
    return CGF.Builder.CreateCall(F, Args);
}

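// Build the fixed-width LLVM vector type described by a NeonTypeFlags value.
// The quad bit doubles the lane count (64-bit vs. 128-bit vectors), V1Ty
// forces a single-element vector, and half/bfloat elements fall back to i16
// vectors when the target lacks legal half/bfloat support.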
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasLegalHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
  case NeonTypeFlags::MFloat8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasLegalHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not fully supported in Clang and LLVM, and much
    // of the i128/f128 API is missing, so we use v16i8 to represent poly128
    // and rely on pattern matching.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}

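// Return the floating-point vector type whose elements are the same width as
// the integer elements described by IntTypeFlags (i16 -> f16, i32 -> f32,
// i64 -> f64), preserving the quad bit.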
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

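// Splat a single lane of V (selected by the constant C) across a vector of
// the requested element count by emitting a shufflevector with a constant
// splat mask.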
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}

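// Emit a call to a NEON intrinsic after coercing each operand to the type the
// intrinsic expects: operand 'shift' (when nonzero) becomes a constant shift
// amount and every other operand is bitcast to the corresponding parameter
// type. Constrained-FP intrinsics are emitted with CreateConstrainedFPCall,
// and their metadata parameters are skipped during coercion.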
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }

  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  else
    return Builder.CreateCall(F, Ops, name);
}

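// FP8 NEON builtins carry an extra FPM (floating-point mode) operand as their
// last argument; it is written to the FPMR register via the
// aarch64.set.fpmr intrinsic before the NEON intrinsic itself is emitted.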
Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
                                        ArrayRef<llvm::Type *> Tys,
                                        SmallVectorImpl<Value *> &Ops,
                                        const CallExpr *E, const char *name) {
  llvm::Value *FPM =
      EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
  return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
                       Ops[1]->getType()};
  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
                         Ops, E, name);
}

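// Materialize a NEON shift amount as a constant of type Ty (a splat when Ty is
// a vector type), negating the value when a right shift is requested.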
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::get(Ty, neg ? -SV : SV);
}

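// Emit an FP8 conversion intrinsic. When Extract is set, the source is a full
// 128-bit mfloat8 vector but the conversion consumes only the low 64 bits, so
// the lower half is extracted first and the second overloaded type is
// narrowed to match.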
Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
                                           llvm::Type *Ty1, bool Extract,
                                           SmallVectorImpl<llvm::Value *> &Ops,
                                           const CallExpr *E,
                                           const char *name) {
  llvm::Type *Tys[] = {Ty0, Ty1};
  if (Extract) {
    // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
    // the vector.
    Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
    Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  else
    return Builder.CreateAShr(Vec, Shift, name);
}

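// Flags describing how an entry in the ARMVectorIntrinsicInfo tables below
// builds the overloaded type list and argument types for its LLVM intrinsic
// (stored in the TypeModifier field of ARMVectorIntrinsicInfo).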
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};

namespace {
struct ARMVectorIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
  unsigned AltLLVMIntrinsic;
  uint64_t TypeModifier;

  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace

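// NEONMAP0/1/2 build ARMVectorIntrinsicInfo initializers from a builtin name
// and zero, one, or two LLVM intrinsics. For example,
//   NEONMAP1(vabs_v, arm_neon_vabs, 0)
// expands to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }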
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    TypeModifier }

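// Table mapping AArch32 NEON builtins to their LLVM intrinsics and type
// modifiers. Entries are expected to stay sorted by builtin ID so the table
// can be searched efficiently (note the operator< overloads above).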
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_f16),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_f16),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_f16),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_f16),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, trunc, Add1ArgType),
  NEONMAP1(vrnda_v, round, Add1ArgType),
  NEONMAP1(vrndaq_v, round, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP1(vrndm_v, floor, Add1ArgType),
  NEONMAP1(vrndmq_v, floor, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, ceil, Add1ArgType),
  NEONMAP1(vrndpq_v, ceil, Add1ArgType),
  NEONMAP1(vrndq_v, trunc, Add1ArgType),
  NEONMAP1(vrndx_v, rint, Add1ArgType),
  NEONMAP1(vrndxq_v, rint, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

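// The corresponding table for AArch64 SIMD builtins, keyed and ordered the
// same way as ARMSIMDIntrinsicMap above.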
912static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
913 NEONMAP0(splat_lane_v),
914 NEONMAP0(splat_laneq_v),
915 NEONMAP0(splatq_lane_v),
916 NEONMAP0(splatq_laneq_v),
917 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
918 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
919 NEONMAP0(vadd_v),
920 NEONMAP0(vaddhn_v),
921 NEONMAP0(vaddq_p128),
922 NEONMAP0(vaddq_v),
923 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
924 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
925 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
926 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
927 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
928 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
929 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
930 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
931 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
932 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
933 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
934 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
935 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
936 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
937 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
938 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
939 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
940 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
941 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
942 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
943 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
944 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
945 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
946 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
947 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
948 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
949 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
950 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
951 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
952 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
953 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
954 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
955 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
956 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
957 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
958 NEONMAP0(vceqz_v),
959 NEONMAP0(vceqzq_v),
960 NEONMAP0(vcgez_v),
961 NEONMAP0(vcgezq_v),
962 NEONMAP0(vcgtz_v),
963 NEONMAP0(vcgtzq_v),
964 NEONMAP0(vclez_v),
965 NEONMAP0(vclezq_v),
966 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
967 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
968 NEONMAP0(vcltz_v),
969 NEONMAP0(vcltzq_v),
970 NEONMAP1(vclz_v, ctlz, Add1ArgType),
971 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
972 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
973 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
974 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
975 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
976 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
977 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
978 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
979 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
980 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
981 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
982 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
983 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
984 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
985 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
986 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
987 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
988 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
989 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
990 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
991 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
992 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
993 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
994 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
995 NEONMAP0(vcvt_f16_s16),
996 NEONMAP0(vcvt_f16_u16),
997 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
998 NEONMAP0(vcvt_f32_v),
999 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1000 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1001 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1002 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1003 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1004 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1005 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1006 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1007 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1008 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1009 NEONMAP0(vcvtq_f16_s16),
1010 NEONMAP0(vcvtq_f16_u16),
1011 NEONMAP0(vcvtq_f32_v),
1012 NEONMAP0(vcvtq_high_bf16_f32),
1013 NEONMAP0(vcvtq_low_bf16_f32),
1014 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
1015 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
1016 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1017 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
1018 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
1019 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
1020 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
1021 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
1022 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
1023 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
1024 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
1025 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
1026 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
1027 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
1028 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
1029 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1030 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1031 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1032 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1033 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1034 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1035 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1036 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
1037 NEONMAP0(vext_v),
1038 NEONMAP0(vextq_v),
1039 NEONMAP0(vfma_v),
1040 NEONMAP0(vfmaq_v),
1041 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
1042 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
1043 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
1044 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
1045 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
1046 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
1047 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
1048 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
1049 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1050 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
1051 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1052 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
1053 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
1054 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
1055 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
1056 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
1057 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
1058 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
1059 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
1060 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
1061 NEONMAP0(vmovl_v),
1062 NEONMAP0(vmovn_v),
1063 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
1064 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
1065 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
1066 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1067 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
1068 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
1069 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
1070 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
1071 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1072 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
1073 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
1074 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
1075 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
1076 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1077 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
1078 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
1079 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
1080 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
1081 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
1082 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
1083 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
1084 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
1085 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
1086 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1087 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1088 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
1089 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1090 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1091 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1092 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
1093 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1094 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1095 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1096 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
1097 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
1098 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
1099 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
1100 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1101 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
1102 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
1103 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1104 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
1105 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
1106 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
1107 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
1108 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1109 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
1110 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
1111 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
1112 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1113 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
1114 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
1115 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
1116 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1117 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
1118 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
1119 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
1120 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
1121 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
1122 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
1123 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
1124 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
1125 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
1126 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
1127 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
1128 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
1129 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
1130 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
1131 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
1132 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
1133 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
1134 NEONMAP0(vrndi_v),
1135 NEONMAP0(vrndiq_v),
1136 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1137 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
1138 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1139 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
1140 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1141 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
1142 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
1143 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
1144 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
1145 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
1146 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
1147 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
1148 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
1149 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
1150 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
1151 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
1152 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
1153 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
1154 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
1155 NEONMAP0(vshl_n_v),
1156 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1157 NEONMAP0(vshll_n_v),
1158 NEONMAP0(vshlq_n_v),
1159 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
1160 NEONMAP0(vshr_n_v),
1161 NEONMAP0(vshrn_n_v),
1162 NEONMAP0(vshrq_n_v),
1163 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
1164 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
1165 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
1166 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
1167 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
1168 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
1169 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
1170 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
1171 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
1172 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
1173 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
1174 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
1175 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
1176 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
1177 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
1178 NEONMAP0(vsubhn_v),
1179 NEONMAP0(vtst_v),
1180 NEONMAP0(vtstq_v),
1181 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
1182 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
1183 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
1184 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
1185};
1186
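// Scalar (SISD) AArch64 builtins. Like the SIMD map above, entries must stay
// sorted by builtin ID so findARMVectorIntrinsicInMap can binary-search them;
// the Vectorize*/Use*BitVectors modifiers describe how scalar operands are
// widened to the vector types the underlying intrinsics expect.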
1187static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
1188 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
1189 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
1190 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
1191 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1192 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1193 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
1194 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
1195 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1196 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1197 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1198 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
1199 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
1200 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
1201 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
1202 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1203 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1204 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1205 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1206 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1207 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1208 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
1209 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
1210 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
1211 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1212 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1213 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1214 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1215 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1216 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1217 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1218 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1219 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1220 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1221 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1222 NEONMAP0(vcvth_bf16_f32),
1223 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1224 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1225 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1226 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1227 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1228 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1229 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1230 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1231 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1232 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1233 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1234 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1235 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1236 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1237 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1238 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1239 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1240 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1241 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
1242 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1243 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1244 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1245 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1246 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1247 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1248 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1249 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1250 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
1251 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
1252 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1253 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1254 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1255 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1256 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1257 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1258 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1259 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1260 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
1261 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
1262 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
1263 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
1264 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
1265 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1266 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
1267 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1268 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
1269 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1270 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
1271 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1272 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
1273 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
1274 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
1275 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1276 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
1277 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
1278 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
1279 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1280 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1281 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
1282 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
1283 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
1284 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
1285 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
1286 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
1287 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
1288 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
1289 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
1290 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
1291 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
1292 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
1293 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1294 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1295 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
1296 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
1297 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
1298 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1299 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
1300 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1301 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
1302 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
1303 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
1304 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
1305 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
1306 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
1307 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
1308 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
1309 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
1310 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1311 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1312 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
1313 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
1314 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
1315 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
1316 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
1317 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
1318 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
1319 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
1320 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1321 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1322 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
1323 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
1324 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
1325 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1326 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
1327 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1328 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1329 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1330 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1331 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
1332 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
1333 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1334 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1335 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
1336 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
1337 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
1338 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
1339 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
1340 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
1341 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1342 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
1343 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
1344 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
1345 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
1346 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1347 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1348 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
1349 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
1350 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
1351 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1352 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
1353 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1354 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1355 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
1356 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
1357 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
1358 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
1359 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
1360 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
1361 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
1362 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
1363 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
1364 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
1365 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
1366 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
1367 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
1368 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
1369 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
1370 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
1371 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
1372 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
1373 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
1374 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
1375 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
1376 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
1377 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
1378 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
1379 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1380 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
1381 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
1382 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
1383 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
1384 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
1385 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1386 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
1387 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
1388 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
1389 // FP16 scalar intrinsics go here.
1390 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
1391 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1392 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1393 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1394 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1395 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1396 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
1397 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1398 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
1399 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1400 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
1401 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1402 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1403 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1404 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1405 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1406 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1407 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1408 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1409 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1410 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1411 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1412 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1413 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1414 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1415 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1416 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1417 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1418 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1419 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
1420 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
1421 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
1422 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
1423 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
1424};
1425
1426// Some intrinsics are equivalent for codegen.
1427static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
1428 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
1429 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
1430 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
1431 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
1432 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
1433 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
1434 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
1435 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
1436 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
1437 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
1438 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
1439 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
1440 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
1441 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
1442 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
1443 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
1444 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
1445 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
1446 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
1447 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
1448 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
1449 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
1450 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
1451 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
1452 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
1453 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
1454 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
1455 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
1456 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
1457 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
1458 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
1459 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
1460 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
1461 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
1462 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
1463 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
1464 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
1465 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
1466 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
1467 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
1468 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
1469 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
1470 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
1471 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
1472 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
1473 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
1474 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
1475 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
1476 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
1477 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
1478 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
1479 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
1480 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
1481 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
1482 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
1483 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
1484 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
1485 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
1486 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
1487 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
1488 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
1489 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
1490 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
1491 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
1492 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
1493 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
1494 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
1495 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
1496 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
1497 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
1498 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
1499 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
1500 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
1501 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
1502 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
1503 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
1504 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
1505 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
1506 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
1507 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
1508 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
1509 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
1510 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
1511 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
1512 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
1513 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
1514 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
1515 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
1516 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
1517 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
1518 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
1519 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
1520 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
1521 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
1522 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
1523 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
1524 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
1525 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
1526 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
1527 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
1528 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
1529 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
1530 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
1531 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
1532 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
1533 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
1534 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
1535 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
1536 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
1537 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
1538 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
1539 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
1540 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
1541 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
1542 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
1543 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
1544 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
1545 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
1546 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
1547 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
1548 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
1549 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
1550 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
1551 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
1552 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
1553 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
1554 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
1555 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
1556 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
1557 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
1558 // arbitrary one to be handled as the canonical variant.
1559 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1560 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1561 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
1562 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1563 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1564 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
1565 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1566 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1567 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
1568 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1569 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1570 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
1571};
1572
1573#undef NEONMAP0
1574#undef NEONMAP1
1575#undef NEONMAP2
1576
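// SVE builtins: SVEMAP1 ties a builtin directly to an LLVM intrinsic, while
// SVEMAP2 leaves the intrinsic ID as 0 for builtins that get fully custom
// codegen, mirroring NEONMAP0 above.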
1577#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1578 { \
1579 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1580 TypeModifier \
1581 }
1582
1583#define SVEMAP2(NameBase, TypeModifier) \
1584 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
1585static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
1586#define GET_SVE_LLVM_INTRINSIC_MAP
1587#include "clang/Basic/arm_sve_builtin_cg.inc"
1588#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
1589#undef GET_SVE_LLVM_INTRINSIC_MAP
1590};
1591
1592#undef SVEMAP1
1593#undef SVEMAP2
1594
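// SME builtins use the same two-macro scheme as SVE above.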
1595#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
1596 { \
1597 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
1598 TypeModifier \
1599 }
1600
1601#define SMEMAP2(NameBase, TypeModifier) \
1602 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
1603static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
1604#define GET_SME_LLVM_INTRINSIC_MAP
1605#include "clang/Basic/arm_sme_builtin_cg.inc"
1606#undef GET_SME_LLVM_INTRINSIC_MAP
1607};
1608
1609#undef SMEMAP1
1610#undef SMEMAP2
1611
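// Debug-only bookkeeping: in asserts builds each table below is checked once
// for being sorted by builtin ID (see findARMVectorIntrinsicInMap) so lookups
// can use binary search.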
1612static bool NEONSIMDIntrinsicsProvenSorted = false;
1613
1614static bool AArch64SIMDIntrinsicsProvenSorted = false;
1615static bool AArch64SISDIntrinsicsProvenSorted = false;
1616static bool AArch64SVEIntrinsicsProvenSorted = false;
1617static bool AArch64SMEIntrinsicsProvenSorted = false;
1618
1619static const ARMVectorIntrinsicInfo *
1620findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
1621 unsigned BuiltinID, bool &MapProvenSorted) {
1622
1623#ifndef NDEBUG
1624 if (!MapProvenSorted) {
1625 assert(llvm::is_sorted(IntrinsicMap));
1626 MapProvenSorted = true;
1627 }
1628#endif
1629
1630 const ARMVectorIntrinsicInfo *Builtin =
1631 llvm::lower_bound(Range&: IntrinsicMap, Value&: BuiltinID);
1632
1633 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
1634 return Builtin;
1635
1636 return nullptr;
1637}
1638
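// Build the concrete overload of an LLVM intrinsic for a NEON builtin. The
// table's TypeModifier flags decide which types are appended to the overload
// list: AddRetType adds the call's return type, Add1ArgType/Add2ArgTypes add
// the argument type once or twice, and VectorizeRetType/VectorizeArgTypes
// widen those types to 64- or 128-bit vectors as selected by
// Use64BitVectors/Use128BitVectors.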
1639Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
1640 unsigned Modifier,
1641 llvm::Type *ArgType,
1642 const CallExpr *E) {
1643 int VectorSize = 0;
1644 if (Modifier & Use64BitVectors)
1645 VectorSize = 64;
1646 else if (Modifier & Use128BitVectors)
1647 VectorSize = 128;
1648
1649 // Return type.
1650 SmallVector<llvm::Type *, 3> Tys;
1651 if (Modifier & AddRetType) {
1652 llvm::Type *Ty = ConvertType(T: E->getCallReturnType(Ctx: getContext()));
1653 if (Modifier & VectorizeRetType)
1654 Ty = llvm::FixedVectorType::get(
1655 ElementType: Ty, NumElts: VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
1656
1657 Tys.push_back(Elt: Ty);
1658 }
1659
1660 // Arguments.
1661 if (Modifier & VectorizeArgTypes) {
1662 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
1663 ArgType = llvm::FixedVectorType::get(ElementType: ArgType, NumElts: Elts);
1664 }
1665
1666 if (Modifier & (Add1ArgType | Add2ArgTypes))
1667 Tys.push_back(Elt: ArgType);
1668
1669 if (Modifier & Add2ArgTypes)
1670 Tys.push_back(Elt: ArgType);
1671
1672 if (Modifier & InventFloatType)
1673 Tys.push_back(Elt: FloatTy);
1674
1675 return CGM.getIntrinsic(IID: IntrinsicID, Tys);
1676}
1677
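// Emit a scalar (SISD) NEON builtin. Scalar operands that the chosen intrinsic
// expects as vectors are inserted into lane 0 of a poison vector, and if the
// intrinsic's result is wider than the builtin's return type the scalar is
// recovered by extracting lane 0; otherwise the result is simply bitcast.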
1678static Value *EmitCommonNeonSISDBuiltinExpr(
1679 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
1680 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
1681 unsigned BuiltinID = SISDInfo.BuiltinID;
1682 unsigned int Int = SISDInfo.LLVMIntrinsic;
1683 unsigned Modifier = SISDInfo.TypeModifier;
1684 const char *s = SISDInfo.NameHint;
1685
1686 switch (BuiltinID) {
1687 case NEON::BI__builtin_neon_vcled_s64:
1688 case NEON::BI__builtin_neon_vcled_u64:
1689 case NEON::BI__builtin_neon_vcles_f32:
1690 case NEON::BI__builtin_neon_vcled_f64:
1691 case NEON::BI__builtin_neon_vcltd_s64:
1692 case NEON::BI__builtin_neon_vcltd_u64:
1693 case NEON::BI__builtin_neon_vclts_f32:
1694 case NEON::BI__builtin_neon_vcltd_f64:
1695 case NEON::BI__builtin_neon_vcales_f32:
1696 case NEON::BI__builtin_neon_vcaled_f64:
1697 case NEON::BI__builtin_neon_vcalts_f32:
1698 case NEON::BI__builtin_neon_vcaltd_f64:
1699 // Only one direction of comparisons actually exists; cmle is just a cmge
1700 // with swapped operands. The table gives us the right intrinsic, but we
1701 // still need to do the swap.
1702 std::swap(a&: Ops[0], b&: Ops[1]);
1703 break;
1704 }
1705
1706 assert(Int && "Generic code assumes a valid intrinsic");
1707
1708 // Determine the type(s) of this overloaded AArch64 intrinsic.
1709 const Expr *Arg = E->getArg(Arg: 0);
1710 llvm::Type *ArgTy = CGF.ConvertType(T: Arg->getType());
1711 Function *F = CGF.LookupNeonLLVMIntrinsic(IntrinsicID: Int, Modifier, ArgType: ArgTy, E);
1712
1713 int j = 0;
1714 ConstantInt *C0 = ConstantInt::get(Ty: CGF.SizeTy, V: 0);
1715 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1716 ai != ae; ++ai, ++j) {
1717 llvm::Type *ArgTy = ai->getType();
1718 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
1719 ArgTy->getPrimitiveSizeInBits())
1720 continue;
1721
1722 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
1723 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
1724 // it before inserting.
1725 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
1726 V: Ops[j], DestTy: cast<llvm::VectorType>(Val: ArgTy)->getElementType());
1727 Ops[j] =
1728 CGF.Builder.CreateInsertElement(Vec: PoisonValue::get(T: ArgTy), NewElt: Ops[j], Idx: C0);
1729 }
1730
1731 Value *Result = CGF.EmitNeonCall(F, Ops, name: s);
1732 llvm::Type *ResultType = CGF.ConvertType(T: E->getType());
1733 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
1734 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
1735 return CGF.Builder.CreateExtractElement(Vec: Result, Idx: C0);
1736
1737 return CGF.Builder.CreateBitCast(V: Result, DestTy: ResultType, Name: s);
1738}
1739
1740Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
1741 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
1742 const char *NameHint, unsigned Modifier, const CallExpr *E,
1743 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
1744 llvm::Triple::ArchType Arch) {
1745 // Get the last argument, which specifies the vector type.
1746 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
1747 std::optional<llvm::APSInt> NeonTypeConst =
1748 Arg->getIntegerConstantExpr(Ctx: getContext());
1749 if (!NeonTypeConst)
1750 return nullptr;
1751
1752 // Determine the type of this overloaded NEON intrinsic.
1753 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1754 const bool Usgn = Type.isUnsigned();
1755 const bool Quad = Type.isQuad();
1756 const bool Floating = Type.isFloatingPoint();
1757 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
1758 const bool AllowBFloatArgsAndRet =
1759 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
1760
1761 llvm::FixedVectorType *VTy =
1762 GetNeonType(CGF: this, TypeFlags: Type, HasLegalHalfType, V1Ty: false, AllowBFloatArgsAndRet);
1763 llvm::Type *Ty = VTy;
1764 if (!Ty)
1765 return nullptr;
1766
1767 auto getAlignmentValue32 = [&](Address addr) -> Value* {
1768 return Builder.getInt32(C: addr.getAlignment().getQuantity());
1769 };
1770
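  // Table entries flagged UnsignedAlts store the unsigned intrinsic as the
  // primary one; switch to the signed alternative when the element type is
  // signed.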
1771 unsigned Int = LLVMIntrinsic;
1772 if ((Modifier & UnsignedAlts) && !Usgn)
1773 Int = AltLLVMIntrinsic;
1774
1775 switch (BuiltinID) {
1776 default: break;
1777 case NEON::BI__builtin_neon_splat_lane_v:
1778 case NEON::BI__builtin_neon_splat_laneq_v:
1779 case NEON::BI__builtin_neon_splatq_lane_v:
1780 case NEON::BI__builtin_neon_splatq_laneq_v: {
1781 auto NumElements = VTy->getElementCount();
1782 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
1783 NumElements = NumElements * 2;
1784 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
1785 NumElements = NumElements.divideCoefficientBy(RHS: 2);
1786
1787 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
1788 return EmitNeonSplat(V: Ops[0], C: cast<ConstantInt>(Val: Ops[1]), Count: NumElements);
1789 }
1790 case NEON::BI__builtin_neon_vpadd_v:
1791 case NEON::BI__builtin_neon_vpaddq_v:
1792 // We don't allow fp/int overloading of intrinsics.
1793 if (VTy->getElementType()->isFloatingPointTy() &&
1794 Int == Intrinsic::aarch64_neon_addp)
1795 Int = Intrinsic::aarch64_neon_faddp;
1796 break;
1797 case NEON::BI__builtin_neon_vabs_v:
1798 case NEON::BI__builtin_neon_vabsq_v:
1799 if (VTy->getElementType()->isFloatingPointTy())
1800 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::fabs, Tys: Ty), Ops, name: "vabs");
1801 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys: Ty), Ops, name: "vabs");
1802 case NEON::BI__builtin_neon_vadd_v:
1803 case NEON::BI__builtin_neon_vaddq_v: {
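    // Polynomial vector addition: adding polynomials over GF(2) is carry-less,
    // so XOR the raw bytes and bitcast back to the original type.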
1804 llvm::Type *VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Quad ? 16 : 8);
1805 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
1806 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
1807 Ops[0] = Builder.CreateXor(LHS: Ops[0], RHS: Ops[1]);
1808 return Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1809 }
1810 case NEON::BI__builtin_neon_vaddhn_v: {
1811 llvm::FixedVectorType *SrcTy =
1812 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
1813
1814 // %sum = add <4 x i32> %lhs, %rhs
1815 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
1816 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SrcTy);
1817 Ops[0] = Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1], Name: "vaddhn");
1818
1819 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
1820 Constant *ShiftAmt =
1821 ConstantInt::get(Ty: SrcTy, V: SrcTy->getScalarSizeInBits() / 2);
1822 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: ShiftAmt, Name: "vaddhn");
1823
1824 // %res = trunc <4 x i32> %high to <4 x i16>
1825 return Builder.CreateTrunc(V: Ops[0], DestTy: VTy, Name: "vaddhn");
1826 }
1827 case NEON::BI__builtin_neon_vcale_v:
1828 case NEON::BI__builtin_neon_vcaleq_v:
1829 case NEON::BI__builtin_neon_vcalt_v:
1830 case NEON::BI__builtin_neon_vcaltq_v:
1831 std::swap(a&: Ops[0], b&: Ops[1]);
1832 [[fallthrough]];
1833 case NEON::BI__builtin_neon_vcage_v:
1834 case NEON::BI__builtin_neon_vcageq_v:
1835 case NEON::BI__builtin_neon_vcagt_v:
1836 case NEON::BI__builtin_neon_vcagtq_v: {
1837 llvm::Type *Ty;
1838 switch (VTy->getScalarSizeInBits()) {
1839 default: llvm_unreachable("unexpected type");
1840 case 32:
1841 Ty = FloatTy;
1842 break;
1843 case 64:
1844 Ty = DoubleTy;
1845 break;
1846 case 16:
1847 Ty = HalfTy;
1848 break;
1849 }
1850 auto *VecFlt = llvm::FixedVectorType::get(ElementType: Ty, NumElts: VTy->getNumElements());
1851 llvm::Type *Tys[] = { VTy, VecFlt };
1852 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
1853 return EmitNeonCall(F, Ops, name: NameHint);
1854 }
1855 case NEON::BI__builtin_neon_vceqz_v:
1856 case NEON::BI__builtin_neon_vceqzq_v:
1857 return EmitAArch64CompareBuiltinExpr(
1858 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, Name: "vceqz");
1859 case NEON::BI__builtin_neon_vcgez_v:
1860 case NEON::BI__builtin_neon_vcgezq_v:
1861 return EmitAArch64CompareBuiltinExpr(
1862 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1863 Name: "vcgez");
1864 case NEON::BI__builtin_neon_vclez_v:
1865 case NEON::BI__builtin_neon_vclezq_v:
1866 return EmitAArch64CompareBuiltinExpr(
1867 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1868 Name: "vclez");
1869 case NEON::BI__builtin_neon_vcgtz_v:
1870 case NEON::BI__builtin_neon_vcgtzq_v:
1871 return EmitAArch64CompareBuiltinExpr(
1872 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1873 Name: "vcgtz");
1874 case NEON::BI__builtin_neon_vcltz_v:
1875 case NEON::BI__builtin_neon_vcltzq_v:
1876 return EmitAArch64CompareBuiltinExpr(
1877 Op: Ops[0], Ty, Pred: Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1878 Name: "vcltz");
1879 case NEON::BI__builtin_neon_vclz_v:
1880 case NEON::BI__builtin_neon_vclzq_v:
1881 // We generate a target-independent intrinsic, which needs a second argument
1882 // saying whether or not clz of zero is undefined; on ARM it isn't.
1883 Ops.push_back(Elt: Builder.getInt1(V: getTarget().isCLZForZeroUndef()));
1884 break;
1885 case NEON::BI__builtin_neon_vcvt_f32_v:
1886 case NEON::BI__builtin_neon_vcvtq_f32_v:
1887 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1888 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
1889 HasLegalHalfType);
1890 return Usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
1891 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
1892 case NEON::BI__builtin_neon_vcvt_f16_s16:
1893 case NEON::BI__builtin_neon_vcvt_f16_u16:
1894 case NEON::BI__builtin_neon_vcvtq_f16_s16:
1895 case NEON::BI__builtin_neon_vcvtq_f16_u16:
1896 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
1897 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
1898 HasLegalHalfType);
1899 return Usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
1900 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
1901 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
1902 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
1903 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
1904 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
1905 llvm::Type *Tys[2] = { GetFloatNeonType(CGF: this, IntTypeFlags: Type), Ty };
1906 Function *F = CGM.getIntrinsic(IID: Int, Tys);
1907 return EmitNeonCall(F, Ops, name: "vcvt_n");
1908 }
1909 case NEON::BI__builtin_neon_vcvt_n_f32_v:
1910 case NEON::BI__builtin_neon_vcvt_n_f64_v:
1911 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
1912 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
1913 llvm::Type *Tys[2] = { GetFloatNeonType(CGF: this, IntTypeFlags: Type), Ty };
1914 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
1915 Function *F = CGM.getIntrinsic(IID: Int, Tys);
1916 return EmitNeonCall(F, Ops, name: "vcvt_n");
1917 }
1918 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
1919 case NEON::BI__builtin_neon_vcvt_n_s32_v:
1920 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
1921 case NEON::BI__builtin_neon_vcvt_n_u32_v:
1922 case NEON::BI__builtin_neon_vcvt_n_s64_v:
1923 case NEON::BI__builtin_neon_vcvt_n_u64_v:
1924 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
1925 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
1926 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
1927 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
1928 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
1929 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
1930 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
1931 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
1932 return EmitNeonCall(F, Ops, name: "vcvt_n");
1933 }
1934 case NEON::BI__builtin_neon_vcvt_s32_v:
1935 case NEON::BI__builtin_neon_vcvt_u32_v:
1936 case NEON::BI__builtin_neon_vcvt_s64_v:
1937 case NEON::BI__builtin_neon_vcvt_u64_v:
1938 case NEON::BI__builtin_neon_vcvt_s16_f16:
1939 case NEON::BI__builtin_neon_vcvt_u16_f16:
1940 case NEON::BI__builtin_neon_vcvtq_s32_v:
1941 case NEON::BI__builtin_neon_vcvtq_u32_v:
1942 case NEON::BI__builtin_neon_vcvtq_s64_v:
1943 case NEON::BI__builtin_neon_vcvtq_u64_v:
1944 case NEON::BI__builtin_neon_vcvtq_s16_f16:
1945 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
1946 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetFloatNeonType(CGF: this, IntTypeFlags: Type));
1947 return Usgn ? Builder.CreateFPToUI(V: Ops[0], DestTy: Ty, Name: "vcvt")
1948 : Builder.CreateFPToSI(V: Ops[0], DestTy: Ty, Name: "vcvt");
1949 }
1950 case NEON::BI__builtin_neon_vcvta_s16_f16:
1951 case NEON::BI__builtin_neon_vcvta_s32_v:
1952 case NEON::BI__builtin_neon_vcvta_s64_v:
1953 case NEON::BI__builtin_neon_vcvta_u16_f16:
1954 case NEON::BI__builtin_neon_vcvta_u32_v:
1955 case NEON::BI__builtin_neon_vcvta_u64_v:
1956 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
1957 case NEON::BI__builtin_neon_vcvtaq_s32_v:
1958 case NEON::BI__builtin_neon_vcvtaq_s64_v:
1959 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
1960 case NEON::BI__builtin_neon_vcvtaq_u32_v:
1961 case NEON::BI__builtin_neon_vcvtaq_u64_v:
1962 case NEON::BI__builtin_neon_vcvtn_s16_f16:
1963 case NEON::BI__builtin_neon_vcvtn_s32_v:
1964 case NEON::BI__builtin_neon_vcvtn_s64_v:
1965 case NEON::BI__builtin_neon_vcvtn_u16_f16:
1966 case NEON::BI__builtin_neon_vcvtn_u32_v:
1967 case NEON::BI__builtin_neon_vcvtn_u64_v:
1968 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
1969 case NEON::BI__builtin_neon_vcvtnq_s32_v:
1970 case NEON::BI__builtin_neon_vcvtnq_s64_v:
1971 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
1972 case NEON::BI__builtin_neon_vcvtnq_u32_v:
1973 case NEON::BI__builtin_neon_vcvtnq_u64_v:
1974 case NEON::BI__builtin_neon_vcvtp_s16_f16:
1975 case NEON::BI__builtin_neon_vcvtp_s32_v:
1976 case NEON::BI__builtin_neon_vcvtp_s64_v:
1977 case NEON::BI__builtin_neon_vcvtp_u16_f16:
1978 case NEON::BI__builtin_neon_vcvtp_u32_v:
1979 case NEON::BI__builtin_neon_vcvtp_u64_v:
1980 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
1981 case NEON::BI__builtin_neon_vcvtpq_s32_v:
1982 case NEON::BI__builtin_neon_vcvtpq_s64_v:
1983 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
1984 case NEON::BI__builtin_neon_vcvtpq_u32_v:
1985 case NEON::BI__builtin_neon_vcvtpq_u64_v:
1986 case NEON::BI__builtin_neon_vcvtm_s16_f16:
1987 case NEON::BI__builtin_neon_vcvtm_s32_v:
1988 case NEON::BI__builtin_neon_vcvtm_s64_v:
1989 case NEON::BI__builtin_neon_vcvtm_u16_f16:
1990 case NEON::BI__builtin_neon_vcvtm_u32_v:
1991 case NEON::BI__builtin_neon_vcvtm_u64_v:
1992 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
1993 case NEON::BI__builtin_neon_vcvtmq_s32_v:
1994 case NEON::BI__builtin_neon_vcvtmq_s64_v:
1995 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
1996 case NEON::BI__builtin_neon_vcvtmq_u32_v:
1997 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
1998 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
1999 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: NameHint);
2000 }
2001 case NEON::BI__builtin_neon_vcvtx_f32_v: {
2002 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
2003 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: NameHint);
2004
2005 }
2006 case NEON::BI__builtin_neon_vext_v:
2007 case NEON::BI__builtin_neon_vextq_v: {
2008 int CV = cast<ConstantInt>(Val: Ops[2])->getSExtValue();
2009 SmallVector<int, 16> Indices;
2010 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2011 Indices.push_back(Elt: i+CV);
2012
2013 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
2014 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2015 return Builder.CreateShuffleVector(V1: Ops[0], V2: Ops[1], Mask: Indices, Name: "vext");
2016 }
2017 case NEON::BI__builtin_neon_vfma_v:
2018 case NEON::BI__builtin_neon_vfmaq_v: {
2019 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
2020 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2021 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
2022
2023 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
2024 return emitCallMaybeConstrainedFPBuiltin(
2025 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty,
2026 Args: {Ops[1], Ops[2], Ops[0]});
2027 }
2028 case NEON::BI__builtin_neon_vld1_v:
2029 case NEON::BI__builtin_neon_vld1q_v: {
2030 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2031 Ops.push_back(Elt: getAlignmentValue32(PtrOp0));
2032 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "vld1");
2033 }
2034 case NEON::BI__builtin_neon_vld1_x2_v:
2035 case NEON::BI__builtin_neon_vld1q_x2_v:
2036 case NEON::BI__builtin_neon_vld1_x3_v:
2037 case NEON::BI__builtin_neon_vld1q_x3_v:
2038 case NEON::BI__builtin_neon_vld1_x4_v:
2039 case NEON::BI__builtin_neon_vld1q_x4_v: {
2040 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2041 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
2042 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld1xN");
2043 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
2044 }
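  // The vldN and vldN_dup builtins return a struct of vectors through a
  // pointer passed as Ops[0]; call the intrinsic and store its aggregate
  // result into that slot.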
2045 case NEON::BI__builtin_neon_vld2_v:
2046 case NEON::BI__builtin_neon_vld2q_v:
2047 case NEON::BI__builtin_neon_vld3_v:
2048 case NEON::BI__builtin_neon_vld3q_v:
2049 case NEON::BI__builtin_neon_vld4_v:
2050 case NEON::BI__builtin_neon_vld4q_v:
2051 case NEON::BI__builtin_neon_vld2_dup_v:
2052 case NEON::BI__builtin_neon_vld2q_dup_v:
2053 case NEON::BI__builtin_neon_vld3_dup_v:
2054 case NEON::BI__builtin_neon_vld3q_dup_v:
2055 case NEON::BI__builtin_neon_vld4_dup_v:
2056 case NEON::BI__builtin_neon_vld4q_dup_v: {
2057 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2058 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
2059 Value *Align = getAlignmentValue32(PtrOp1);
2060 Ops[1] = Builder.CreateCall(Callee: F, Args: {Ops[1], Align}, Name: NameHint);
2061 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
2062 }
2063 case NEON::BI__builtin_neon_vld1_dup_v:
2064 case NEON::BI__builtin_neon_vld1q_dup_v: {
2065 Value *V = PoisonValue::get(T: Ty);
2066 PtrOp0 = PtrOp0.withElementType(ElemTy: VTy->getElementType());
2067 LoadInst *Ld = Builder.CreateLoad(Addr: PtrOp0);
2068 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
2069 Ops[0] = Builder.CreateInsertElement(Vec: V, NewElt: Ld, Idx: CI);
2070 return EmitNeonSplat(V: Ops[0], C: CI);
2071 }
2072 case NEON::BI__builtin_neon_vld2_lane_v:
2073 case NEON::BI__builtin_neon_vld2q_lane_v:
2074 case NEON::BI__builtin_neon_vld3_lane_v:
2075 case NEON::BI__builtin_neon_vld3q_lane_v:
2076 case NEON::BI__builtin_neon_vld4_lane_v:
2077 case NEON::BI__builtin_neon_vld4q_lane_v: {
2078 llvm::Type *Tys[] = {Ty, Int8PtrTy};
2079 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
2080 for (unsigned I = 2; I < Ops.size() - 1; ++I)
2081 Ops[I] = Builder.CreateBitCast(V: Ops[I], DestTy: Ty);
2082 Ops.push_back(Elt: getAlignmentValue32(PtrOp1));
2083 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: NameHint);
2084 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
2085 }
2086 case NEON::BI__builtin_neon_vmovl_v: {
2087 llvm::FixedVectorType *DTy =
2088 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2089 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DTy);
2090 if (Usgn)
2091 return Builder.CreateZExt(V: Ops[0], DestTy: Ty, Name: "vmovl");
2092 return Builder.CreateSExt(V: Ops[0], DestTy: Ty, Name: "vmovl");
2093 }
2094 case NEON::BI__builtin_neon_vmovn_v: {
2095 llvm::FixedVectorType *QTy =
2096 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2097 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: QTy);
2098 return Builder.CreateTrunc(V: Ops[0], DestTy: Ty, Name: "vmovn");
2099 }
2100 case NEON::BI__builtin_neon_vmull_v:
2101 // FIXME: the integer vmull operations could be emitted in terms of pure
2102 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2103 // hoisting the exts outside loops. Until global ISel comes along that can
2104 // see through such movement, this leads to bad CodeGen. So we need an
2105 // intrinsic for now.
2106 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2107 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2108 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
2109 case NEON::BI__builtin_neon_vpadal_v:
2110 case NEON::BI__builtin_neon_vpadalq_v: {
2111 // The source operand type has twice as many elements of half the size.
2112 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2113 llvm::Type *EltTy =
2114 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
2115 auto *NarrowTy =
2116 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
2117 llvm::Type *Tys[2] = { Ty, NarrowTy };
2118 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
2119 }
2120 case NEON::BI__builtin_neon_vpaddl_v:
2121 case NEON::BI__builtin_neon_vpaddlq_v: {
2122 // The source operand type has twice as many elements of half the size.
2123 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2124 llvm::Type *EltTy = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
2125 auto *NarrowTy =
2126 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
2127 llvm::Type *Tys[2] = { Ty, NarrowTy };
2128 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vpaddl");
2129 }
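  // vqdmlal/vqdmlsl: first emit the saturating doubling multiply-long on the
  // two multiplicands, then the saturating accumulate/subtract with Ops[0].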
2130 case NEON::BI__builtin_neon_vqdmlal_v:
2131 case NEON::BI__builtin_neon_vqdmlsl_v: {
2132 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
2133 Ops[1] =
2134 EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys: Ty), Ops&: MulOps, name: "vqdmlal");
2135 Ops.resize(N: 2);
2136 return EmitNeonCall(F: CGM.getIntrinsic(IID: AltLLVMIntrinsic, Tys: Ty), Ops, name: NameHint);
2137 }
2138 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
2139 case NEON::BI__builtin_neon_vqdmulh_lane_v:
2140 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
2141 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
2142 auto *RTy = cast<llvm::FixedVectorType>(Val: Ty);
2143 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
2144 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
2145 RTy = llvm::FixedVectorType::get(ElementType: RTy->getElementType(),
2146 NumElts: RTy->getNumElements() * 2);
2147 llvm::Type *Tys[2] = {
2148 RTy, GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
2149 /*isQuad*/ false))};
2150 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
2151 }
2152 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
2153 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
2154 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
2155 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
2156 llvm::Type *Tys[2] = {
2157 Ty, GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
2158 /*isQuad*/ true))};
2159 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
2160 }
2161 case NEON::BI__builtin_neon_vqshl_n_v:
2162 case NEON::BI__builtin_neon_vqshlq_n_v:
2163 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshl_n",
2164 shift: 1, rightshift: false);
2165 case NEON::BI__builtin_neon_vqshlu_n_v:
2166 case NEON::BI__builtin_neon_vqshluq_n_v:
2167 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshlu_n",
2168 shift: 1, rightshift: false);
2169 case NEON::BI__builtin_neon_vrecpe_v:
2170 case NEON::BI__builtin_neon_vrecpeq_v:
2171 case NEON::BI__builtin_neon_vrsqrte_v:
2172 case NEON::BI__builtin_neon_vrsqrteq_v:
2173 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
2174 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: NameHint);
2175 case NEON::BI__builtin_neon_vrndi_v:
2176 case NEON::BI__builtin_neon_vrndiq_v:
2177 Int = Builder.getIsFPConstrained()
2178 ? Intrinsic::experimental_constrained_nearbyint
2179 : Intrinsic::nearbyint;
2180 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: NameHint);
2181 case NEON::BI__builtin_neon_vrshr_n_v:
2182 case NEON::BI__builtin_neon_vrshrq_n_v:
2183 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrshr_n",
2184 shift: 1, rightshift: true);
2185 case NEON::BI__builtin_neon_vsha512hq_u64:
2186 case NEON::BI__builtin_neon_vsha512h2q_u64:
2187 case NEON::BI__builtin_neon_vsha512su0q_u64:
2188 case NEON::BI__builtin_neon_vsha512su1q_u64: {
2189 Function *F = CGM.getIntrinsic(IID: Int);
2190 return EmitNeonCall(F, Ops, name: "");
2191 }
2192 case NEON::BI__builtin_neon_vshl_n_v:
2193 case NEON::BI__builtin_neon_vshlq_n_v:
2194 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty, neg: false);
2195 return Builder.CreateShl(LHS: Builder.CreateBitCast(V: Ops[0], DestTy: Ty), RHS: Ops[1],
2196 Name: "vshl_n");
2197 case NEON::BI__builtin_neon_vshll_n_v: {
2198 llvm::FixedVectorType *SrcTy =
2199 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
2200 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
2201 if (Usgn)
2202 Ops[0] = Builder.CreateZExt(V: Ops[0], DestTy: VTy);
2203 else
2204 Ops[0] = Builder.CreateSExt(V: Ops[0], DestTy: VTy);
2205 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty: VTy, neg: false);
2206 return Builder.CreateShl(LHS: Ops[0], RHS: Ops[1], Name: "vshll_n");
2207 }
2208 case NEON::BI__builtin_neon_vshrn_n_v: {
2209 llvm::FixedVectorType *SrcTy =
2210 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2211 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
2212 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty: SrcTy, neg: false);
2213 if (Usgn)
2214 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: Ops[1]);
2215 else
2216 Ops[0] = Builder.CreateAShr(LHS: Ops[0], RHS: Ops[1]);
2217 return Builder.CreateTrunc(V: Ops[0], DestTy: Ty, Name: "vshrn_n");
2218 }
2219 case NEON::BI__builtin_neon_vshr_n_v:
2220 case NEON::BI__builtin_neon_vshrq_n_v:
2221 return EmitNeonRShiftImm(Vec: Ops[0], Shift: Ops[1], Ty, usgn: Usgn, name: "vshr_n");
2222 case NEON::BI__builtin_neon_vst1_v:
2223 case NEON::BI__builtin_neon_vst1q_v:
2224 case NEON::BI__builtin_neon_vst2_v:
2225 case NEON::BI__builtin_neon_vst2q_v:
2226 case NEON::BI__builtin_neon_vst3_v:
2227 case NEON::BI__builtin_neon_vst3q_v:
2228 case NEON::BI__builtin_neon_vst4_v:
2229 case NEON::BI__builtin_neon_vst4q_v:
2230 case NEON::BI__builtin_neon_vst2_lane_v:
2231 case NEON::BI__builtin_neon_vst2q_lane_v:
2232 case NEON::BI__builtin_neon_vst3_lane_v:
2233 case NEON::BI__builtin_neon_vst3q_lane_v:
2234 case NEON::BI__builtin_neon_vst4_lane_v:
2235 case NEON::BI__builtin_neon_vst4q_lane_v: {
2236 llvm::Type *Tys[] = {Int8PtrTy, Ty};
2237 Ops.push_back(Elt: getAlignmentValue32(PtrOp0));
2238 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "");
2239 }
2240 case NEON::BI__builtin_neon_vsm3partw1q_u32:
2241 case NEON::BI__builtin_neon_vsm3partw2q_u32:
2242 case NEON::BI__builtin_neon_vsm3ss1q_u32:
2243 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
2244 case NEON::BI__builtin_neon_vsm4eq_u32: {
2245 Function *F = CGM.getIntrinsic(IID: Int);
2246 return EmitNeonCall(F, Ops, name: "");
2247 }
2248 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
2249 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
2250 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
2251 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
2252 Function *F = CGM.getIntrinsic(IID: Int);
2253 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
2254 return EmitNeonCall(F, Ops, name: "");
2255 }
2256 case NEON::BI__builtin_neon_vst1_x2_v:
2257 case NEON::BI__builtin_neon_vst1q_x2_v:
2258 case NEON::BI__builtin_neon_vst1_x3_v:
2259 case NEON::BI__builtin_neon_vst1q_x3_v:
2260 case NEON::BI__builtin_neon_vst1_x4_v:
2261 case NEON::BI__builtin_neon_vst1q_x4_v: {
2262 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
2263 // in AArch64 it comes last. We may want to standardize on one or the other.
2264 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
2265 Arch == llvm::Triple::aarch64_32) {
2266 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
2267 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
2268 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "");
2269 }
2270 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
2271 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "");
2272 }
2273 case NEON::BI__builtin_neon_vsubhn_v: {
2274 llvm::FixedVectorType *SrcTy =
2275 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
2276
2277 // %sum = add <4 x i32> %lhs, %rhs
2278 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
2279 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SrcTy);
2280 Ops[0] = Builder.CreateSub(LHS: Ops[0], RHS: Ops[1], Name: "vsubhn");
2281
2282 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
2283 Constant *ShiftAmt =
2284 ConstantInt::get(Ty: SrcTy, V: SrcTy->getScalarSizeInBits() / 2);
2285 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: ShiftAmt, Name: "vsubhn");
2286
2287 // %res = trunc <4 x i32> %high to <4 x i16>
2288 return Builder.CreateTrunc(V: Ops[0], DestTy: VTy, Name: "vsubhn");
2289 }
2290 case NEON::BI__builtin_neon_vtrn_v:
2291 case NEON::BI__builtin_neon_vtrnq_v: {
2292 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2293 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
2294 Value *SV = nullptr;
2295
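// As an illustration, for a 4-element vector (e == 4) the two masks built
// below are <0, 4, 2, 6> (vi == 0) and <1, 5, 3, 7> (vi == 1), i.e. the two
// halves of a 2x2-element transpose of the input pair.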
2296 for (unsigned vi = 0; vi != 2; ++vi) {
2297 SmallVector<int, 16> Indices;
2298 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2299 Indices.push_back(Elt: i+vi);
2300 Indices.push_back(Elt: i+e+vi);
2301 }
2302 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
2303 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vtrn");
2304 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
2305 }
2306 return SV;
2307 }
2308 case NEON::BI__builtin_neon_vtst_v:
2309 case NEON::BI__builtin_neon_vtstq_v: {
2310 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
2311 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2312 Ops[0] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1]);
2313 Ops[0] = Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Ops[0],
2314 RHS: ConstantAggregateZero::get(Ty));
2315 return Builder.CreateSExt(V: Ops[0], DestTy: Ty, Name: "vtst");
2316 }
2317 case NEON::BI__builtin_neon_vuzp_v:
2318 case NEON::BI__builtin_neon_vuzpq_v: {
2319 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2320 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
2321 Value *SV = nullptr;
2322
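// As an illustration, for a 4-element vector (e == 4) the masks below are
// <0, 2, 4, 6> (vi == 0) and <1, 3, 5, 7> (vi == 1): the even and odd
// elements of the concatenated input pair.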
2323 for (unsigned vi = 0; vi != 2; ++vi) {
2324 SmallVector<int, 16> Indices;
2325 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2326 Indices.push_back(Elt: 2*i+vi);
2327
2328 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
2329 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vuzp");
2330 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
2331 }
2332 return SV;
2333 }
2334 case NEON::BI__builtin_neon_vxarq_u64: {
2335 Function *F = CGM.getIntrinsic(IID: Int);
2336 Ops[2] = Builder.CreateZExt(V: Ops[2], DestTy: Int64Ty);
2337 return EmitNeonCall(F, Ops, name: "");
2338 }
2339 case NEON::BI__builtin_neon_vzip_v:
2340 case NEON::BI__builtin_neon_vzipq_v: {
2341 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
2342 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
2343 Value *SV = nullptr;
2344
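// As an illustration, for a 4-element vector (e == 4) the masks below are
// <0, 4, 1, 5> (vi == 0) and <2, 6, 3, 7> (vi == 1), interleaving the low
// and high halves of the input pair respectively.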
2345 for (unsigned vi = 0; vi != 2; ++vi) {
2346 SmallVector<int, 16> Indices;
2347 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2348 Indices.push_back(Elt: (i + vi*e) >> 1);
2349 Indices.push_back(Elt: ((i + vi*e) >> 1)+e);
2350 }
2351 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
2352 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vzip");
2353 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
2354 }
2355 return SV;
2356 }
2357 case NEON::BI__builtin_neon_vdot_s32:
2358 case NEON::BI__builtin_neon_vdot_u32:
2359 case NEON::BI__builtin_neon_vdotq_s32:
2360 case NEON::BI__builtin_neon_vdotq_u32: {
2361 auto *InputTy =
2362 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
2363 llvm::Type *Tys[2] = { Ty, InputTy };
2364 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vdot");
2365 }
2366 case NEON::BI__builtin_neon_vfmlal_low_f16:
2367 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
2368 auto *InputTy =
2369 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2370 llvm::Type *Tys[2] = { Ty, InputTy };
2371 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlal_low");
2372 }
2373 case NEON::BI__builtin_neon_vfmlsl_low_f16:
2374 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
2375 auto *InputTy =
2376 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2377 llvm::Type *Tys[2] = { Ty, InputTy };
2378 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlsl_low");
2379 }
2380 case NEON::BI__builtin_neon_vfmlal_high_f16:
2381 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
2382 auto *InputTy =
2383 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2384 llvm::Type *Tys[2] = { Ty, InputTy };
2385 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlal_high");
2386 }
2387 case NEON::BI__builtin_neon_vfmlsl_high_f16:
2388 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
2389 auto *InputTy =
2390 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2391 llvm::Type *Tys[2] = { Ty, InputTy };
2392 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlsl_high");
2393 }
2394 case NEON::BI__builtin_neon_vmmlaq_s32:
2395 case NEON::BI__builtin_neon_vmmlaq_u32: {
2396 auto *InputTy =
2397 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
2398 llvm::Type *Tys[2] = { Ty, InputTy };
2399 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "vmmla");
2400 }
2401 case NEON::BI__builtin_neon_vusmmlaq_s32: {
2402 auto *InputTy =
2403 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
2404 llvm::Type *Tys[2] = { Ty, InputTy };
2405 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vusmmla");
2406 }
2407 case NEON::BI__builtin_neon_vusdot_s32:
2408 case NEON::BI__builtin_neon_vusdotq_s32: {
2409 auto *InputTy =
2410 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
2411 llvm::Type *Tys[2] = { Ty, InputTy };
2412 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vusdot");
2413 }
2414 case NEON::BI__builtin_neon_vbfdot_f32:
2415 case NEON::BI__builtin_neon_vbfdotq_f32: {
2416 llvm::Type *InputTy =
2417 llvm::FixedVectorType::get(ElementType: BFloatTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
2418 llvm::Type *Tys[2] = { Ty, InputTy };
2419 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vbfdot");
2420 }
2421 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
2422 llvm::Type *Tys[1] = { Ty };
2423 Function *F = CGM.getIntrinsic(IID: Int, Tys);
2424 return EmitNeonCall(F, Ops, name: "vcvtfp2bf");
2425 }
2426
2427 }
2428
2429 assert(Int && "Expected valid intrinsic number");
2430
2431 // Determine the type(s) of this overloaded NEON intrinsic.
2432 Function *F = LookupNeonLLVMIntrinsic(IntrinsicID: Int, Modifier, ArgType: Ty, E);
2433
2434 Value *Result = EmitNeonCall(F, Ops, name: NameHint);
2435 llvm::Type *ResultType = ConvertType(T: E->getType());
2436 // The intrinsic returns a one-element vector type; cast it back to the
2437 // scalar type expected by the builtin.
2438 return Builder.CreateBitCast(V: Result, DestTy: ResultType, Name: NameHint);
2439}
2440
2441Value *
2442CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2443 const CmpInst::Predicate Pred,
2444 const Twine &Name) {
2445
2446 if (isa<FixedVectorType>(Val: Ty)) {
2447 // Vector types are cast to i8 vectors. Recover original type.
2448 Op = Builder.CreateBitCast(V: Op, DestTy: Ty);
2449 }
2450
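// Equality is emitted as a quiet compare, while the other ordered FP
// predicates use the signaling form (CreateFCmpS): IEEE 754 relational
// comparisons signal an invalid-operation exception on quiet NaNs, whereas
// equality comparisons do not.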
2451 if (CmpInst::isFPPredicate(P: Pred)) {
2452 if (Pred == CmpInst::FCMP_OEQ)
2453 Op = Builder.CreateFCmp(P: Pred, LHS: Op, RHS: Constant::getNullValue(Ty: Op->getType()));
2454 else
2455 Op = Builder.CreateFCmpS(P: Pred, LHS: Op, RHS: Constant::getNullValue(Ty: Op->getType()));
2456 } else {
2457 Op = Builder.CreateICmp(P: Pred, LHS: Op, RHS: Constant::getNullValue(Ty: Op->getType()));
2458 }
2459
2460 llvm::Type *ResTy = Ty;
2461 if (auto *VTy = dyn_cast<FixedVectorType>(Val: Ty))
2462 ResTy = FixedVectorType::get(
2463 ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: VTy->getScalarSizeInBits()),
2464 NumElts: VTy->getNumElements());
2465
2466 return Builder.CreateSExt(V: Op, DestTy: ResTy, Name);
2467}
2468
2469static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
2470 Value *ExtOp, Value *IndexOp,
2471 llvm::Type *ResTy, unsigned IntID,
2472 const char *Name) {
2473 SmallVector<Value *, 2> TblOps;
2474 if (ExtOp)
2475 TblOps.push_back(Elt: ExtOp);
2476
2477 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
2478 SmallVector<int, 16> Indices;
2479 auto *TblTy = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
2480 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
2481 Indices.push_back(Elt: 2*i);
2482 Indices.push_back(Elt: 2*i+1);
2483 }
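// For example, with two <8 x i8> tables the mask is <0, 1, ..., 15>, so each
// shuffle below simply concatenates a pair of 64-bit tables into a single
// 128-bit table.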
2484
2485 int PairPos = 0, End = Ops.size() - 1;
2486 while (PairPos < End) {
2487 TblOps.push_back(Elt: CGF.Builder.CreateShuffleVector(V1: Ops[PairPos],
2488 V2: Ops[PairPos+1], Mask: Indices,
2489 Name));
2490 PairPos += 2;
2491 }
2492
2493 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
2494 // of the last 128-bit lookup table with zeros.
2495 if (PairPos == End) {
2496 Value *ZeroTbl = ConstantAggregateZero::get(Ty: TblTy);
2497 TblOps.push_back(Elt: CGF.Builder.CreateShuffleVector(V1: Ops[PairPos],
2498 V2: ZeroTbl, Mask: Indices, Name));
2499 }
2500
2501 Function *TblF;
2502 TblOps.push_back(Elt: IndexOp);
2503 TblF = CGF.CGM.getIntrinsic(IID: IntID, Tys: ResTy);
2504
2505 return CGF.EmitNeonCall(F: TblF, Ops&: TblOps, name: Name);
2506}
2507
2508Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
2509 unsigned Value;
2510 switch (BuiltinID) {
2511 default:
2512 return nullptr;
2513 case clang::ARM::BI__builtin_arm_nop:
2514 Value = 0;
2515 break;
2516 case clang::ARM::BI__builtin_arm_yield:
2517 case clang::ARM::BI__yield:
2518 Value = 1;
2519 break;
2520 case clang::ARM::BI__builtin_arm_wfe:
2521 case clang::ARM::BI__wfe:
2522 Value = 2;
2523 break;
2524 case clang::ARM::BI__builtin_arm_wfi:
2525 case clang::ARM::BI__wfi:
2526 Value = 3;
2527 break;
2528 case clang::ARM::BI__builtin_arm_sev:
2529 case clang::ARM::BI__sev:
2530 Value = 4;
2531 break;
2532 case clang::ARM::BI__builtin_arm_sevl:
2533 case clang::ARM::BI__sevl:
2534 Value = 5;
2535 break;
2536 }
2537
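// For example, __wfe selects Value == 2 above, so the emitted IR is roughly:
//   call void @llvm.arm.hint(i32 2)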
2538 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_hint),
2539 Args: llvm::ConstantInt::get(Ty: Int32Ty, V: Value));
2540}
2541
2542enum SpecialRegisterAccessKind {
2543 NormalRead,
2544 VolatileRead,
2545 Write,
2546};
2547
2548 // Generates the IR for a read/write special register builtin.
2549 // ValueType is the type of the value that is to be written or read;
2550 // RegisterType is the type of the register being written to or read from.
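// As a rough illustration (exact metadata numbering may differ),
// __builtin_arm_rsr("cpsr") lowers to something like
//   %0 = call i32 @llvm.read_volatile_register.i32(metadata !0), with !0 = !{!"cpsr"}
// and __builtin_arm_wsr("cpsr", v) to
//   call void @llvm.write_register.i32(metadata !0, i32 %v)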
2551static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
2552 const CallExpr *E,
2553 llvm::Type *RegisterType,
2554 llvm::Type *ValueType,
2555 SpecialRegisterAccessKind AccessKind,
2556 StringRef SysReg = "") {
2557 // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
2558 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
2559 RegisterType->isIntegerTy(128)) &&
2560 "Unsupported size for register.");
2561
2562 CodeGen::CGBuilderTy &Builder = CGF.Builder;
2563 CodeGen::CodeGenModule &CGM = CGF.CGM;
2564 LLVMContext &Context = CGM.getLLVMContext();
2565
2566 if (SysReg.empty()) {
2567 const Expr *SysRegStrExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
2568 SysReg = cast<clang::StringLiteral>(Val: SysRegStrExpr)->getString();
2569 }
2570
2571 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, Str: SysReg) };
2572 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
2573 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
2574
2575 llvm::Type *Types[] = { RegisterType };
2576
2577 bool MixedTypes = RegisterType->isIntegerTy(Bitwidth: 64) && ValueType->isIntegerTy(Bitwidth: 32);
2578 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
2579 && "Can't fit 64-bit value in 32-bit register");
2580
2581 if (AccessKind != Write) {
2582 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
2583 llvm::Function *F = CGM.getIntrinsic(
2584 IID: AccessKind == VolatileRead ? Intrinsic::read_volatile_register
2585 : Intrinsic::read_register,
2586 Tys: Types);
2587 llvm::Value *Call = Builder.CreateCall(Callee: F, Args: Metadata);
2588
2589 if (MixedTypes)
2590 // Read the 64-bit register and then truncate the result to 32 bits.
2591 return Builder.CreateTrunc(V: Call, DestTy: ValueType);
2592
2593 if (ValueType->isPointerTy())
2594 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
2595 return Builder.CreateIntToPtr(V: Call, DestTy: ValueType);
2596
2597 return Call;
2598 }
2599
2600 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::write_register, Tys: Types);
2601 llvm::Value *ArgValue = CGF.EmitScalarExpr(E: E->getArg(Arg: 1));
2602 if (MixedTypes) {
2603 // Extend the 32-bit write value to 64 bits to pass to the write intrinsic.
2604 ArgValue = Builder.CreateZExt(V: ArgValue, DestTy: RegisterType);
2605 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
2606 }
2607
2608 if (ValueType->isPointerTy()) {
2609 // Have VoidPtrTy ArgValue but want to return an i32/i64.
2610 ArgValue = Builder.CreatePtrToInt(V: ArgValue, DestTy: RegisterType);
2611 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
2612 }
2613
2614 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
2615}
2616
2617/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
2618/// argument that specifies the vector type.
2619static bool HasExtraNeonArgument(unsigned BuiltinID) {
2620 switch (BuiltinID) {
2621 default: break;
2622 case NEON::BI__builtin_neon_vget_lane_i8:
2623 case NEON::BI__builtin_neon_vget_lane_i16:
2624 case NEON::BI__builtin_neon_vget_lane_bf16:
2625 case NEON::BI__builtin_neon_vget_lane_i32:
2626 case NEON::BI__builtin_neon_vget_lane_i64:
2627 case NEON::BI__builtin_neon_vget_lane_mf8:
2628 case NEON::BI__builtin_neon_vget_lane_f32:
2629 case NEON::BI__builtin_neon_vgetq_lane_i8:
2630 case NEON::BI__builtin_neon_vgetq_lane_i16:
2631 case NEON::BI__builtin_neon_vgetq_lane_bf16:
2632 case NEON::BI__builtin_neon_vgetq_lane_i32:
2633 case NEON::BI__builtin_neon_vgetq_lane_i64:
2634 case NEON::BI__builtin_neon_vgetq_lane_mf8:
2635 case NEON::BI__builtin_neon_vgetq_lane_f32:
2636 case NEON::BI__builtin_neon_vduph_lane_bf16:
2637 case NEON::BI__builtin_neon_vduph_laneq_bf16:
2638 case NEON::BI__builtin_neon_vset_lane_i8:
2639 case NEON::BI__builtin_neon_vset_lane_mf8:
2640 case NEON::BI__builtin_neon_vset_lane_i16:
2641 case NEON::BI__builtin_neon_vset_lane_bf16:
2642 case NEON::BI__builtin_neon_vset_lane_i32:
2643 case NEON::BI__builtin_neon_vset_lane_i64:
2644 case NEON::BI__builtin_neon_vset_lane_f32:
2645 case NEON::BI__builtin_neon_vsetq_lane_i8:
2646 case NEON::BI__builtin_neon_vsetq_lane_mf8:
2647 case NEON::BI__builtin_neon_vsetq_lane_i16:
2648 case NEON::BI__builtin_neon_vsetq_lane_bf16:
2649 case NEON::BI__builtin_neon_vsetq_lane_i32:
2650 case NEON::BI__builtin_neon_vsetq_lane_i64:
2651 case NEON::BI__builtin_neon_vsetq_lane_f32:
2652 case NEON::BI__builtin_neon_vsha1h_u32:
2653 case NEON::BI__builtin_neon_vsha1cq_u32:
2654 case NEON::BI__builtin_neon_vsha1pq_u32:
2655 case NEON::BI__builtin_neon_vsha1mq_u32:
2656 case NEON::BI__builtin_neon_vcvth_bf16_f32:
2657 case clang::ARM::BI_MoveToCoprocessor:
2658 case clang::ARM::BI_MoveToCoprocessor2:
2659 return false;
2660 }
2661 return true;
2662}
2663
2664Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2665 const CallExpr *E,
2666 ReturnValueSlot ReturnValue,
2667 llvm::Triple::ArchType Arch) {
2668 if (auto Hint = GetValueForARMHint(BuiltinID))
2669 return Hint;
2670
2671 if (BuiltinID == clang::ARM::BI__emit) {
2672 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
2673 llvm::FunctionType *FTy =
2674 llvm::FunctionType::get(Result: VoidTy, /*Variadic=*/isVarArg: false);
2675
2676 Expr::EvalResult Result;
2677 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
2678 llvm_unreachable("Sema will ensure that the parameter is constant");
2679
2680 llvm::APSInt Value = Result.Val.getInt();
2681 uint64_t ZExtValue = Value.zextOrTrunc(width: IsThumb ? 16 : 32).getZExtValue();
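// For example (illustrative), __emit(0xbf00) in Thumb mode produces the
// inline asm string ".inst.n 0xbf00", which encodes a Thumb NOP.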
2682
2683 llvm::InlineAsm *Emit =
2684 IsThumb ? InlineAsm::get(Ty: FTy, AsmString: ".inst.n 0x" + utohexstr(X: ZExtValue), Constraints: "",
2685 /*hasSideEffects=*/true)
2686 : InlineAsm::get(Ty: FTy, AsmString: ".inst 0x" + utohexstr(X: ZExtValue), Constraints: "",
2687 /*hasSideEffects=*/true);
2688
2689 return Builder.CreateCall(Callee: Emit);
2690 }
2691
2692 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
2693 Value *Option = EmitScalarExpr(E: E->getArg(Arg: 0));
2694 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_dbg), Args: Option);
2695 }
2696
2697 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
2698 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
2699 Value *RW = EmitScalarExpr(E: E->getArg(Arg: 1));
2700 Value *IsData = EmitScalarExpr(E: E->getArg(Arg: 2));
2701
2702 // Locality is not supported on the ARM target; use the maximum value (3).
2703 Value *Locality = llvm::ConstantInt::get(Ty: Int32Ty, V: 3);
2704
2705 Function *F = CGM.getIntrinsic(IID: Intrinsic::prefetch, Tys: Address->getType());
2706 return Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, IsData});
2707 }
2708
2709 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
2710 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2711 return Builder.CreateCall(
2712 Callee: CGM.getIntrinsic(IID: Intrinsic::bitreverse, Tys: Arg->getType()), Args: Arg, Name: "rbit");
2713 }
2714
2715 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
2716 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
2717 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2718 Function *F = CGM.getIntrinsic(IID: Intrinsic::ctlz, Tys: Arg->getType());
2719 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
2720 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
2721 Res = Builder.CreateTrunc(V: Res, DestTy: Builder.getInt32Ty());
2722 return Res;
2723 }
2724
2726 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
2727 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2728 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_cls), Args: Arg, Name: "cls");
2729 }
2730 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
2731 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
2732 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_cls64), Args: Arg,
2733 Name: "cls");
2734 }
2735
2736 if (BuiltinID == clang::ARM::BI__clear_cache) {
2737 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2738 const FunctionDecl *FD = E->getDirectCallee();
2739 Value *Ops[2];
2740 for (unsigned i = 0; i < 2; i++)
2741 Ops[i] = EmitScalarExpr(E: E->getArg(Arg: i));
2742 llvm::Type *Ty = CGM.getTypes().ConvertType(T: FD->getType());
2743 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Val: Ty);
2744 StringRef Name = FD->getName();
2745 return EmitNounwindRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name), args: Ops);
2746 }
2747
2748 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
2749 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
2750 Function *F;
2751
2752 switch (BuiltinID) {
2753 default: llvm_unreachable("unexpected builtin");
2754 case clang::ARM::BI__builtin_arm_mcrr:
2755 F = CGM.getIntrinsic(IID: Intrinsic::arm_mcrr);
2756 break;
2757 case clang::ARM::BI__builtin_arm_mcrr2:
2758 F = CGM.getIntrinsic(IID: Intrinsic::arm_mcrr2);
2759 break;
2760 }
2761
2762 // The MCRR{2} instruction has 5 operands, but
2763 // the intrinsic has only 4, because Rt and Rt2
2764 // are represented as a single unsigned 64-bit
2765 // integer in the intrinsic definition, even
2766 // though the instruction encodes them as two
2767 // separate 32-bit registers.
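// For example, RtAndRt2 == 0x1122334455667788 yields Rt == 0x55667788 and
// Rt2 == 0x11223344 below.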
2768
2769 Value *Coproc = EmitScalarExpr(E: E->getArg(Arg: 0));
2770 Value *Opc1 = EmitScalarExpr(E: E->getArg(Arg: 1));
2771 Value *RtAndRt2 = EmitScalarExpr(E: E->getArg(Arg: 2));
2772 Value *CRm = EmitScalarExpr(E: E->getArg(Arg: 3));
2773
2774 Value *C1 = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2775 Value *Rt = Builder.CreateTruncOrBitCast(V: RtAndRt2, DestTy: Int32Ty);
2776 Value *Rt2 = Builder.CreateLShr(LHS: RtAndRt2, RHS: C1);
2777 Rt2 = Builder.CreateTruncOrBitCast(V: Rt2, DestTy: Int32Ty);
2778
2779 return Builder.CreateCall(Callee: F, Args: {Coproc, Opc1, Rt, Rt2, CRm});
2780 }
2781
2782 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
2783 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
2784 Function *F;
2785
2786 switch (BuiltinID) {
2787 default: llvm_unreachable("unexpected builtin");
2788 case clang::ARM::BI__builtin_arm_mrrc:
2789 F = CGM.getIntrinsic(IID: Intrinsic::arm_mrrc);
2790 break;
2791 case clang::ARM::BI__builtin_arm_mrrc2:
2792 F = CGM.getIntrinsic(IID: Intrinsic::arm_mrrc2);
2793 break;
2794 }
2795
2796 Value *Coproc = EmitScalarExpr(E: E->getArg(Arg: 0));
2797 Value *Opc1 = EmitScalarExpr(E: E->getArg(Arg: 1));
2798 Value *CRm = EmitScalarExpr(E: E->getArg(Arg: 2));
2799 Value *RtAndRt2 = Builder.CreateCall(Callee: F, Args: {Coproc, Opc1, CRm});
2800
2801 // The intrinsic returns the unsigned 64-bit result as two 32-bit
2802 // integers, which are recombined below.
2803
2804 Value *Rt = Builder.CreateExtractValue(Agg: RtAndRt2, Idxs: 1);
2805 Value *Rt1 = Builder.CreateExtractValue(Agg: RtAndRt2, Idxs: 0);
2806 Rt = Builder.CreateZExt(V: Rt, DestTy: Int64Ty);
2807 Rt1 = Builder.CreateZExt(V: Rt1, DestTy: Int64Ty);
2808
2809 Value *ShiftCast = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2810 RtAndRt2 = Builder.CreateShl(LHS: Rt, RHS: ShiftCast, Name: "shl", HasNUW: true);
2811 RtAndRt2 = Builder.CreateOr(LHS: RtAndRt2, RHS: Rt1);
2812
2813 return Builder.CreateBitCast(V: RtAndRt2, DestTy: ConvertType(T: E->getType()));
2814 }
2815
2816 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
2817 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2818 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
2819 getContext().getTypeSize(T: E->getType()) == 64) ||
2820 BuiltinID == clang::ARM::BI__ldrexd) {
2821 Function *F;
2822
2823 switch (BuiltinID) {
2824 default: llvm_unreachable("unexpected builtin");
2825 case clang::ARM::BI__builtin_arm_ldaex:
2826 F = CGM.getIntrinsic(IID: Intrinsic::arm_ldaexd);
2827 break;
2828 case clang::ARM::BI__builtin_arm_ldrexd:
2829 case clang::ARM::BI__builtin_arm_ldrex:
2830 case clang::ARM::BI__ldrexd:
2831 F = CGM.getIntrinsic(IID: Intrinsic::arm_ldrexd);
2832 break;
2833 }
2834
2835 Value *LdPtr = EmitScalarExpr(E: E->getArg(Arg: 0));
2836 Value *Val = Builder.CreateCall(Callee: F, Args: LdPtr, Name: "ldrexd");
2837
2838 Value *Val0 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
2839 Value *Val1 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
2840 Val0 = Builder.CreateZExt(V: Val0, DestTy: Int64Ty);
2841 Val1 = Builder.CreateZExt(V: Val1, DestTy: Int64Ty);
2842
2843 Value *ShiftCst = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2844 Val = Builder.CreateShl(LHS: Val0, RHS: ShiftCst, Name: "shl", HasNUW: true /* nuw */);
2845 Val = Builder.CreateOr(LHS: Val, RHS: Val1);
2846 return Builder.CreateBitCast(V: Val, DestTy: ConvertType(T: E->getType()));
2847 }
2848
2849 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
2850 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
2851 Value *LoadAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
2852
2853 QualType Ty = E->getType();
2854 llvm::Type *RealResTy = ConvertType(T: Ty);
2855 llvm::Type *IntTy =
2856 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
2857
2858 Function *F = CGM.getIntrinsic(
2859 IID: BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
2860 : Intrinsic::arm_ldrex,
2861 Tys: UnqualPtrTy);
2862 CallInst *Val = Builder.CreateCall(Callee: F, Args: LoadAddr, Name: "ldrex");
2863 Val->addParamAttr(
2864 ArgNo: 0, Attr: Attribute::get(Context&: getLLVMContext(), Kind: Attribute::ElementType, Ty: IntTy));
2865
2866 if (RealResTy->isPointerTy())
2867 return Builder.CreateIntToPtr(V: Val, DestTy: RealResTy);
2868 else {
2869 llvm::Type *IntResTy = llvm::IntegerType::get(
2870 C&: getLLVMContext(), NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: RealResTy));
2871 return Builder.CreateBitCast(V: Builder.CreateTruncOrBitCast(V: Val, DestTy: IntResTy),
2872 DestTy: RealResTy);
2873 }
2874 }
2875
2876 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
2877 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
2878 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
2879 getContext().getTypeSize(T: E->getArg(Arg: 0)->getType()) == 64)) {
2880 Function *F = CGM.getIntrinsic(
2881 IID: BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
2882 : Intrinsic::arm_strexd);
2883 llvm::Type *STy = llvm::StructType::get(elt1: Int32Ty, elts: Int32Ty);
2884
2885 Address Tmp = CreateMemTemp(T: E->getArg(Arg: 0)->getType());
2886 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
2887 Builder.CreateStore(Val, Addr: Tmp);
2888
2889 Address LdPtr = Tmp.withElementType(ElemTy: STy);
2890 Val = Builder.CreateLoad(Addr: LdPtr);
2891
2892 Value *Arg0 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
2893 Value *Arg1 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
2894 Value *StPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
2895 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1, StPtr}, Name: "strexd");
2896 }
2897
2898 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
2899 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
2900 Value *StoreVal = EmitScalarExpr(E: E->getArg(Arg: 0));
2901 Value *StoreAddr = EmitScalarExpr(E: E->getArg(Arg: 1));
2902
2903 QualType Ty = E->getArg(Arg: 0)->getType();
2904 llvm::Type *StoreTy =
2905 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
2906
2907 if (StoreVal->getType()->isPointerTy())
2908 StoreVal = Builder.CreatePtrToInt(V: StoreVal, DestTy: Int32Ty);
2909 else {
2910 llvm::Type *IntTy = llvm::IntegerType::get(
2911 C&: getLLVMContext(),
2912 NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: StoreVal->getType()));
2913 StoreVal = Builder.CreateBitCast(V: StoreVal, DestTy: IntTy);
2914 StoreVal = Builder.CreateZExtOrBitCast(V: StoreVal, DestTy: Int32Ty);
2915 }
2916
2917 Function *F = CGM.getIntrinsic(
2918 IID: BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
2919 : Intrinsic::arm_strex,
2920 Tys: StoreAddr->getType());
2921
2922 CallInst *CI = Builder.CreateCall(Callee: F, Args: {StoreVal, StoreAddr}, Name: "strex");
2923 CI->addParamAttr(
2924 ArgNo: 1, Attr: Attribute::get(Context&: getLLVMContext(), Kind: Attribute::ElementType, Ty: StoreTy));
2925 return CI;
2926 }
2927
2928 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
2929 Function *F = CGM.getIntrinsic(IID: Intrinsic::arm_clrex);
2930 return Builder.CreateCall(Callee: F);
2931 }
2932
2933 // CRC32
2934 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2935 switch (BuiltinID) {
2936 case clang::ARM::BI__builtin_arm_crc32b:
2937 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2938 case clang::ARM::BI__builtin_arm_crc32cb:
2939 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2940 case clang::ARM::BI__builtin_arm_crc32h:
2941 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2942 case clang::ARM::BI__builtin_arm_crc32ch:
2943 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2944 case clang::ARM::BI__builtin_arm_crc32w:
2945 case clang::ARM::BI__builtin_arm_crc32d:
2946 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2947 case clang::ARM::BI__builtin_arm_crc32cw:
2948 case clang::ARM::BI__builtin_arm_crc32cd:
2949 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2950 }
2951
2952 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2953 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
2954 Value *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
2955
2956 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2957 // intrinsics, hence we need different codegen for these cases.
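// For example, __builtin_arm_crc32d(a, b) is emitted roughly as:
//   t   = llvm.arm.crc32w(a, lo32(b))
//   res = llvm.arm.crc32w(t, hi32(b))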
2958 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
2959 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
2960 Value *C1 = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
2961 Value *Arg1a = Builder.CreateTruncOrBitCast(V: Arg1, DestTy: Int32Ty);
2962 Value *Arg1b = Builder.CreateLShr(LHS: Arg1, RHS: C1);
2963 Arg1b = Builder.CreateTruncOrBitCast(V: Arg1b, DestTy: Int32Ty);
2964
2965 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
2966 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg0, Arg1a});
2967 return Builder.CreateCall(Callee: F, Args: {Res, Arg1b});
2968 } else {
2969 Arg1 = Builder.CreateZExtOrBitCast(V: Arg1, DestTy: Int32Ty);
2970
2971 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
2972 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1});
2973 }
2974 }
2975
2976 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2977 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2978 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2979 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
2980 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
2981 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
2982
2983 SpecialRegisterAccessKind AccessKind = Write;
2984 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
2985 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2986 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
2987 AccessKind = VolatileRead;
2988
2989 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
2990 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
2991
2992 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
2993 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
2994
2995 llvm::Type *ValueType;
2996 llvm::Type *RegisterType;
2997 if (IsPointerBuiltin) {
2998 ValueType = VoidPtrTy;
2999 RegisterType = Int32Ty;
3000 } else if (Is64Bit) {
3001 ValueType = RegisterType = Int64Ty;
3002 } else {
3003 ValueType = RegisterType = Int32Ty;
3004 }
3005
3006 return EmitSpecialRegisterBuiltin(CGF&: *this, E, RegisterType, ValueType,
3007 AccessKind);
3008 }
3009
3010 if (BuiltinID == ARM::BI__builtin_sponentry) {
3011 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::sponentry, Tys: AllocaInt8PtrTy);
3012 return Builder.CreateCall(Callee: F);
3013 }
3014
3015 // Handle MSVC intrinsics before argument evaluation to prevent double
3016 // evaluation.
3017 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
3018 return EmitMSVCBuiltinExpr(BuiltinID: *MsvcIntId, E);
3019
3020 // Deal with MVE builtins
3021 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3022 return Result;
3023 // Handle CDE builtins
3024 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
3025 return Result;
3026
3027 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
3028 auto It = llvm::find_if(Range: NEONEquivalentIntrinsicMap, P: [BuiltinID](auto &P) {
3029 return P.first == BuiltinID;
3030 });
3031 if (It != end(arr: NEONEquivalentIntrinsicMap))
3032 BuiltinID = It->second;
3033
3034 // Find out if any arguments are required to be integer constant
3035 // expressions.
3036 unsigned ICEArguments = 0;
3037 ASTContext::GetBuiltinTypeError Error;
3038 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
3039 assert(Error == ASTContext::GE_None && "Should not codegen an error");
3040
3041 auto getAlignmentValue32 = [&](Address addr) -> Value* {
3042 return Builder.getInt32(C: addr.getAlignment().getQuantity());
3043 };
3044
3045 Address PtrOp0 = Address::invalid();
3046 Address PtrOp1 = Address::invalid();
3047 SmallVector<Value*, 4> Ops;
3048 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3049 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3050 for (unsigned i = 0, e = NumArgs; i != e; i++) {
3051 if (i == 0) {
3052 switch (BuiltinID) {
3053 case NEON::BI__builtin_neon_vld1_v:
3054 case NEON::BI__builtin_neon_vld1q_v:
3055 case NEON::BI__builtin_neon_vld1q_lane_v:
3056 case NEON::BI__builtin_neon_vld1_lane_v:
3057 case NEON::BI__builtin_neon_vld1_dup_v:
3058 case NEON::BI__builtin_neon_vld1q_dup_v:
3059 case NEON::BI__builtin_neon_vst1_v:
3060 case NEON::BI__builtin_neon_vst1q_v:
3061 case NEON::BI__builtin_neon_vst1q_lane_v:
3062 case NEON::BI__builtin_neon_vst1_lane_v:
3063 case NEON::BI__builtin_neon_vst2_v:
3064 case NEON::BI__builtin_neon_vst2q_v:
3065 case NEON::BI__builtin_neon_vst2_lane_v:
3066 case NEON::BI__builtin_neon_vst2q_lane_v:
3067 case NEON::BI__builtin_neon_vst3_v:
3068 case NEON::BI__builtin_neon_vst3q_v:
3069 case NEON::BI__builtin_neon_vst3_lane_v:
3070 case NEON::BI__builtin_neon_vst3q_lane_v:
3071 case NEON::BI__builtin_neon_vst4_v:
3072 case NEON::BI__builtin_neon_vst4q_v:
3073 case NEON::BI__builtin_neon_vst4_lane_v:
3074 case NEON::BI__builtin_neon_vst4q_lane_v:
3075 // Get the alignment for the argument in addition to the value;
3076 // we'll use it later.
3077 PtrOp0 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
3078 Ops.push_back(Elt: PtrOp0.emitRawPointer(CGF&: *this));
3079 continue;
3080 }
3081 }
3082 if (i == 1) {
3083 switch (BuiltinID) {
3084 case NEON::BI__builtin_neon_vld2_v:
3085 case NEON::BI__builtin_neon_vld2q_v:
3086 case NEON::BI__builtin_neon_vld3_v:
3087 case NEON::BI__builtin_neon_vld3q_v:
3088 case NEON::BI__builtin_neon_vld4_v:
3089 case NEON::BI__builtin_neon_vld4q_v:
3090 case NEON::BI__builtin_neon_vld2_lane_v:
3091 case NEON::BI__builtin_neon_vld2q_lane_v:
3092 case NEON::BI__builtin_neon_vld3_lane_v:
3093 case NEON::BI__builtin_neon_vld3q_lane_v:
3094 case NEON::BI__builtin_neon_vld4_lane_v:
3095 case NEON::BI__builtin_neon_vld4q_lane_v:
3096 case NEON::BI__builtin_neon_vld2_dup_v:
3097 case NEON::BI__builtin_neon_vld2q_dup_v:
3098 case NEON::BI__builtin_neon_vld3_dup_v:
3099 case NEON::BI__builtin_neon_vld3q_dup_v:
3100 case NEON::BI__builtin_neon_vld4_dup_v:
3101 case NEON::BI__builtin_neon_vld4q_dup_v:
3102 // Get the alignment for the argument in addition to the value;
3103 // we'll use it later.
3104 PtrOp1 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
3105 Ops.push_back(Elt: PtrOp1.emitRawPointer(CGF&: *this));
3106 continue;
3107 }
3108 }
3109
3110 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
3111 }
3112
3113 switch (BuiltinID) {
3114 default: break;
3115
3116 case NEON::BI__builtin_neon_vget_lane_i8:
3117 case NEON::BI__builtin_neon_vget_lane_i16:
3118 case NEON::BI__builtin_neon_vget_lane_i32:
3119 case NEON::BI__builtin_neon_vget_lane_i64:
3120 case NEON::BI__builtin_neon_vget_lane_bf16:
3121 case NEON::BI__builtin_neon_vget_lane_f32:
3122 case NEON::BI__builtin_neon_vgetq_lane_i8:
3123 case NEON::BI__builtin_neon_vgetq_lane_i16:
3124 case NEON::BI__builtin_neon_vgetq_lane_i32:
3125 case NEON::BI__builtin_neon_vgetq_lane_i64:
3126 case NEON::BI__builtin_neon_vgetq_lane_bf16:
3127 case NEON::BI__builtin_neon_vgetq_lane_f32:
3128 case NEON::BI__builtin_neon_vduph_lane_bf16:
3129 case NEON::BI__builtin_neon_vduph_laneq_bf16:
3130 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
3131
3132 case NEON::BI__builtin_neon_vrndns_f32: {
3133 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
3134 llvm::Type *Tys[] = {Arg->getType()};
3135 Function *F = CGM.getIntrinsic(IID: Intrinsic::arm_neon_vrintn, Tys);
3136 return Builder.CreateCall(Callee: F, Args: {Arg}, Name: "vrndn");
}
3137
3138 case NEON::BI__builtin_neon_vset_lane_i8:
3139 case NEON::BI__builtin_neon_vset_lane_i16:
3140 case NEON::BI__builtin_neon_vset_lane_i32:
3141 case NEON::BI__builtin_neon_vset_lane_i64:
3142 case NEON::BI__builtin_neon_vset_lane_bf16:
3143 case NEON::BI__builtin_neon_vset_lane_f32:
3144 case NEON::BI__builtin_neon_vsetq_lane_i8:
3145 case NEON::BI__builtin_neon_vsetq_lane_i16:
3146 case NEON::BI__builtin_neon_vsetq_lane_i32:
3147 case NEON::BI__builtin_neon_vsetq_lane_i64:
3148 case NEON::BI__builtin_neon_vsetq_lane_bf16:
3149 case NEON::BI__builtin_neon_vsetq_lane_f32:
3150 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
3151
3152 case NEON::BI__builtin_neon_vsha1h_u32:
3153 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_sha1h), Ops,
3154 name: "vsha1h");
3155 case NEON::BI__builtin_neon_vsha1cq_u32:
3156 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_sha1c), Ops,
3157 name: "vsha1c");
3158 case NEON::BI__builtin_neon_vsha1pq_u32:
3159 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_sha1p), Ops,
3160 name: "vsha1p");
3161 case NEON::BI__builtin_neon_vsha1mq_u32:
3162 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_sha1m), Ops,
3163 name: "vsha1m");
3164
3165 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
3166 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vcvtbfp2bf), Ops,
3167 name: "vcvtbfp2bf");
3168 }
3169
3170 // The ARM _MoveToCoprocessor builtins put the input register value as
3171 // the first argument, but the LLVM intrinsic expects it as the third one.
3172 case clang::ARM::BI_MoveToCoprocessor:
3173 case clang::ARM::BI_MoveToCoprocessor2: {
3174 Function *F = CGM.getIntrinsic(IID: BuiltinID == clang::ARM::BI_MoveToCoprocessor
3175 ? Intrinsic::arm_mcr
3176 : Intrinsic::arm_mcr2);
3177 return Builder.CreateCall(Callee: F, Args: {Ops[1], Ops[2], Ops[0],
3178 Ops[3], Ops[4], Ops[5]});
3179 }
3180 }
3181
3182 // Get the last argument, which specifies the vector type.
3183 assert(HasExtraArg);
3184 const Expr *Arg = E->getArg(Arg: E->getNumArgs()-1);
3185 std::optional<llvm::APSInt> Result =
3186 Arg->getIntegerConstantExpr(Ctx: getContext());
3187 if (!Result)
3188 return nullptr;
3189
3190 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
3191 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
3192 // Determine the overloaded type of this builtin.
3193 llvm::Type *Ty;
3194 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
3195 Ty = FloatTy;
3196 else
3197 Ty = DoubleTy;
3198
3199 // Determine whether this is an unsigned conversion or not.
3200 bool usgn = Result->getZExtValue() == 1;
3201 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3202
3203 // Call the appropriate intrinsic.
3204 Function *F = CGM.getIntrinsic(IID: Int, Tys: Ty);
3205 return Builder.CreateCall(Callee: F, Args: Ops, Name: "vcvtr");
3206 }
3207
3208 // Determine the type of this overloaded NEON intrinsic.
3209 NeonTypeFlags Type = Result->getZExtValue();
3210 bool usgn = Type.isUnsigned();
3211 bool rightShift = false;
3212
3213 llvm::FixedVectorType *VTy =
3214 GetNeonType(CGF: this, TypeFlags: Type, HasLegalHalfType: getTarget().hasLegalHalfType(), V1Ty: false,
3215 AllowBFloatArgsAndRet: getTarget().hasBFloat16Type());
3216 llvm::Type *Ty = VTy;
3217 if (!Ty)
3218 return nullptr;
3219
3220 // Many NEON builtins have identical semantics and uses in ARM and
3221 // AArch64. Emit these in a single function.
3222 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
3223 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
3224 IntrinsicMap, BuiltinID, MapProvenSorted&: NEONSIMDIntrinsicsProvenSorted);
3225 if (Builtin)
3226 return EmitCommonNeonBuiltinExpr(
3227 BuiltinID: Builtin->BuiltinID, LLVMIntrinsic: Builtin->LLVMIntrinsic, AltLLVMIntrinsic: Builtin->AltLLVMIntrinsic,
3228 NameHint: Builtin->NameHint, Modifier: Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
3229
3230 unsigned Int;
3231 switch (BuiltinID) {
3232 default: return nullptr;
3233 case NEON::BI__builtin_neon_vld1q_lane_v:
3234 // Handle 64-bit integer elements as a special case. Use shuffles of
3235 // one-element vectors to avoid poor code for i64 in the backend.
3236 if (VTy->getElementType()->isIntegerTy(Bitwidth: 64)) {
3237 // Extract the other lane.
3238 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3239 int Lane = cast<ConstantInt>(Val: Ops[2])->getZExtValue();
3240 Value *SV = llvm::ConstantVector::get(V: ConstantInt::get(Ty: Int32Ty, V: 1-Lane));
3241 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV);
3242 // Load the value as a one-element vector.
3243 Ty = llvm::FixedVectorType::get(ElementType: VTy->getElementType(), NumElts: 1);
3244 llvm::Type *Tys[] = {Ty, Int8PtrTy};
3245 Function *F = CGM.getIntrinsic(IID: Intrinsic::arm_neon_vld1, Tys);
3246 Value *Align = getAlignmentValue32(PtrOp0);
3247 Value *Ld = Builder.CreateCall(Callee: F, Args: {Ops[0], Align});
3248 // Combine them.
3249 int Indices[] = {1 - Lane, Lane};
3250 return Builder.CreateShuffleVector(V1: Ops[1], V2: Ld, Mask: Indices, Name: "vld1q_lane");
3251 }
3252 [[fallthrough]];
3253 case NEON::BI__builtin_neon_vld1_lane_v: {
3254 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3255 PtrOp0 = PtrOp0.withElementType(ElemTy: VTy->getElementType());
3256 Value *Ld = Builder.CreateLoad(Addr: PtrOp0);
3257 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ld, Idx: Ops[2], Name: "vld1_lane");
3258 }
3259 case NEON::BI__builtin_neon_vqrshrn_n_v:
3260 Int =
3261 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3262 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrn_n",
3263 shift: 1, rightshift: true);
3264 case NEON::BI__builtin_neon_vqrshrun_n_v:
3265 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vqrshiftnsu, Tys: Ty),
3266 Ops, name: "vqrshrun_n", shift: 1, rightshift: true);
3267 case NEON::BI__builtin_neon_vqshrn_n_v:
3268 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3269 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrn_n",
3270 shift: 1, rightshift: true);
3271 case NEON::BI__builtin_neon_vqshrun_n_v:
3272 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vqshiftnsu, Tys: Ty),
3273 Ops, name: "vqshrun_n", shift: 1, rightshift: true);
3274 case NEON::BI__builtin_neon_vrecpe_v:
3275 case NEON::BI__builtin_neon_vrecpeq_v:
3276 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vrecpe, Tys: Ty),
3277 Ops, name: "vrecpe");
3278 case NEON::BI__builtin_neon_vrshrn_n_v:
3279 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vrshiftn, Tys: Ty),
3280 Ops, name: "vrshrn_n", shift: 1, rightshift: true);
3281 case NEON::BI__builtin_neon_vrsra_n_v:
3282 case NEON::BI__builtin_neon_vrsraq_n_v:
3283 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
3284 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3285 Ops[2] = EmitNeonShiftVector(V: Ops[2], Ty, neg: true);
3286 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3287 Ops[1] = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Int, Tys: Ty), Args: {Ops[1], Ops[2]});
3288 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1], Name: "vrsra_n");
3289 case NEON::BI__builtin_neon_vsri_n_v:
3290 case NEON::BI__builtin_neon_vsriq_n_v:
3291 rightShift = true;
3292 [[fallthrough]];
3293 case NEON::BI__builtin_neon_vsli_n_v:
3294 case NEON::BI__builtin_neon_vsliq_n_v:
3295 Ops[2] = EmitNeonShiftVector(V: Ops[2], Ty, neg: rightShift);
3296 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vshiftins, Tys: Ty),
3297 Ops, name: "vsli_n");
3298 case NEON::BI__builtin_neon_vsra_n_v:
3299 case NEON::BI__builtin_neon_vsraq_n_v:
3300 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
3301 Ops[1] = EmitNeonRShiftImm(Vec: Ops[1], Shift: Ops[2], Ty, usgn, name: "vsra_n");
3302 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
3303 case NEON::BI__builtin_neon_vst1q_lane_v:
3304 // Handle 64-bit integer elements as a special case. Use a shuffle to get
3305 // a one-element vector and avoid poor code for i64 in the backend.
3306 if (VTy->getElementType()->isIntegerTy(Bitwidth: 64)) {
3307 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3308 Value *SV = llvm::ConstantVector::get(V: cast<llvm::Constant>(Val: Ops[2]));
3309 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV);
3310 Ops[2] = getAlignmentValue32(PtrOp0);
3311 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
3312 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vst1,
3313 Tys), Args: Ops);
3314 }
3315 [[fallthrough]];
3316 case NEON::BI__builtin_neon_vst1_lane_v: {
3317 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
3318 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
3319 return Builder.CreateStore(Val: Ops[1],
3320 Addr: PtrOp0.withElementType(ElemTy: Ops[1]->getType()));
3321 }
3322 case NEON::BI__builtin_neon_vtbl1_v:
3323 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbl1),
3324 Ops, name: "vtbl1");
3325 case NEON::BI__builtin_neon_vtbl2_v:
3326 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbl2),
3327 Ops, name: "vtbl2");
3328 case NEON::BI__builtin_neon_vtbl3_v:
3329 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbl3),
3330 Ops, name: "vtbl3");
3331 case NEON::BI__builtin_neon_vtbl4_v:
3332 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbl4),
3333 Ops, name: "vtbl4");
3334 case NEON::BI__builtin_neon_vtbx1_v:
3335 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbx1),
3336 Ops, name: "vtbx1");
3337 case NEON::BI__builtin_neon_vtbx2_v:
3338 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbx2),
3339 Ops, name: "vtbx2");
3340 case NEON::BI__builtin_neon_vtbx3_v:
3341 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbx3),
3342 Ops, name: "vtbx3");
3343 case NEON::BI__builtin_neon_vtbx4_v:
3344 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::arm_neon_vtbx4),
3345 Ops, name: "vtbx4");
3346 }
3347}
3348
3349template<typename Integer>
3350static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
3351 return E->getIntegerConstantExpr(Ctx: Context)->getExtValue();
3352}
3353
3354static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3355 llvm::Type *T, bool Unsigned) {
3356 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3357 // which finds it convenient to specify signed/unsigned as a boolean flag.
3358 return Unsigned ? Builder.CreateZExt(V, DestTy: T) : Builder.CreateSExt(V, DestTy: T);
3359}
3360
3361static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3362 uint32_t Shift, bool Unsigned) {
3363 // MVE helper function for integer shift right. This must handle signed vs
3364 // unsigned, and also deal specially with the case where the shift count is
3365 // equal to the lane size. In LLVM IR, an LShr by the full lane width yields
3366 // a poison value, but in MVE it's legal, so we must convert it to code
3367 // that is well defined in IR.
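// For example, an unsigned right shift of a <8 x i16> vector by 16 becomes an
// all-zero vector, while a signed one becomes an arithmetic shift by 15,
// leaving every lane filled with copies of its sign bit.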
3368 unsigned LaneBits = cast<llvm::VectorType>(Val: V->getType())
3369 ->getElementType()
3370 ->getPrimitiveSizeInBits();
3371 if (Shift == LaneBits) {
3372 // An unsigned shift of the full lane size always generates zero, so we can
3373 // simply emit a zero vector. A signed shift of the full lane size does the
3374 // same thing as shifting by one bit fewer.
3375 if (Unsigned)
3376 return llvm::Constant::getNullValue(Ty: V->getType());
3377 else
3378 --Shift;
3379 }
3380 return Unsigned ? Builder.CreateLShr(LHS: V, RHS: Shift) : Builder.CreateAShr(LHS: V, RHS: Shift);
3381}
3382
3383static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3384 // MVE-specific helper function for a vector splat, which infers the element
3385 // count of the output vector by knowing that MVE vectors are all 128 bits
3386 // wide.
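// For example, an i32 scalar is splatted to a <4 x i32> and an i16 scalar to
// an <8 x i16>.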
3387 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3388 return Builder.CreateVectorSplat(NumElts: Elements, V);
3389}
3390
3391static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3392 CodeGenFunction *CGF,
3393 llvm::Value *V,
3394 llvm::Type *DestType) {
3395 // Convert one MVE vector type into another by reinterpreting its in-register
3396 // format.
3397 //
3398 // In little-endian mode, this is identical to a bitcast (which reinterprets
3399 // the memory format). In big-endian mode, the two are not necessarily the
3400 // same, because the register and memory formats map to each other
3401 // differently depending on the lane size.
3402 //
3403 // We generate a bitcast whenever we can (if we're little-endian, or if the
3404 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3405 // that performs the different kind of reinterpretation.
3406 if (CGF->getTarget().isBigEndian() &&
3407 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3408 return Builder.CreateCall(
3409 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vreinterpretq,
3410 Tys: {DestType, V->getType()}),
3411 Args: V);
3412 } else {
3413 return Builder.CreateBitCast(V, DestTy: DestType);
3414 }
3415}
3416
3417static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3418 // Make a shufflevector that extracts every other element of a vector (evens
3419 // or odds, as desired).
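// For example, with an 8-element input the mask is <0, 2, 4, 6> when Odd is
// false and <1, 3, 5, 7> when Odd is true.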
3420 SmallVector<int, 16> Indices;
3421 unsigned InputElements =
3422 cast<llvm::FixedVectorType>(Val: V->getType())->getNumElements();
3423 for (unsigned i = 0; i < InputElements; i += 2)
3424 Indices.push_back(Elt: i + Odd);
3425 return Builder.CreateShuffleVector(V, Mask: Indices);
3426}
3427
3428static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3429 llvm::Value *V1) {
3430 // Make a shufflevector that interleaves two vectors element by element.
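// For example, two 4-element inputs are interleaved with the mask
// <0, 4, 1, 5, 2, 6, 3, 7>.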
3431 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3432 SmallVector<int, 16> Indices;
3433 unsigned InputElements =
3434 cast<llvm::FixedVectorType>(Val: V0->getType())->getNumElements();
3435 for (unsigned i = 0; i < InputElements; i++) {
3436 Indices.push_back(Elt: i);
3437 Indices.push_back(Elt: i + InputElements);
3438 }
3439 return Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: Indices);
3440}
3441
3442template<unsigned HighBit, unsigned OtherBits>
3443static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3444 // MVE-specific helper function to make a vector splat of a constant such as
3445 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
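// For example, with 16-bit lanes, HighBit=1/OtherBits=1 splats 0xFFFF
// (UINT_MAX) and HighBit=1/OtherBits=0 splats 0x8000 (INT_MIN).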
3446 llvm::Type *T = cast<llvm::VectorType>(Val: VT)->getElementType();
3447 unsigned LaneBits = T->getPrimitiveSizeInBits();
3448 uint32_t Value = HighBit << (LaneBits - 1);
3449 if (OtherBits)
3450 Value |= (1UL << (LaneBits - 1)) - 1;
3451 llvm::Value *Lane = llvm::ConstantInt::get(Ty: T, V: Value);
3452 return ARMMVEVectorSplat(Builder, V: Lane);
3453}
3454
3455static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3456 llvm::Value *V,
3457 unsigned ReverseWidth) {
3458 // MVE-specific helper function which reverses the elements of a
3459 // vector within every (ReverseWidth)-bit collection of lanes.
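// For example, reversing i8 lanes within every 32-bit group uses Mask == 3,
// so the shuffle indices become 3, 2, 1, 0, 7, 6, 5, 4, ...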
3460 SmallVector<int, 16> Indices;
3461 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3462 unsigned Elements = 128 / LaneSize;
3463 unsigned Mask = ReverseWidth / LaneSize - 1;
3464 for (unsigned i = 0; i < Elements; i++)
3465 Indices.push_back(Elt: i ^ Mask);
3466 return Builder.CreateShuffleVector(V, Mask: Indices);
3467}
3468
3469Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3470 const CallExpr *E,
3471 ReturnValueSlot ReturnValue,
3472 llvm::Triple::ArchType Arch) {
3473 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3474 Intrinsic::ID IRIntr;
3475 unsigned NumVectors;
3476
3477 // Code autogenerated by Tablegen will handle all the simple builtins.
3478 switch (BuiltinID) {
3479 #include "clang/Basic/arm_mve_builtin_cg.inc"
3480
3481 // If we didn't match an MVE builtin id at all, go back to the
3482 // main EmitARMBuiltinExpr.
3483 default:
3484 return nullptr;
3485 }
3486
  // Anything that breaks out of that switch is an MVE builtin that needs
  // handwritten code generation.
3489
3490 switch (CustomCodeGenType) {
3491
3492 case CustomCodeGen::VLD24: {
3493 llvm::SmallVector<Value *, 4> Ops;
3494 llvm::SmallVector<llvm::Type *, 4> Tys;
3495
3496 auto MvecCType = E->getType();
3497 auto MvecLType = ConvertType(T: MvecCType);
3498 assert(MvecLType->isStructTy() &&
3499 "Return type for vld[24]q should be a struct");
3500 assert(MvecLType->getStructNumElements() == 1 &&
3501 "Return-type struct for vld[24]q should have one element");
3502 auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
3503 assert(MvecLTypeInner->isArrayTy() &&
3504 "Return-type struct for vld[24]q should contain an array");
3505 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3506 "Array member of return-type struct vld[24]q has wrong length");
3507 auto VecLType = MvecLTypeInner->getArrayElementType();
3508
3509 Tys.push_back(Elt: VecLType);
3510
3511 auto Addr = E->getArg(Arg: 0);
3512 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
3513 Tys.push_back(Elt: ConvertType(T: Addr->getType()));
3514
3515 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
3516 Value *LoadResult = Builder.CreateCall(Callee: F, Args: Ops);
3517 Value *MvecOut = PoisonValue::get(T: MvecLType);
3518 for (unsigned i = 0; i < NumVectors; ++i) {
3519 Value *Vec = Builder.CreateExtractValue(Agg: LoadResult, Idxs: i);
3520 MvecOut = Builder.CreateInsertValue(Agg: MvecOut, Val: Vec, Idxs: {0, i});
3521 }
3522
3523 if (ReturnValue.isNull())
3524 return MvecOut;
3525 else
3526 return Builder.CreateStore(Val: MvecOut, Addr: ReturnValue.getAddress());
3527 }
3528
3529 case CustomCodeGen::VST24: {
3530 llvm::SmallVector<Value *, 4> Ops;
3531 llvm::SmallVector<llvm::Type *, 4> Tys;
3532
3533 auto Addr = E->getArg(Arg: 0);
3534 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
3535 Tys.push_back(Elt: ConvertType(T: Addr->getType()));
3536
3537 auto MvecCType = E->getArg(Arg: 1)->getType();
3538 auto MvecLType = ConvertType(T: MvecCType);
3539 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
3540 assert(MvecLType->getStructNumElements() == 1 &&
3541 "Data-type struct for vst2q should have one element");
3542 auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
3543 assert(MvecLTypeInner->isArrayTy() &&
3544 "Data-type struct for vst2q should contain an array");
3545 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3546 "Array member of return-type struct vld[24]q has wrong length");
3547 auto VecLType = MvecLTypeInner->getArrayElementType();
3548
3549 Tys.push_back(Elt: VecLType);
3550
3551 AggValueSlot MvecSlot = CreateAggTemp(T: MvecCType);
3552 EmitAggExpr(E: E->getArg(Arg: 1), AS: MvecSlot);
3553 auto Mvec = Builder.CreateLoad(Addr: MvecSlot.getAddress());
3554 for (unsigned i = 0; i < NumVectors; i++)
3555 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Mvec, Idxs: {0, i}));
3556
3557 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
3558 Value *ToReturn = nullptr;
3559 for (unsigned i = 0; i < NumVectors; i++) {
3560 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: i));
3561 ToReturn = Builder.CreateCall(Callee: F, Args: Ops);
3562 Ops.pop_back();
3563 }
3564 return ToReturn;
3565 }
3566 }
3567 llvm_unreachable("unknown custom codegen type.");
3568}
3569
3570Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3571 const CallExpr *E,
3572 ReturnValueSlot ReturnValue,
3573 llvm::Triple::ArchType Arch) {
3574 switch (BuiltinID) {
3575 default:
3576 return nullptr;
3577#include "clang/Basic/arm_cde_builtin_cg.inc"
3578 }
3579}
3580
3581static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3582 const CallExpr *E,
3583 SmallVectorImpl<Value *> &Ops,
3584 llvm::Triple::ArchType Arch) {
3585 unsigned int Int = 0;
3586 const char *s = nullptr;
3587
3588 switch (BuiltinID) {
3589 default:
3590 return nullptr;
3591 case NEON::BI__builtin_neon_vtbl1_v:
3592 case NEON::BI__builtin_neon_vqtbl1_v:
3593 case NEON::BI__builtin_neon_vqtbl1q_v:
3594 case NEON::BI__builtin_neon_vtbl2_v:
3595 case NEON::BI__builtin_neon_vqtbl2_v:
3596 case NEON::BI__builtin_neon_vqtbl2q_v:
3597 case NEON::BI__builtin_neon_vtbl3_v:
3598 case NEON::BI__builtin_neon_vqtbl3_v:
3599 case NEON::BI__builtin_neon_vqtbl3q_v:
3600 case NEON::BI__builtin_neon_vtbl4_v:
3601 case NEON::BI__builtin_neon_vqtbl4_v:
3602 case NEON::BI__builtin_neon_vqtbl4q_v:
3603 break;
3604 case NEON::BI__builtin_neon_vtbx1_v:
3605 case NEON::BI__builtin_neon_vqtbx1_v:
3606 case NEON::BI__builtin_neon_vqtbx1q_v:
3607 case NEON::BI__builtin_neon_vtbx2_v:
3608 case NEON::BI__builtin_neon_vqtbx2_v:
3609 case NEON::BI__builtin_neon_vqtbx2q_v:
3610 case NEON::BI__builtin_neon_vtbx3_v:
3611 case NEON::BI__builtin_neon_vqtbx3_v:
3612 case NEON::BI__builtin_neon_vqtbx3q_v:
3613 case NEON::BI__builtin_neon_vtbx4_v:
3614 case NEON::BI__builtin_neon_vqtbx4_v:
3615 case NEON::BI__builtin_neon_vqtbx4q_v:
3616 break;
3617 }
3618
3619 assert(E->getNumArgs() >= 3);
3620
3621 // Get the last argument, which specifies the vector type.
3622 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
3623 std::optional<llvm::APSInt> Result =
3624 Arg->getIntegerConstantExpr(Ctx: CGF.getContext());
3625 if (!Result)
3626 return nullptr;
3627
3628 // Determine the type of this overloaded NEON intrinsic.
3629 NeonTypeFlags Type = Result->getZExtValue();
3630 llvm::FixedVectorType *Ty = GetNeonType(CGF: &CGF, TypeFlags: Type);
3631 if (!Ty)
3632 return nullptr;
3633
3634 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3635
  // AArch64 scalar builtins are not overloaded; they do not have an extra
  // argument that specifies the vector type, so we need to handle each case.
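  // For example, the D-register forms pack their 64-bit table operands into
  // 128-bit vectors (padding with zeroes when the count is odd), so
  // vtbl1/vtbl2 lower to aarch64.neon.tbl1 and vtbl3/vtbl4 lower to
  // aarch64.neon.tbl2, as handled below.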
3638 switch (BuiltinID) {
3639 case NEON::BI__builtin_neon_vtbl1_v: {
3640 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 1), ExtOp: nullptr, IndexOp: Ops[1],
3641 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3642 }
3643 case NEON::BI__builtin_neon_vtbl2_v: {
3644 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 2), ExtOp: nullptr, IndexOp: Ops[2],
3645 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3646 }
3647 case NEON::BI__builtin_neon_vtbl3_v: {
3648 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 3), ExtOp: nullptr, IndexOp: Ops[3],
3649 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3650 }
3651 case NEON::BI__builtin_neon_vtbl4_v: {
3652 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 4), ExtOp: nullptr, IndexOp: Ops[4],
3653 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3654 }
3655 case NEON::BI__builtin_neon_vtbx1_v: {
3656 Value *TblRes =
3657 packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 1), ExtOp: nullptr, IndexOp: Ops[2], ResTy: Ty,
3658 IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3659
3660 llvm::Constant *EightV = ConstantInt::get(Ty, V: 8);
3661 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[2], RHS: EightV);
3662 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3663
3664 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3665 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3666 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3667 }
3668 case NEON::BI__builtin_neon_vtbx2_v: {
3669 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 2), ExtOp: Ops[0], IndexOp: Ops[3],
3670 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbx1, Name: "vtbx1");
3671 }
3672 case NEON::BI__builtin_neon_vtbx3_v: {
3673 Value *TblRes =
3674 packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 3), ExtOp: nullptr, IndexOp: Ops[4], ResTy: Ty,
3675 IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3676
3677 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, V: 24);
3678 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[4],
3679 RHS: TwentyFourV);
3680 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3681
3682 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3683 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3684 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3685 }
3686 case NEON::BI__builtin_neon_vtbx4_v: {
3687 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 4), ExtOp: Ops[0], IndexOp: Ops[5],
3688 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbx2, Name: "vtbx2");
3689 }
3690 case NEON::BI__builtin_neon_vqtbl1_v:
3691 case NEON::BI__builtin_neon_vqtbl1q_v:
3692 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
3693 case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
3695 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
3696 case NEON::BI__builtin_neon_vqtbl3_v:
3697 case NEON::BI__builtin_neon_vqtbl3q_v:
3698 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
3699 case NEON::BI__builtin_neon_vqtbl4_v:
3700 case NEON::BI__builtin_neon_vqtbl4q_v:
3701 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
3702 case NEON::BI__builtin_neon_vqtbx1_v:
3703 case NEON::BI__builtin_neon_vqtbx1q_v:
3704 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
3705 case NEON::BI__builtin_neon_vqtbx2_v:
3706 case NEON::BI__builtin_neon_vqtbx2q_v:
3707 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
3708 case NEON::BI__builtin_neon_vqtbx3_v:
3709 case NEON::BI__builtin_neon_vqtbx3q_v:
3710 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
3711 case NEON::BI__builtin_neon_vqtbx4_v:
3712 case NEON::BI__builtin_neon_vqtbx4q_v:
3713 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
3714 }
3715 }
3716
3717 if (!Int)
3718 return nullptr;
3719
3720 Function *F = CGF.CGM.getIntrinsic(IID: Int, Tys: Ty);
3721 return CGF.EmitNeonCall(F, Ops, name: s);
3722}
3723
3724Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3725 auto *VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
3726 Op = Builder.CreateBitCast(V: Op, DestTy: Int16Ty);
3727 Value *V = PoisonValue::get(T: VTy);
3728 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
3729 Op = Builder.CreateInsertElement(Vec: V, NewElt: Op, Idx: CI);
3730 return Op;
3731}
3732
3733/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3734/// access builtin. Only required if it can't be inferred from the base pointer
3735/// operand.
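/// For example, svld1sb_s32 loads i8 elements and sign-extends them to i32,
/// so the memory element type (i8) differs from the vector element type.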
3736llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3737 switch (TypeFlags.getMemEltType()) {
3738 case SVETypeFlags::MemEltTyDefault:
3739 return getEltType(TypeFlags);
3740 case SVETypeFlags::MemEltTyInt8:
3741 return Builder.getInt8Ty();
3742 case SVETypeFlags::MemEltTyInt16:
3743 return Builder.getInt16Ty();
3744 case SVETypeFlags::MemEltTyInt32:
3745 return Builder.getInt32Ty();
3746 case SVETypeFlags::MemEltTyInt64:
3747 return Builder.getInt64Ty();
3748 }
3749 llvm_unreachable("Unknown MemEltType");
3750}
3751
3752llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3753 switch (TypeFlags.getEltType()) {
3754 default:
3755 llvm_unreachable("Invalid SVETypeFlag!");
3756
3757 case SVETypeFlags::EltTyMFloat8:
3758 case SVETypeFlags::EltTyInt8:
3759 return Builder.getInt8Ty();
3760 case SVETypeFlags::EltTyInt16:
3761 return Builder.getInt16Ty();
3762 case SVETypeFlags::EltTyInt32:
3763 return Builder.getInt32Ty();
3764 case SVETypeFlags::EltTyInt64:
3765 return Builder.getInt64Ty();
3766 case SVETypeFlags::EltTyInt128:
3767 return Builder.getInt128Ty();
3768
3769 case SVETypeFlags::EltTyFloat16:
3770 return Builder.getHalfTy();
3771 case SVETypeFlags::EltTyFloat32:
3772 return Builder.getFloatTy();
3773 case SVETypeFlags::EltTyFloat64:
3774 return Builder.getDoubleTy();
3775
3776 case SVETypeFlags::EltTyBFloat16:
3777 return Builder.getBFloatTy();
3778
3779 case SVETypeFlags::EltTyBool8:
3780 case SVETypeFlags::EltTyBool16:
3781 case SVETypeFlags::EltTyBool32:
3782 case SVETypeFlags::EltTyBool64:
3783 return Builder.getInt1Ty();
3784 }
3785}
3786
3787// Return the llvm predicate vector type corresponding to the specified element
3788// TypeFlags.
3789llvm::ScalableVectorType *
3790CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3791 switch (TypeFlags.getEltType()) {
3792 default: llvm_unreachable("Unhandled SVETypeFlag!");
3793
3794 case SVETypeFlags::EltTyInt8:
3795 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3796 case SVETypeFlags::EltTyInt16:
3797 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3798 case SVETypeFlags::EltTyInt32:
3799 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3800 case SVETypeFlags::EltTyInt64:
3801 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3802
3803 case SVETypeFlags::EltTyBFloat16:
3804 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3805 case SVETypeFlags::EltTyFloat16:
3806 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3807 case SVETypeFlags::EltTyFloat32:
3808 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3809 case SVETypeFlags::EltTyFloat64:
3810 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3811
3812 case SVETypeFlags::EltTyBool8:
3813 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3814 case SVETypeFlags::EltTyBool16:
3815 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3816 case SVETypeFlags::EltTyBool32:
3817 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3818 case SVETypeFlags::EltTyBool64:
3819 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3820 }
3821}
3822
3823// Return the llvm vector type corresponding to the specified element TypeFlags.
3824llvm::ScalableVectorType *
3825CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3826 switch (TypeFlags.getEltType()) {
3827 default:
3828 llvm_unreachable("Invalid SVETypeFlag!");
3829
3830 case SVETypeFlags::EltTyInt8:
3831 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3832 case SVETypeFlags::EltTyInt16:
3833 return llvm::ScalableVectorType::get(ElementType: Builder.getInt16Ty(), MinNumElts: 8);
3834 case SVETypeFlags::EltTyInt32:
3835 return llvm::ScalableVectorType::get(ElementType: Builder.getInt32Ty(), MinNumElts: 4);
3836 case SVETypeFlags::EltTyInt64:
3837 return llvm::ScalableVectorType::get(ElementType: Builder.getInt64Ty(), MinNumElts: 2);
3838
3839 case SVETypeFlags::EltTyMFloat8:
3840 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3841 case SVETypeFlags::EltTyFloat16:
3842 return llvm::ScalableVectorType::get(ElementType: Builder.getHalfTy(), MinNumElts: 8);
3843 case SVETypeFlags::EltTyBFloat16:
3844 return llvm::ScalableVectorType::get(ElementType: Builder.getBFloatTy(), MinNumElts: 8);
3845 case SVETypeFlags::EltTyFloat32:
3846 return llvm::ScalableVectorType::get(ElementType: Builder.getFloatTy(), MinNumElts: 4);
3847 case SVETypeFlags::EltTyFloat64:
3848 return llvm::ScalableVectorType::get(ElementType: Builder.getDoubleTy(), MinNumElts: 2);
3849
3850 case SVETypeFlags::EltTyBool8:
3851 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3852 case SVETypeFlags::EltTyBool16:
3853 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3854 case SVETypeFlags::EltTyBool32:
3855 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3856 case SVETypeFlags::EltTyBool64:
3857 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3858 }
3859}
3860
3861llvm::Value *
3862CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3863 Function *Ptrue =
3864 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_ptrue, Tys: getSVEPredType(TypeFlags));
3865 return Builder.CreateCall(Callee: Ptrue, Args: {Builder.getInt32(/*SV_ALL*/ C: 31)});
3866}
3867
3868constexpr unsigned SVEBitsPerBlock = 128;
3869
3870static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3871 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3872 return llvm::ScalableVectorType::get(ElementType: EltTy, MinNumElts: NumElts);
3873}
3874
3875// Reinterpret the input predicate so that it can be used to correctly isolate
3876// the elements of the specified datatype.
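// For example, an ACLE svbool_t arrives as <vscale x 16 x i1>; to guard a
// <vscale x 2 x double> operation it is narrowed to <vscale x 2 x i1> via
// llvm.aarch64.sve.convert.from.svbool.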
3877Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3878 llvm::ScalableVectorType *VTy) {
3879
3880 if (isa<TargetExtType>(Val: Pred->getType()) &&
3881 cast<TargetExtType>(Val: Pred->getType())->getName() == "aarch64.svcount")
3882 return Pred;
3883
3884 auto *RTy = llvm::VectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), Other: VTy);
3885 if (Pred->getType() == RTy)
3886 return Pred;
3887
3888 unsigned IntID;
3889 llvm::Type *IntrinsicTy;
3890 switch (VTy->getMinNumElements()) {
3891 default:
3892 llvm_unreachable("unsupported element count!");
3893 case 1:
3894 case 2:
3895 case 4:
3896 case 8:
3897 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
3898 IntrinsicTy = RTy;
3899 break;
3900 case 16:
3901 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
3902 IntrinsicTy = Pred->getType();
3903 break;
3904 }
3905
3906 Function *F = CGM.getIntrinsic(IID: IntID, Tys: IntrinsicTy);
3907 Value *C = Builder.CreateCall(Callee: F, Args: Pred);
3908 assert(C->getType() == RTy && "Unexpected return type!");
3909 return C;
3910}
3911
3912Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
3913 llvm::StructType *Ty) {
3914 if (PredTuple->getType() == Ty)
3915 return PredTuple;
3916
3917 Value *Ret = llvm::PoisonValue::get(T: Ty);
3918 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
3919 Value *Pred = Builder.CreateExtractValue(Agg: PredTuple, Idxs: I);
3920 Pred = EmitSVEPredicateCast(
3921 Pred, VTy: cast<llvm::ScalableVectorType>(Val: Ty->getTypeAtIndex(N: I)));
3922 Ret = Builder.CreateInsertValue(Agg: Ret, Val: Pred, Idxs: I);
3923 }
3924
3925 return Ret;
3926}
3927
3928Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
3929 SmallVectorImpl<Value *> &Ops,
3930 unsigned IntID) {
3931 auto *ResultTy = getSVEType(TypeFlags);
3932 auto *OverloadedTy =
3933 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: ResultTy);
3934
3935 Function *F = nullptr;
3936 if (Ops[1]->getType()->isVectorTy())
3937 // This is the "vector base, scalar offset" case. In order to uniquely
3938 // map this built-in to an LLVM IR intrinsic, we need both the return type
3939 // and the type of the vector base.
3940 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[1]->getType()});
3941 else
3942 // This is the "scalar base, vector offset case". The type of the offset
3943 // is encoded in the name of the intrinsic. We only need to specify the
3944 // return type in order to uniquely map this built-in to an LLVM IR
3945 // intrinsic.
3946 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
3947
3948 // At the ACLE level there's only one predicate type, svbool_t, which is
3949 // mapped to <n x 16 x i1>. However, this might be incompatible with the
3950 // actual type being loaded. For example, when loading doubles (i64) the
3951 // predicate should be <n x 2 x i1> instead. At the IR level the type of
3952 // the predicate and the data being loaded must match. Cast to the type
3953 // expected by the intrinsic. The intrinsic itself should be defined in
  // a way that enforces relations between parameter types.
3955 Ops[0] = EmitSVEPredicateCast(
3956 Pred: Ops[0], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 0)->getType()));
3957
3958 // Pass 0 when the offset is missing. This can only be applied when using
3959 // the "vector base" addressing mode for which ACLE allows no offset. The
3960 // corresponding LLVM IR always requires an offset.
3961 if (Ops.size() == 2) {
3962 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
3963 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
3964 }
3965
3966 // For "vector base, scalar index" scale the index so that it becomes a
3967 // scalar offset.
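  // For example, with 64-bit elements the index is shifted left by 3
  // (log2 of 8 bytes per element) to form a byte offset.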
3968 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
3969 unsigned BytesPerElt =
3970 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
3971 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
3972 }
3973
3974 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
3975
3976 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
3977 // other cases it's folded into a nop.
3978 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(V: Call, DestTy: ResultTy)
3979 : Builder.CreateSExt(V: Call, DestTy: ResultTy);
3980}
3981
3982Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
3983 SmallVectorImpl<Value *> &Ops,
3984 unsigned IntID) {
3985 auto *SrcDataTy = getSVEType(TypeFlags);
3986 auto *OverloadedTy =
3987 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: SrcDataTy);
3988
3989 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
3990 // it's the first argument. Move it accordingly.
3991 Ops.insert(I: Ops.begin(), Elt: Ops.pop_back_val());
3992
3993 Function *F = nullptr;
3994 if (Ops[2]->getType()->isVectorTy())
3995 // This is the "vector base, scalar offset" case. In order to uniquely
3996 // map this built-in to an LLVM IR intrinsic, we need both the return type
3997 // and the type of the vector base.
3998 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[2]->getType()});
3999 else
4000 // This is the "scalar base, vector offset case". The type of the offset
4001 // is encoded in the name of the intrinsic. We only need to specify the
4002 // return type in order to uniquely map this built-in to an LLVM IR
4003 // intrinsic.
4004 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4005
4006 // Pass 0 when the offset is missing. This can only be applied when using
4007 // the "vector base" addressing mode for which ACLE allows no offset. The
4008 // corresponding LLVM IR always requires an offset.
4009 if (Ops.size() == 3) {
4010 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4011 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4012 }
4013
4014 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4015 // folded into a nop.
4016 Ops[0] = Builder.CreateTrunc(V: Ops[0], DestTy: OverloadedTy);
4017
4018 // At the ACLE level there's only one predicate type, svbool_t, which is
4019 // mapped to <n x 16 x i1>. However, this might be incompatible with the
4020 // actual type being stored. For example, when storing doubles (i64) the
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
4022 // the predicate and the data being stored must match. Cast to the type
4023 // expected by the intrinsic. The intrinsic itself should be defined in
4024 // a way that enforces relations between parameter types.
4025 Ops[1] = EmitSVEPredicateCast(
4026 Pred: Ops[1], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 1)->getType()));
4027
4028 // For "vector base, scalar index" scale the index so that it becomes a
4029 // scalar offset.
4030 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4031 unsigned BytesPerElt =
4032 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4033 Ops[3] = Builder.CreateShl(LHS: Ops[3], RHS: Log2_32(Value: BytesPerElt));
4034 }
4035
4036 return Builder.CreateCall(Callee: F, Args: Ops);
4037}
4038
4039Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4040 SmallVectorImpl<Value *> &Ops,
4041 unsigned IntID) {
4042 // The gather prefetches are overloaded on the vector input - this can either
4043 // be the vector of base addresses or vector of offsets.
4044 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Val: Ops[1]->getType());
4045 if (!OverloadedTy)
4046 OverloadedTy = cast<llvm::ScalableVectorType>(Val: Ops[2]->getType());
4047
4048 // Cast the predicate from svbool_t to the right number of elements.
4049 Ops[0] = EmitSVEPredicateCast(Pred: Ops[0], VTy: OverloadedTy);
4050
4051 // vector + imm addressing modes
4052 if (Ops[1]->getType()->isVectorTy()) {
4053 if (Ops.size() == 3) {
4054 // Pass 0 for 'vector+imm' when the index is omitted.
4055 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4056
4057 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4058 std::swap(a&: Ops[2], b&: Ops[3]);
4059 } else {
4060 // Index needs to be passed as scaled offset.
4061 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4062 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4063 if (BytesPerElt > 1)
4064 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
4065 }
4066 }
4067
4068 Function *F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4069 return Builder.CreateCall(Callee: F, Args: Ops);
4070}
4071
4072Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4073 SmallVectorImpl<Value*> &Ops,
4074 unsigned IntID) {
4075 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4076 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
4077 Value *BasePtr = Ops[1];
4078
4079 // Does the load have an offset?
4080 if (Ops.size() > 2)
4081 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
4082
4083 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {VTy});
4084 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr});
4085}
4086
4087Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4088 SmallVectorImpl<Value*> &Ops,
4089 unsigned IntID) {
4090 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4091
4092 unsigned N;
4093 switch (IntID) {
4094 case Intrinsic::aarch64_sve_st2:
4095 case Intrinsic::aarch64_sve_st1_pn_x2:
4096 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4097 case Intrinsic::aarch64_sve_st2q:
4098 N = 2;
4099 break;
4100 case Intrinsic::aarch64_sve_st3:
4101 case Intrinsic::aarch64_sve_st3q:
4102 N = 3;
4103 break;
4104 case Intrinsic::aarch64_sve_st4:
4105 case Intrinsic::aarch64_sve_st1_pn_x4:
4106 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4107 case Intrinsic::aarch64_sve_st4q:
4108 N = 4;
4109 break;
4110 default:
4111 llvm_unreachable("unknown intrinsic!");
4112 }
4113
4114 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
4115 Value *BasePtr = Ops[1];
4116
4117 // Does the store have an offset?
4118 if (Ops.size() > (2 + N))
4119 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
4120
4121 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4122 // need to break up the tuple vector.
4123 SmallVector<llvm::Value*, 5> Operands;
4124 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4125 Operands.push_back(Elt: Ops[I]);
4126 Operands.append(IL: {Predicate, BasePtr});
4127 Function *F = CGM.getIntrinsic(IID: IntID, Tys: { VTy });
4128
4129 return Builder.CreateCall(Callee: F, Args: Operands);
4130}
4131
4132// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4133// svpmullt_pair intrinsics, with the exception that their results are bitcast
4134// to a wider type.
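// For example, svpmullb_u16 takes two svuint8_t operands: we call
// llvm.aarch64.sve.pmullb.pair on <vscale x 16 x i8> and then reinterpret
// the result as <vscale x 8 x i16>.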
4135Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4136 SmallVectorImpl<Value *> &Ops,
4137 unsigned BuiltinID) {
4138 // Splat scalar operand to vector (intrinsics with _n infix)
4139 if (TypeFlags.hasSplatOperand()) {
4140 unsigned OpNo = TypeFlags.getSplatOperand();
4141 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
4142 }
4143
4144 // The pair-wise function has a narrower overloaded type.
4145 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Ops[0]->getType());
4146 Value *Call = Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1]});
4147
4148 // Now bitcast to the wider result type.
4149 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4150 return EmitSVEReinterpret(Val: Call, Ty);
4151}
4152
4153Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4154 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4155 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4156 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: OverloadedTy);
4157 return Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt32(C: 0)});
4158}
4159
4160Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4161 SmallVectorImpl<Value *> &Ops,
4162 unsigned BuiltinID) {
4163 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4164 auto *VectorTy = getSVEVectorForElementType(EltTy: MemEltTy);
4165 auto *MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4166
4167 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: MemoryTy);
4168 Value *BasePtr = Ops[1];
4169
4170 // Implement the index operand if not omitted.
4171 if (Ops.size() > 3)
4172 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4173
4174 Value *PrfOp = Ops.back();
4175
4176 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Predicate->getType());
4177 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr, PrfOp});
4178}
4179
4180Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4181 llvm::Type *ReturnTy,
4182 SmallVectorImpl<Value *> &Ops,
4183 unsigned IntrinsicID,
4184 bool IsZExtReturn) {
4185 QualType LangPTy = E->getArg(Arg: 1)->getType();
4186 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4187 T: LangPTy->castAs<PointerType>()->getPointeeType());
4188
  // The Mfloat8 type is stored as a vector, so extra work is needed to
  // extract the scalar element type.
4191 if (MemEltTy->isVectorTy()) {
4192 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4193 "Only <1 x i8> expected");
4194 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
4195 }
4196
4197 // The vector type that is returned may be different from the
4198 // eventual type loaded from memory.
4199 auto VectorTy = cast<llvm::ScalableVectorType>(Val: ReturnTy);
4200 llvm::ScalableVectorType *MemoryTy = nullptr;
4201 llvm::ScalableVectorType *PredTy = nullptr;
4202 bool IsQuadLoad = false;
4203 switch (IntrinsicID) {
4204 case Intrinsic::aarch64_sve_ld1uwq:
4205 case Intrinsic::aarch64_sve_ld1udq:
4206 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
4207 PredTy = llvm::ScalableVectorType::get(
4208 ElementType: llvm::Type::getInt1Ty(C&: getLLVMContext()), MinNumElts: 1);
4209 IsQuadLoad = true;
4210 break;
4211 default:
4212 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4213 PredTy = MemoryTy;
4214 break;
4215 }
4216
4217 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
4218 Value *BasePtr = Ops[1];
4219
4220 // Does the load have an offset?
4221 if (Ops.size() > 2)
4222 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4223
4224 Function *F = CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadLoad ? VectorTy : MemoryTy);
4225 auto *Load =
4226 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr}));
4227 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
4228 CGM.DecorateInstructionWithTBAA(Inst: Load, TBAAInfo);
4229
4230 if (IsQuadLoad)
4231 return Load;
4232
4233 return IsZExtReturn ? Builder.CreateZExt(V: Load, DestTy: VectorTy)
4234 : Builder.CreateSExt(V: Load, DestTy: VectorTy);
4235}
4236
4237Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4238 SmallVectorImpl<Value *> &Ops,
4239 unsigned IntrinsicID) {
4240 QualType LangPTy = E->getArg(Arg: 1)->getType();
4241 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4242 T: LangPTy->castAs<PointerType>()->getPointeeType());
4243
  // The Mfloat8 type is stored as a vector, so extra work is needed to
  // extract the scalar element type.
4246 if (MemEltTy->isVectorTy()) {
4247 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4248 "Only <1 x i8> expected");
4249 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
4250 }
4251
4252 // The vector type that is stored may be different from the
4253 // eventual type stored to memory.
4254 auto VectorTy = cast<llvm::ScalableVectorType>(Val: Ops.back()->getType());
4255 auto MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4256
4257 auto PredTy = MemoryTy;
4258 auto AddrMemoryTy = MemoryTy;
4259 bool IsQuadStore = false;
4260
4261 switch (IntrinsicID) {
4262 case Intrinsic::aarch64_sve_st1wq:
4263 case Intrinsic::aarch64_sve_st1dq:
4264 AddrMemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
4265 PredTy =
4266 llvm::ScalableVectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), MinNumElts: 1);
4267 IsQuadStore = true;
4268 break;
4269 default:
4270 break;
4271 }
4272 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
4273 Value *BasePtr = Ops[1];
4274
4275 // Does the store have an offset?
4276 if (Ops.size() == 4)
4277 BasePtr = Builder.CreateGEP(Ty: AddrMemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4278
4279 // Last value is always the data
4280 Value *Val =
4281 IsQuadStore ? Ops.back() : Builder.CreateTrunc(V: Ops.back(), DestTy: MemoryTy);
4282
4283 Function *F =
4284 CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadStore ? VectorTy : MemoryTy);
4285 auto *Store =
4286 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Val, Predicate, BasePtr}));
4287 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
4288 CGM.DecorateInstructionWithTBAA(Inst: Store, TBAAInfo);
4289 return Store;
4290}
4291
4292Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4293 SmallVectorImpl<Value *> &Ops,
4294 unsigned IntID) {
4295 Ops[2] = EmitSVEPredicateCast(
4296 Pred: Ops[2], VTy: getSVEVectorForElementType(EltTy: SVEBuiltinMemEltTy(TypeFlags)));
4297
4298 SmallVector<Value *> NewOps;
4299 NewOps.push_back(Elt: Ops[2]);
4300
4301 llvm::Value *BasePtr = Ops[3];
4302 llvm::Value *RealSlice = Ops[1];
4303 // If the intrinsic contains the vnum parameter, multiply it with the vector
4304 // size in bytes.
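  // In effect the base pointer becomes 'base + vnum * svcntsb()' bytes and
  // the tile slice index becomes 'slice + vnum'.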
4305 if (Ops.size() == 5) {
4306 Function *StreamingVectorLength =
4307 CGM.getIntrinsic(IID: Intrinsic::aarch64_sme_cntsb);
4308 llvm::Value *StreamingVectorLengthCall =
4309 Builder.CreateCall(Callee: StreamingVectorLength);
4310 llvm::Value *Mulvl =
4311 Builder.CreateMul(LHS: StreamingVectorLengthCall, RHS: Ops[4], Name: "mulvl");
4312 // The type of the ptr parameter is void *, so use Int8Ty here.
4313 BasePtr = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[3], IdxList: Mulvl);
4314 RealSlice = Builder.CreateZExt(V: RealSlice, DestTy: Int64Ty);
4315 RealSlice = Builder.CreateAdd(LHS: RealSlice, RHS: Ops[4]);
4316 RealSlice = Builder.CreateTrunc(V: RealSlice, DestTy: Int32Ty);
4317 }
4318 NewOps.push_back(Elt: BasePtr);
4319 NewOps.push_back(Elt: Ops[0]);
4320 NewOps.push_back(Elt: RealSlice);
4321 Function *F = CGM.getIntrinsic(IID: IntID);
4322 return Builder.CreateCall(Callee: F, Args: NewOps);
4323}
4324
4325Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4326 SmallVectorImpl<Value *> &Ops,
4327 unsigned IntID) {
4328 auto *VecTy = getSVEType(TypeFlags);
4329 Function *F = CGM.getIntrinsic(IID: IntID, Tys: VecTy);
4330 if (TypeFlags.isReadZA())
4331 Ops[1] = EmitSVEPredicateCast(Pred: Ops[1], VTy: VecTy);
4332 else if (TypeFlags.isWriteZA())
4333 Ops[2] = EmitSVEPredicateCast(Pred: Ops[2], VTy: VecTy);
4334 return Builder.CreateCall(Callee: F, Args: Ops);
4335}
4336
4337Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4338 SmallVectorImpl<Value *> &Ops,
4339 unsigned IntID) {
  // The svzero_za() intrinsic zeroes the entire ZA array and takes no
  // parameters.
4341 if (Ops.size() == 0)
4342 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: 255));
4343 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
4344 return Builder.CreateCall(Callee: F, Args: Ops);
4345}
4346
4347Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4348 SmallVectorImpl<Value *> &Ops,
4349 unsigned IntID) {
4350 if (Ops.size() == 2)
4351 Ops.push_back(Elt: Builder.getInt32(C: 0));
4352 else
4353 Ops[2] = Builder.CreateIntCast(V: Ops[2], DestTy: Int32Ty, isSigned: true);
4354 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
4355 return Builder.CreateCall(Callee: F, Args: Ops);
4356}
4357
// Splat a scalar operand across all lanes of the given SVE vector type.
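// For example, splatting an i32 scalar this way yields a <vscale x 4 x i32>
// with the value broadcast to every lane.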
4360Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4361 return Builder.CreateVectorSplat(
4362 EC: cast<llvm::VectorType>(Val: Ty)->getElementCount(), V: Scalar);
4363}
4364
4365Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4366 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4367#ifndef NDEBUG
4368 auto *VecTy = cast<llvm::VectorType>(Ty);
4369 ElementCount EC = VecTy->getElementCount();
4370 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4371 "Only <1 x i8> expected");
4372#endif
4373 Scalar = Builder.CreateExtractElement(Vec: Scalar, Idx: uint64_t(0));
4374 }
4375 return EmitSVEDupX(Scalar, Ty: getSVEVectorForElementType(EltTy: Scalar->getType()));
4376}
4377
4378Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4379 // FIXME: For big endian this needs an additional REV, or needs a separate
4380 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4381 // instruction is defined as 'bitwise' equivalent from memory point of
4382 // view (when storing/reloading), whereas the svreinterpret builtin
4383 // implements bitwise equivalent cast from register point of view.
4384 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4385
4386 if (auto *StructTy = dyn_cast<StructType>(Val: Ty)) {
4387 Value *Tuple = llvm::PoisonValue::get(T: Ty);
4388
4389 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4390 Value *In = Builder.CreateExtractValue(Agg: Val, Idxs: I);
4391 Value *Out = Builder.CreateBitCast(V: In, DestTy: StructTy->getTypeAtIndex(N: I));
4392 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Out, Idxs: I);
4393 }
4394
4395 return Tuple;
4396 }
4397
4398 return Builder.CreateBitCast(V: Val, DestTy: Ty);
4399}
4400
4401static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4402 SmallVectorImpl<Value *> &Ops) {
4403 auto *SplatZero = Constant::getNullValue(Ty);
4404 Ops.insert(I: Ops.begin(), Elt: SplatZero);
4405}
4406
4407static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4408 SmallVectorImpl<Value *> &Ops) {
4409 auto *SplatUndef = UndefValue::get(T: Ty);
4410 Ops.insert(I: Ops.begin(), Elt: SplatUndef);
4411}
4412
4413SmallVector<llvm::Type *, 2>
4414CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4415 llvm::Type *ResultType,
4416 ArrayRef<Value *> Ops) {
4417 if (TypeFlags.isOverloadNone())
4418 return {};
4419
4420 llvm::Type *DefaultType = getSVEType(TypeFlags);
4421
4422 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4423 return {DefaultType, Ops[1]->getType()};
4424
4425 if (TypeFlags.isOverloadWhileRW())
4426 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4427
4428 if (TypeFlags.isOverloadCvt())
4429 return {Ops[0]->getType(), Ops.back()->getType()};
4430
4431 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4432 ResultType->isVectorTy())
4433 return {ResultType, Ops[1]->getType()};
4434
4435 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4436 return {DefaultType};
4437}
4438
4439Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4440 ArrayRef<Value *> Ops) {
4441 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
4442 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
4443 unsigned Idx = cast<ConstantInt>(Val: Ops[1])->getZExtValue();
4444
4445 if (TypeFlags.isTupleSet())
4446 return Builder.CreateInsertValue(Agg: Ops[0], Val: Ops[2], Idxs: Idx);
4447 return Builder.CreateExtractValue(Agg: Ops[0], Idxs: Idx);
4448}
4449
4450Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4451 llvm::Type *Ty,
4452 ArrayRef<Value *> Ops) {
  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4454
4455 Value *Tuple = llvm::PoisonValue::get(T: Ty);
4456 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4457 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Ops[Idx], Idxs: Idx);
4458
4459 return Tuple;
4460}
4461
4462void CodeGenFunction::GetAArch64SVEProcessedOperands(
4463 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4464 SVETypeFlags TypeFlags) {
4465 // Find out if any arguments are required to be integer constant expressions.
4466 unsigned ICEArguments = 0;
4467 ASTContext::GetBuiltinTypeError Error;
4468 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
4469 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4470
4471 // Tuple set/get only requires one insert/extract vector, which is
4472 // created by EmitSVETupleSetOrGet.
4473 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4474
4475 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4476 bool IsICE = ICEArguments & (1 << i);
4477 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: i));
4478
4479 if (IsICE) {
4480 // If this is required to be a constant, constant fold it so that we know
4481 // that the generated intrinsic gets a ConstantInt.
4482 std::optional<llvm::APSInt> Result =
4483 E->getArg(Arg: i)->getIntegerConstantExpr(Ctx: getContext());
4484 assert(Result && "Expected argument to be a constant");
4485
4486 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
4487 // truncate because the immediate has been range checked and no valid
4488 // immediate requires more than a handful of bits.
4489 *Result = Result->extOrTrunc(width: 32);
4490 Ops.push_back(Elt: llvm::ConstantInt::get(Context&: getLLVMContext(), V: *Result));
4491 continue;
4492 }
4493
4494 if (isa<StructType>(Val: Arg->getType()) && !IsTupleGetOrSet) {
4495 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4496 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Arg, Idxs: I));
4497
4498 continue;
4499 }
4500
4501 Ops.push_back(Elt: Arg);
4502 }
4503}
4504
4505Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4506 const CallExpr *E) {
4507 llvm::Type *Ty = ConvertType(T: E->getType());
4508 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4509 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4510 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
4511 return EmitSVEReinterpret(Val, Ty);
4512 }
4513
4514 auto *Builtin = findARMVectorIntrinsicInMap(IntrinsicMap: AArch64SVEIntrinsicMap, BuiltinID,
4515 MapProvenSorted&: AArch64SVEIntrinsicsProvenSorted);
4516
4517 llvm::SmallVector<Value *, 4> Ops;
4518 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4519 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4520
4521 if (TypeFlags.isLoad())
4522 return EmitSVEMaskedLoad(E, ReturnTy: Ty, Ops, IntrinsicID: Builtin->LLVMIntrinsic,
4523 IsZExtReturn: TypeFlags.isZExtReturn());
4524 else if (TypeFlags.isStore())
4525 return EmitSVEMaskedStore(E, Ops, IntrinsicID: Builtin->LLVMIntrinsic);
4526 else if (TypeFlags.isGatherLoad())
4527 return EmitSVEGatherLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4528 else if (TypeFlags.isScatterStore())
4529 return EmitSVEScatterStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4530 else if (TypeFlags.isPrefetch())
4531 return EmitSVEPrefetchLoad(TypeFlags, Ops, BuiltinID: Builtin->LLVMIntrinsic);
4532 else if (TypeFlags.isGatherPrefetch())
4533 return EmitSVEGatherPrefetch(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4534 else if (TypeFlags.isStructLoad())
4535 return EmitSVEStructLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4536 else if (TypeFlags.isStructStore())
4537 return EmitSVEStructStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4538 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4539 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4540 else if (TypeFlags.isTupleCreate())
4541 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4542 else if (TypeFlags.isUndef())
4543 return UndefValue::get(T: Ty);
4544 else if (Builtin->LLVMIntrinsic != 0) {
4545 // Emit set FPMR for intrinsics that require it
4546 if (TypeFlags.setsFPMR())
4547 Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_set_fpmr),
4548 Args: Ops.pop_back_val());
4549 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4550 InsertExplicitZeroOperand(Builder, Ty, Ops);
4551
4552 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4553 InsertExplicitUndefOperand(Builder, Ty, Ops);
4554
4555 // Some ACLE builtins leave out the argument to specify the predicate
4556 // pattern, which is expected to be expanded to an SV_ALL pattern.
4557 if (TypeFlags.isAppendSVALL())
4558 Ops.push_back(Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4559 if (TypeFlags.isInsertOp1SVALL())
4560 Ops.insert(I: &Ops[1], Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4561
4562 // Predicates must match the main datatype.
4563 for (Value *&Op : Ops)
4564 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Op->getType()))
4565 if (PredTy->getElementType()->isIntegerTy(Bitwidth: 1))
4566 Op = EmitSVEPredicateCast(Pred: Op, VTy: getSVEType(TypeFlags));
4567
4568 // Splat scalar operand to vector (intrinsics with _n infix)
4569 if (TypeFlags.hasSplatOperand()) {
4570 unsigned OpNo = TypeFlags.getSplatOperand();
4571 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
4572 }
4573
4574 if (TypeFlags.isReverseCompare())
4575 std::swap(a&: Ops[1], b&: Ops[2]);
4576 else if (TypeFlags.isReverseUSDOT())
4577 std::swap(a&: Ops[1], b&: Ops[2]);
4578 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4579 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4580 std::swap(a&: Ops[1], b&: Ops[2]);
4581 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4582 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4583 std::swap(a&: Ops[1], b&: Ops[3]);
4584
4585 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4586 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4587 llvm::Type *OpndTy = Ops[1]->getType();
4588 auto *SplatZero = Constant::getNullValue(Ty: OpndTy);
4589 Ops[1] = Builder.CreateSelect(C: Ops[0], True: Ops[1], False: SplatZero);
4590 }
4591
4592 Function *F = CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic,
4593 Tys: getSVEOverloadTypes(TypeFlags, ResultType: Ty, Ops));
4594 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
4595
4596 if (Call->getType() == Ty)
4597 return Call;
4598
4599 // Predicate results must be converted to svbool_t.
4600 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Val: Ty))
4601 return EmitSVEPredicateCast(Pred: Call, VTy: PredTy);
4602 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Val: Ty))
4603 return EmitSVEPredicateTupleCast(PredTuple: Call, Ty: PredTupleTy);
4604
4605 llvm_unreachable("unsupported element count!");
4606 }
4607
4608 switch (BuiltinID) {
4609 default:
4610 return nullptr;
4611
4612 case SVE::BI__builtin_sve_svreinterpret_b: {
4613 auto SVCountTy =
4614 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4615 Function *CastFromSVCountF =
4616 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_to_svbool, Tys: SVCountTy);
4617 return Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]);
4618 }
4619 case SVE::BI__builtin_sve_svreinterpret_c: {
4620 auto SVCountTy =
4621 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4622 Function *CastToSVCountF =
4623 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_from_svbool, Tys: SVCountTy);
4624 return Builder.CreateCall(Callee: CastToSVCountF, Args: Ops[0]);
4625 }
4626
4627 case SVE::BI__builtin_sve_svpsel_lane_b8:
4628 case SVE::BI__builtin_sve_svpsel_lane_b16:
4629 case SVE::BI__builtin_sve_svpsel_lane_b32:
4630 case SVE::BI__builtin_sve_svpsel_lane_b64:
4631 case SVE::BI__builtin_sve_svpsel_lane_c8:
4632 case SVE::BI__builtin_sve_svpsel_lane_c16:
4633 case SVE::BI__builtin_sve_svpsel_lane_c32:
4634 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4635 bool IsSVCount = isa<TargetExtType>(Val: Ops[0]->getType());
4636 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4637 "aarch64.svcount")) &&
4638 "Unexpected TargetExtType");
4639 auto SVCountTy =
4640 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4641 Function *CastFromSVCountF =
4642 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_to_svbool, Tys: SVCountTy);
4643 Function *CastToSVCountF =
4644 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_from_svbool, Tys: SVCountTy);
4645
4646 auto OverloadedTy = getSVEType(TypeFlags: SVETypeFlags(Builtin->TypeModifier));
4647 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_psel, Tys: OverloadedTy);
4648 llvm::Value *Ops0 =
4649 IsSVCount ? Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]) : Ops[0];
4650 llvm::Value *Ops1 = EmitSVEPredicateCast(Pred: Ops[1], VTy: OverloadedTy);
4651 llvm::Value *PSel = Builder.CreateCall(Callee: F, Args: {Ops0, Ops1, Ops[2]});
4652 return IsSVCount ? Builder.CreateCall(Callee: CastToSVCountF, Args: PSel) : PSel;
4653 }
4654 case SVE::BI__builtin_sve_svmov_b_z: {
4655 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4656 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4657 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4658 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_and_z, Tys: OverloadedTy);
4659 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[1]});
4660 }
4661
4662 case SVE::BI__builtin_sve_svnot_b_z: {
4663 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4664 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4665 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4666 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_eor_z, Tys: OverloadedTy);
4667 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[0]});
4668 }
4669
4670 case SVE::BI__builtin_sve_svmovlb_u16:
4671 case SVE::BI__builtin_sve_svmovlb_u32:
4672 case SVE::BI__builtin_sve_svmovlb_u64:
4673 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_ushllb);
4674
4675 case SVE::BI__builtin_sve_svmovlb_s16:
4676 case SVE::BI__builtin_sve_svmovlb_s32:
4677 case SVE::BI__builtin_sve_svmovlb_s64:
4678 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_sshllb);
4679
4680 case SVE::BI__builtin_sve_svmovlt_u16:
4681 case SVE::BI__builtin_sve_svmovlt_u32:
4682 case SVE::BI__builtin_sve_svmovlt_u64:
4683 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_ushllt);
4684
4685 case SVE::BI__builtin_sve_svmovlt_s16:
4686 case SVE::BI__builtin_sve_svmovlt_s32:
4687 case SVE::BI__builtin_sve_svmovlt_s64:
4688 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_sshllt);
4689
4690 case SVE::BI__builtin_sve_svpmullt_u16:
4691 case SVE::BI__builtin_sve_svpmullt_u64:
4692 case SVE::BI__builtin_sve_svpmullt_n_u16:
4693 case SVE::BI__builtin_sve_svpmullt_n_u64:
4694 return EmitSVEPMull(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_pmullt_pair);
4695
4696 case SVE::BI__builtin_sve_svpmullb_u16:
4697 case SVE::BI__builtin_sve_svpmullb_u64:
4698 case SVE::BI__builtin_sve_svpmullb_n_u16:
4699 case SVE::BI__builtin_sve_svpmullb_n_u64:
4700 return EmitSVEPMull(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_pmullb_pair);
4701
4702 case SVE::BI__builtin_sve_svdup_n_b8:
4703 case SVE::BI__builtin_sve_svdup_n_b16:
4704 case SVE::BI__builtin_sve_svdup_n_b32:
4705 case SVE::BI__builtin_sve_svdup_n_b64: {
4706 Value *CmpNE =
4707 Builder.CreateICmpNE(LHS: Ops[0], RHS: Constant::getNullValue(Ty: Ops[0]->getType()));
4708 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
4709 Value *Dup = EmitSVEDupX(Scalar: CmpNE, Ty: OverloadedTy);
4710 return EmitSVEPredicateCast(Pred: Dup, VTy: cast<llvm::ScalableVectorType>(Val: Ty));
4711 }
4712
4713 case SVE::BI__builtin_sve_svdupq_n_b8:
4714 case SVE::BI__builtin_sve_svdupq_n_b16:
4715 case SVE::BI__builtin_sve_svdupq_n_b32:
4716 case SVE::BI__builtin_sve_svdupq_n_b64:
4717 case SVE::BI__builtin_sve_svdupq_n_u8:
4718 case SVE::BI__builtin_sve_svdupq_n_s8:
4719 case SVE::BI__builtin_sve_svdupq_n_u64:
4720 case SVE::BI__builtin_sve_svdupq_n_f64:
4721 case SVE::BI__builtin_sve_svdupq_n_s64:
4722 case SVE::BI__builtin_sve_svdupq_n_u16:
4723 case SVE::BI__builtin_sve_svdupq_n_f16:
4724 case SVE::BI__builtin_sve_svdupq_n_bf16:
4725 case SVE::BI__builtin_sve_svdupq_n_s16:
4726 case SVE::BI__builtin_sve_svdupq_n_u32:
4727 case SVE::BI__builtin_sve_svdupq_n_f32:
4728 case SVE::BI__builtin_sve_svdupq_n_s32: {
    // These builtins are implemented by building a fixed-length vector from
    // the operands, inserting it into the low 128 bits of a scalable vector,
    // and replicating that 128-bit quadword across the rest of the vector.
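    // For example, svdupq_n_s32(a, b, c, d) builds <4 x i32> {a, b, c, d},
    // inserts it at element 0 of a <vscale x 4 x i32>, and duplicates the
    // 128-bit lane across the vector with llvm.aarch64.sve.dupq.lane.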
4731 unsigned NumOpnds = Ops.size();
4732
4733 bool IsBoolTy =
4734 cast<llvm::VectorType>(Val: Ty)->getElementType()->isIntegerTy(Bitwidth: 1);
4735
    // For svdupq_n_b* the element type is an integer of width 128/numelts,
4737 // so that the compare can use the width that is natural for the expected
4738 // number of predicate lanes.
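    // For example, svdupq_n_b32 has four boolean operands, so each is
    // zero-extended to an i32 (128 / 4 bits) before the compare below.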
4739 llvm::Type *EltTy = Ops[0]->getType();
4740 if (IsBoolTy)
4741 EltTy = IntegerType::get(C&: getLLVMContext(), NumBits: SVEBitsPerBlock / NumOpnds);
4742
4743 SmallVector<llvm::Value *, 16> VecOps;
4744 for (unsigned I = 0; I < NumOpnds; ++I)
4745 VecOps.push_back(Elt: Builder.CreateZExt(V: Ops[I], DestTy: EltTy));
4746 Value *Vec = BuildVector(Ops: VecOps);
4747
4748 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
4749 Value *InsertSubVec = Builder.CreateInsertVector(
4750 DstType: OverloadedTy, SrcVec: PoisonValue::get(T: OverloadedTy), SubVec: Vec, Idx: uint64_t(0));
4751
4752 Function *F =
4753 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_dupq_lane, Tys: OverloadedTy);
4754 Value *DupQLane =
4755 Builder.CreateCall(Callee: F, Args: {InsertSubVec, Builder.getInt64(C: 0)});
4756
4757 if (!IsBoolTy)
4758 return DupQLane;
4759
4760 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4761 Value *Pred = EmitSVEAllTruePred(TypeFlags);
4762
4763 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
4764 F = CGM.getIntrinsic(IID: NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
4765 : Intrinsic::aarch64_sve_cmpne_wide,
4766 Tys: OverloadedTy);
4767 Value *Call = Builder.CreateCall(
4768 Callee: F, Args: {Pred, DupQLane, EmitSVEDupX(Scalar: Builder.getInt64(C: 0))});
4769 return EmitSVEPredicateCast(Pred: Call, VTy: cast<llvm::ScalableVectorType>(Val: Ty));
4770 }
4771
4772 case SVE::BI__builtin_sve_svpfalse_b:
4773 return ConstantInt::getFalse(Ty);
4774
4775 case SVE::BI__builtin_sve_svpfalse_c: {
4776 auto SVBoolTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
4777 Function *CastToSVCountF =
4778 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_from_svbool, Tys: Ty);
4779 return Builder.CreateCall(Callee: CastToSVCountF, Args: ConstantInt::getFalse(Ty: SVBoolTy));
4780 }
4781
4782 case SVE::BI__builtin_sve_svlen_bf16:
4783 case SVE::BI__builtin_sve_svlen_f16:
4784 case SVE::BI__builtin_sve_svlen_f32:
4785 case SVE::BI__builtin_sve_svlen_f64:
4786 case SVE::BI__builtin_sve_svlen_s8:
4787 case SVE::BI__builtin_sve_svlen_s16:
4788 case SVE::BI__builtin_sve_svlen_s32:
4789 case SVE::BI__builtin_sve_svlen_s64:
4790 case SVE::BI__builtin_sve_svlen_u8:
4791 case SVE::BI__builtin_sve_svlen_u16:
4792 case SVE::BI__builtin_sve_svlen_u32:
4793 case SVE::BI__builtin_sve_svlen_u64: {
4794 SVETypeFlags TF(Builtin->TypeModifier);
4795 return Builder.CreateElementCount(Ty, EC: getSVEType(TypeFlags: TF)->getElementCount());
4796 }
4797
4798 case SVE::BI__builtin_sve_svtbl2_u8:
4799 case SVE::BI__builtin_sve_svtbl2_s8:
4800 case SVE::BI__builtin_sve_svtbl2_u16:
4801 case SVE::BI__builtin_sve_svtbl2_s16:
4802 case SVE::BI__builtin_sve_svtbl2_u32:
4803 case SVE::BI__builtin_sve_svtbl2_s32:
4804 case SVE::BI__builtin_sve_svtbl2_u64:
4805 case SVE::BI__builtin_sve_svtbl2_s64:
4806 case SVE::BI__builtin_sve_svtbl2_f16:
4807 case SVE::BI__builtin_sve_svtbl2_bf16:
4808 case SVE::BI__builtin_sve_svtbl2_f32:
4809 case SVE::BI__builtin_sve_svtbl2_f64: {
4810 SVETypeFlags TF(Builtin->TypeModifier);
4811 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_tbl2, Tys: getSVEType(TypeFlags: TF));
4812 return Builder.CreateCall(Callee: F, Args: Ops);
4813 }
4814
4815 case SVE::BI__builtin_sve_svset_neonq_s8:
4816 case SVE::BI__builtin_sve_svset_neonq_s16:
4817 case SVE::BI__builtin_sve_svset_neonq_s32:
4818 case SVE::BI__builtin_sve_svset_neonq_s64:
4819 case SVE::BI__builtin_sve_svset_neonq_u8:
4820 case SVE::BI__builtin_sve_svset_neonq_u16:
4821 case SVE::BI__builtin_sve_svset_neonq_u32:
4822 case SVE::BI__builtin_sve_svset_neonq_u64:
4823 case SVE::BI__builtin_sve_svset_neonq_f16:
4824 case SVE::BI__builtin_sve_svset_neonq_f32:
4825 case SVE::BI__builtin_sve_svset_neonq_f64:
4826 case SVE::BI__builtin_sve_svset_neonq_bf16: {
4827 return Builder.CreateInsertVector(DstType: Ty, SrcVec: Ops[0], SubVec: Ops[1], Idx: uint64_t(0));
4828 }
4829
4830 case SVE::BI__builtin_sve_svget_neonq_s8:
4831 case SVE::BI__builtin_sve_svget_neonq_s16:
4832 case SVE::BI__builtin_sve_svget_neonq_s32:
4833 case SVE::BI__builtin_sve_svget_neonq_s64:
4834 case SVE::BI__builtin_sve_svget_neonq_u8:
4835 case SVE::BI__builtin_sve_svget_neonq_u16:
4836 case SVE::BI__builtin_sve_svget_neonq_u32:
4837 case SVE::BI__builtin_sve_svget_neonq_u64:
4838 case SVE::BI__builtin_sve_svget_neonq_f16:
4839 case SVE::BI__builtin_sve_svget_neonq_f32:
4840 case SVE::BI__builtin_sve_svget_neonq_f64:
4841 case SVE::BI__builtin_sve_svget_neonq_bf16: {
4842 return Builder.CreateExtractVector(DstType: Ty, SrcVec: Ops[0], Idx: uint64_t(0));
4843 }
4844
4845 case SVE::BI__builtin_sve_svdup_neonq_s8:
4846 case SVE::BI__builtin_sve_svdup_neonq_s16:
4847 case SVE::BI__builtin_sve_svdup_neonq_s32:
4848 case SVE::BI__builtin_sve_svdup_neonq_s64:
4849 case SVE::BI__builtin_sve_svdup_neonq_u8:
4850 case SVE::BI__builtin_sve_svdup_neonq_u16:
4851 case SVE::BI__builtin_sve_svdup_neonq_u32:
4852 case SVE::BI__builtin_sve_svdup_neonq_u64:
4853 case SVE::BI__builtin_sve_svdup_neonq_f16:
4854 case SVE::BI__builtin_sve_svdup_neonq_f32:
4855 case SVE::BI__builtin_sve_svdup_neonq_f64:
4856 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
4857 Value *Insert = Builder.CreateInsertVector(DstType: Ty, SrcVec: PoisonValue::get(T: Ty), SubVec: Ops[0],
4858 Idx: uint64_t(0));
4859 return Builder.CreateIntrinsic(ID: Intrinsic::aarch64_sve_dupq_lane, Types: {Ty},
4860 Args: {Insert, Builder.getInt64(C: 0)});
4861 }
4862 }
4863
4864  // Should not happen.
4865 return nullptr;
4866}
4867
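// For the svsudot/svsumla builtins the signed and unsigned multi-vector
// operand groups are swapped pairwise (Ops[0], the leading slice operand, is
// left untouched), presumably so that a single "us"-flavoured LLVM intrinsic
// can serve both the us* and su* source-level forms.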
4868static void swapCommutativeSMEOperands(unsigned BuiltinID,
4869 SmallVectorImpl<Value *> &Ops) {
4870 unsigned MultiVec;
4871 switch (BuiltinID) {
4872 default:
4873 return;
4874 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
4875 MultiVec = 1;
4876 break;
4877 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
4878 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
4879 MultiVec = 2;
4880 break;
4881 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
4882 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
4883 MultiVec = 4;
4884 break;
4885 }
4886
4887 if (MultiVec > 0)
4888 for (unsigned I = 0; I < MultiVec; ++I)
4889 std::swap(a&: Ops[I + 1], b&: Ops[I + 1 + MultiVec]);
4890}
4891
4892Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
4893 const CallExpr *E) {
4894 auto *Builtin = findARMVectorIntrinsicInMap(IntrinsicMap: AArch64SMEIntrinsicMap, BuiltinID,
4895 MapProvenSorted&: AArch64SMEIntrinsicsProvenSorted);
4896
4897 llvm::SmallVector<Value *, 4> Ops;
4898 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4899 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4900
4901 if (TypeFlags.isLoad() || TypeFlags.isStore())
4902 return EmitSMELd1St1(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4903 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
4904 return EmitSMEReadWrite(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4905 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
4906 BuiltinID == SME::BI__builtin_sme_svzero_za)
4907 return EmitSMEZero(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4908 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
4909 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
4910 BuiltinID == SME::BI__builtin_sme_svldr_za ||
4911 BuiltinID == SME::BI__builtin_sme_svstr_za)
4912 return EmitSMELdrStr(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4913
4914  // Emit a call to set FPMR for intrinsics that require it.
4915 if (TypeFlags.setsFPMR())
4916 Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_set_fpmr),
4917 Args: Ops.pop_back_val());
4918 // Handle builtins which require their multi-vector operands to be swapped
4919 swapCommutativeSMEOperands(BuiltinID, Ops);
4920
4921 // Should not happen!
4922 if (Builtin->LLVMIntrinsic == 0)
4923 return nullptr;
4924
4925 if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
4926 // If we already know the streaming mode, don't bother with the intrinsic
4927 // and emit a constant instead
4928 const auto *FD = cast<FunctionDecl>(Val: CurFuncDecl);
4929 if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
4930 unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
4931 if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
4932 bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
4933 return ConstantInt::getBool(Context&: Builder.getContext(), V: IsStreaming);
4934 }
4935 }
4936 }
4937
4938 // Predicates must match the main datatype.
4939 for (Value *&Op : Ops)
4940 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Op->getType()))
4941 if (PredTy->getElementType()->isIntegerTy(Bitwidth: 1))
4942 Op = EmitSVEPredicateCast(Pred: Op, VTy: getSVEType(TypeFlags));
4943
4944 Function *F =
4945 TypeFlags.isOverloadNone()
4946 ? CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic)
4947 : CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic, Tys: {getSVEType(TypeFlags)});
4948
4949 return Builder.CreateCall(Callee: F, Args: Ops);
4950}
4951
4952/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
4953/// return it as an i8 pointer.
4954Value *readX18AsPtr(CodeGenFunction &CGF) {
4955 LLVMContext &Context = CGF.CGM.getLLVMContext();
4956 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: "x18")};
4957 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
4958 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
4959 llvm::Function *F =
4960 CGF.CGM.getIntrinsic(IID: Intrinsic::read_register, Tys: {CGF.Int64Ty});
4961 llvm::Value *X18 = CGF.Builder.CreateCall(Callee: F, Args: Metadata);
4962 return CGF.Builder.CreateIntToPtr(V: X18, DestTy: CGF.Int8PtrTy);
4963}
4964
4965Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4966 const CallExpr *E,
4967 llvm::Triple::ArchType Arch) {
4968 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
4969 BuiltinID <= clang::AArch64::LastSVEBuiltin)
4970 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
4971
4972 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
4973 BuiltinID <= clang::AArch64::LastSMEBuiltin)
4974 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
4975
4976 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
4977 return EmitAArch64CpuSupports(E);
4978
4979 unsigned HintID = static_cast<unsigned>(-1);
4980 switch (BuiltinID) {
4981 default: break;
4982 case clang::AArch64::BI__builtin_arm_nop:
4983 HintID = 0;
4984 break;
4985 case clang::AArch64::BI__builtin_arm_yield:
4986 case clang::AArch64::BI__yield:
4987 HintID = 1;
4988 break;
4989 case clang::AArch64::BI__builtin_arm_wfe:
4990 case clang::AArch64::BI__wfe:
4991 HintID = 2;
4992 break;
4993 case clang::AArch64::BI__builtin_arm_wfi:
4994 case clang::AArch64::BI__wfi:
4995 HintID = 3;
4996 break;
4997 case clang::AArch64::BI__builtin_arm_sev:
4998 case clang::AArch64::BI__sev:
4999 HintID = 4;
5000 break;
5001 case clang::AArch64::BI__builtin_arm_sevl:
5002 case clang::AArch64::BI__sevl:
5003 HintID = 5;
5004 break;
5005 }
5006
5007 if (HintID != static_cast<unsigned>(-1)) {
5008 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_hint);
5009 return Builder.CreateCall(Callee: F, Args: llvm::ConstantInt::get(Ty: Int32Ty, V: HintID));
5010 }
5011
5012 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
5013 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_break);
5014 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5015 return Builder.CreateCall(Callee: F, Args: Builder.CreateZExt(V: Arg, DestTy: CGM.Int32Ty));
5016 }
5017
5018 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
5019 // Create call to __arm_sme_state and store the results to the two pointers.
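    // __arm_sme_state is an SME support routine returning two i64 values
    // (modelled here as an {i64, i64} struct) under a calling convention that
    // preserves most registers; the two results are simply stored through the
    // pointer arguments supplied by the caller.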
5020 CallInst *CI = EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(
5021 Ty: llvm::FunctionType::get(Result: StructType::get(elt1: CGM.Int64Ty, elts: CGM.Int64Ty), Params: {},
5022 isVarArg: false),
5023 Name: "__arm_sme_state"));
5024 auto Attrs = AttributeList().addFnAttribute(C&: getLLVMContext(),
5025 Kind: "aarch64_pstate_sm_compatible");
5026 CI->setAttributes(Attrs);
5027 CI->setCallingConv(
5028 llvm::CallingConv::
5029 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
5030 Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: CI, Idxs: 0),
5031 Addr: EmitPointerWithAlignment(Addr: E->getArg(Arg: 0)));
5032 return Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: CI, Idxs: 1),
5033 Addr: EmitPointerWithAlignment(Addr: E->getArg(Arg: 1)));
5034 }
5035
5036 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
5037 assert((getContext().getTypeSize(E->getType()) == 32) &&
5038 "rbit of unusual size!");
5039 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5040 return Builder.CreateCall(
5041 Callee: CGM.getIntrinsic(IID: Intrinsic::bitreverse, Tys: Arg->getType()), Args: Arg, Name: "rbit");
5042 }
5043 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
5044 assert((getContext().getTypeSize(E->getType()) == 64) &&
5045 "rbit of unusual size!");
5046 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5047 return Builder.CreateCall(
5048 Callee: CGM.getIntrinsic(IID: Intrinsic::bitreverse, Tys: Arg->getType()), Args: Arg, Name: "rbit");
5049 }
5050
5051 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
5052 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
5053 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5054 Function *F = CGM.getIntrinsic(IID: Intrinsic::ctlz, Tys: Arg->getType());
5055 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
5056 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
5057 Res = Builder.CreateTrunc(V: Res, DestTy: Builder.getInt32Ty());
5058 return Res;
5059 }
5060
5061 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
5062 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5063 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_cls), Args: Arg,
5064 Name: "cls");
5065 }
5066 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
5067 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5068 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_cls64), Args: Arg,
5069 Name: "cls");
5070 }
5071
5072 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
5073 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
5074 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5075 llvm::Type *Ty = Arg->getType();
5076 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_frint32z, Tys: Ty),
5077 Args: Arg, Name: "frint32z");
5078 }
5079
5080 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
5081 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
5082 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5083 llvm::Type *Ty = Arg->getType();
5084 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_frint64z, Tys: Ty),
5085 Args: Arg, Name: "frint64z");
5086 }
5087
5088 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
5089 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
5090 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5091 llvm::Type *Ty = Arg->getType();
5092 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_frint32x, Tys: Ty),
5093 Args: Arg, Name: "frint32x");
5094 }
5095
5096 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
5097 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
5098 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5099 llvm::Type *Ty = Arg->getType();
5100 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_frint64x, Tys: Ty),
5101 Args: Arg, Name: "frint64x");
5102 }
5103
5104 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
5105 assert((getContext().getTypeSize(E->getType()) == 32) &&
5106 "__jcvt of unusual size!");
5107 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5108 return Builder.CreateCall(
5109 Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_fjcvtzs), Args: Arg);
5110 }
5111
5112 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
5113 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
5114 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
5115 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
5116 llvm::Value *MemAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
5117 llvm::Value *ValPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
5118
5119 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
5120 // Load from the address via an LLVM intrinsic, receiving a
5121 // tuple of 8 i64 words, and store each one to ValPtr.
5122 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_ld64b);
5123 llvm::Value *Val = Builder.CreateCall(Callee: F, Args: MemAddr);
5124 llvm::Value *ToRet;
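      // Only the last of the eight stores is kept in ToRet; it is returned as
      // a placeholder result, since the builtin does not produce a useful
      // value of its own.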
5125 for (size_t i = 0; i < 8; i++) {
5126 llvm::Value *ValOffsetPtr =
5127 Builder.CreateGEP(Ty: Int64Ty, Ptr: ValPtr, IdxList: Builder.getInt32(C: i));
5128 Address Addr =
5129 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(Quantity: 8));
5130 ToRet = Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: Val, Idxs: i), Addr);
5131 }
5132 return ToRet;
5133 } else {
5134 // Load 8 i64 words from ValPtr, and store them to the address
5135 // via an LLVM intrinsic.
5136 SmallVector<llvm::Value *, 9> Args;
5137 Args.push_back(Elt: MemAddr);
5138 for (size_t i = 0; i < 8; i++) {
5139 llvm::Value *ValOffsetPtr =
5140 Builder.CreateGEP(Ty: Int64Ty, Ptr: ValPtr, IdxList: Builder.getInt32(C: i));
5141 Address Addr =
5142 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(Quantity: 8));
5143 Args.push_back(Elt: Builder.CreateLoad(Addr));
5144 }
5145
5146 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
5147 ? Intrinsic::aarch64_st64b
5148 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
5149 ? Intrinsic::aarch64_st64bv
5150 : Intrinsic::aarch64_st64bv0);
5151 Function *F = CGM.getIntrinsic(IID: Intr);
5152 return Builder.CreateCall(Callee: F, Args);
5153 }
5154 }
5155
5156 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
5157 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
5158
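    // Both intrinsics return a {random value, status} pair: the 64-bit value
    // is stored through the pointer argument and the zero-extended status bit
    // becomes the result of the builtin.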
5159 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
5160 ? Intrinsic::aarch64_rndr
5161 : Intrinsic::aarch64_rndrrs);
5162 Function *F = CGM.getIntrinsic(IID: Intr);
5163 llvm::Value *Val = Builder.CreateCall(Callee: F);
5164 Value *RandomValue = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
5165 Value *Status = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
5166
5167 Address MemAddress = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
5168 Builder.CreateStore(Val: RandomValue, Addr: MemAddress);
5169 Status = Builder.CreateZExt(V: Status, DestTy: Int32Ty);
5170 return Status;
5171 }
5172
5173 if (BuiltinID == clang::AArch64::BI__clear_cache) {
5174 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5175 const FunctionDecl *FD = E->getDirectCallee();
5176 Value *Ops[2];
5177 for (unsigned i = 0; i < 2; i++)
5178 Ops[i] = EmitScalarExpr(E: E->getArg(Arg: i));
5179 llvm::Type *Ty = CGM.getTypes().ConvertType(T: FD->getType());
5180 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Val: Ty);
5181 StringRef Name = FD->getName();
5182 return EmitNounwindRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name), args: Ops);
5183 }
5184
5185 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5186 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
5187 getContext().getTypeSize(T: E->getType()) == 128) {
5188 Function *F =
5189 CGM.getIntrinsic(IID: BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5190 ? Intrinsic::aarch64_ldaxp
5191 : Intrinsic::aarch64_ldxp);
5192
5193 Value *LdPtr = EmitScalarExpr(E: E->getArg(Arg: 0));
5194 Value *Val = Builder.CreateCall(Callee: F, Args: LdPtr, Name: "ldxp");
5195
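    // Reassemble the 128-bit result: the intrinsic's second result provides
    // the high 64 bits and its first result the low 64 bits.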
5196 Value *Val0 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
5197 Value *Val1 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
5198 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
5199 Val0 = Builder.CreateZExt(V: Val0, DestTy: Int128Ty);
5200 Val1 = Builder.CreateZExt(V: Val1, DestTy: Int128Ty);
5201
5202 Value *ShiftCst = llvm::ConstantInt::get(Ty: Int128Ty, V: 64);
5203 Val = Builder.CreateShl(LHS: Val0, RHS: ShiftCst, Name: "shl", HasNUW: true /* nuw */);
5204 Val = Builder.CreateOr(LHS: Val, RHS: Val1);
5205 return Builder.CreateBitCast(V: Val, DestTy: ConvertType(T: E->getType()));
5206 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
5207 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
5208 Value *LoadAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
5209
5210 QualType Ty = E->getType();
5211 llvm::Type *RealResTy = ConvertType(T: Ty);
5212 llvm::Type *IntTy =
5213 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
5214
5215 Function *F =
5216 CGM.getIntrinsic(IID: BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
5217 ? Intrinsic::aarch64_ldaxr
5218 : Intrinsic::aarch64_ldxr,
5219 Tys: UnqualPtrTy);
5220 CallInst *Val = Builder.CreateCall(Callee: F, Args: LoadAddr, Name: "ldxr");
5221 Val->addParamAttr(
5222 ArgNo: 0, Attr: Attribute::get(Context&: getLLVMContext(), Kind: Attribute::ElementType, Ty: IntTy));
5223
5224 if (RealResTy->isPointerTy())
5225 return Builder.CreateIntToPtr(V: Val, DestTy: RealResTy);
5226
5227 llvm::Type *IntResTy = llvm::IntegerType::get(
5228 C&: getLLVMContext(), NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: RealResTy));
5229 return Builder.CreateBitCast(V: Builder.CreateTruncOrBitCast(V: Val, DestTy: IntResTy),
5230 DestTy: RealResTy);
5231 }
5232
5233 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5234 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
5235 getContext().getTypeSize(T: E->getArg(Arg: 0)->getType()) == 128) {
5236 Function *F =
5237 CGM.getIntrinsic(IID: BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5238 ? Intrinsic::aarch64_stlxp
5239 : Intrinsic::aarch64_stxp);
5240 llvm::Type *STy = llvm::StructType::get(elt1: Int64Ty, elts: Int64Ty);
5241
5242 Address Tmp = CreateMemTemp(T: E->getArg(Arg: 0)->getType());
5243 EmitAnyExprToMem(E: E->getArg(Arg: 0), Location: Tmp, Quals: Qualifiers(), /*init*/ IsInitializer: true);
5244
5245 Tmp = Tmp.withElementType(ElemTy: STy);
5246 llvm::Value *Val = Builder.CreateLoad(Addr: Tmp);
5247
5248 Value *Arg0 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
5249 Value *Arg1 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
5250 Value *StPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
5251 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1, StPtr}, Name: "stxp");
5252 }
5253
5254 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
5255 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
5256 Value *StoreVal = EmitScalarExpr(E: E->getArg(Arg: 0));
5257 Value *StoreAddr = EmitScalarExpr(E: E->getArg(Arg: 1));
5258
5259 QualType Ty = E->getArg(Arg: 0)->getType();
5260 llvm::Type *StoreTy =
5261 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
5262
5263 if (StoreVal->getType()->isPointerTy())
5264 StoreVal = Builder.CreatePtrToInt(V: StoreVal, DestTy: Int64Ty);
5265 else {
5266 llvm::Type *IntTy = llvm::IntegerType::get(
5267 C&: getLLVMContext(),
5268 NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: StoreVal->getType()));
5269 StoreVal = Builder.CreateBitCast(V: StoreVal, DestTy: IntTy);
5270 StoreVal = Builder.CreateZExtOrBitCast(V: StoreVal, DestTy: Int64Ty);
5271 }
5272
5273 Function *F =
5274 CGM.getIntrinsic(IID: BuiltinID == clang::AArch64::BI__builtin_arm_stlex
5275 ? Intrinsic::aarch64_stlxr
5276 : Intrinsic::aarch64_stxr,
5277 Tys: StoreAddr->getType());
5278 CallInst *CI = Builder.CreateCall(Callee: F, Args: {StoreVal, StoreAddr}, Name: "stxr");
5279 CI->addParamAttr(
5280 ArgNo: 1, Attr: Attribute::get(Context&: getLLVMContext(), Kind: Attribute::ElementType, Ty: StoreTy));
5281 return CI;
5282 }
5283
5284 if (BuiltinID == clang::AArch64::BI__getReg) {
5285 Expr::EvalResult Result;
5286 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
5287 llvm_unreachable("Sema will ensure that the parameter is constant");
5288
5289 llvm::APSInt Value = Result.Val.getInt();
5290 LLVMContext &Context = CGM.getLLVMContext();
5291 std::string Reg = Value == 31 ? "sp" : "x" + toString(I: Value, Radix: 10);
5292
5293 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: Reg)};
5294 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
5295 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
5296
5297 llvm::Function *F =
5298 CGM.getIntrinsic(IID: Intrinsic::read_register, Tys: {Int64Ty});
5299 return Builder.CreateCall(Callee: F, Args: Metadata);
5300 }
5301
5302 if (BuiltinID == clang::AArch64::BI__break) {
5303 Expr::EvalResult Result;
5304 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
5305 llvm_unreachable("Sema will ensure that the parameter is constant");
5306
5307 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_break);
5308 return Builder.CreateCall(Callee: F, Args: {EmitScalarExpr(E: E->getArg(Arg: 0))});
5309 }
5310
5311 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
5312 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_clrex);
5313 return Builder.CreateCall(Callee: F);
5314 }
5315
5316 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
5317 return Builder.CreateFence(Ordering: llvm::AtomicOrdering::SequentiallyConsistent,
5318 SSID: llvm::SyncScope::SingleThread);
5319
5320 // CRC32
5321 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5322 switch (BuiltinID) {
5323 case clang::AArch64::BI__builtin_arm_crc32b:
5324 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5325 case clang::AArch64::BI__builtin_arm_crc32cb:
5326 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5327 case clang::AArch64::BI__builtin_arm_crc32h:
5328 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5329 case clang::AArch64::BI__builtin_arm_crc32ch:
5330 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5331 case clang::AArch64::BI__builtin_arm_crc32w:
5332 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5333 case clang::AArch64::BI__builtin_arm_crc32cw:
5334 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5335 case clang::AArch64::BI__builtin_arm_crc32d:
5336 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5337 case clang::AArch64::BI__builtin_arm_crc32cd:
5338 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5339 }
5340
5341 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5342 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
5343 Value *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
5344 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
5345
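    // The data operand may be narrower than the intrinsic's second parameter
    // (e.g. i8 for crc32b), so widen or bitcast it to the expected type.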
5346 llvm::Type *DataTy = F->getFunctionType()->getParamType(i: 1);
5347 Arg1 = Builder.CreateZExtOrBitCast(V: Arg1, DestTy: DataTy);
5348
5349 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1});
5350 }
5351
5352 // Memory Operations (MOPS)
5353 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
5354 Value *Dst = EmitScalarExpr(E: E->getArg(Arg: 0));
5355 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
5356 Value *Size = EmitScalarExpr(E: E->getArg(Arg: 2));
5357 Val = Builder.CreateTrunc(V: Val, DestTy: Int8Ty);
5358 Size = Builder.CreateIntCast(V: Size, DestTy: Int64Ty, isSigned: false);
5359 return Builder.CreateCall(
5360 Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_mops_memset_tag), Args: {Dst, Val, Size});
5361 }
5362
5363 // Memory Tagging Extensions (MTE) Intrinsics
5364 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
5365 switch (BuiltinID) {
5366 case clang::AArch64::BI__builtin_arm_irg:
5367 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
5368 case clang::AArch64::BI__builtin_arm_addg:
5369 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
5370 case clang::AArch64::BI__builtin_arm_gmi:
5371 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
5372 case clang::AArch64::BI__builtin_arm_ldg:
5373 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
5374 case clang::AArch64::BI__builtin_arm_stg:
5375 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
5376 case clang::AArch64::BI__builtin_arm_subp:
5377 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
5378 }
5379
5380 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
5381 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
5382 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
5383 Value *Mask = EmitScalarExpr(E: E->getArg(Arg: 1));
5384
5385 Mask = Builder.CreateZExt(V: Mask, DestTy: Int64Ty);
5386 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
5387 Args: {Pointer, Mask});
5388 }
5389 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
5390 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
5391 Value *TagOffset = EmitScalarExpr(E: E->getArg(Arg: 1));
5392
5393 TagOffset = Builder.CreateZExt(V: TagOffset, DestTy: Int64Ty);
5394 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
5395 Args: {Pointer, TagOffset});
5396 }
5397 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
5398 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
5399 Value *ExcludedMask = EmitScalarExpr(E: E->getArg(Arg: 1));
5400
5401 ExcludedMask = Builder.CreateZExt(V: ExcludedMask, DestTy: Int64Ty);
5402 return Builder.CreateCall(
5403 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {Pointer, ExcludedMask});
5404 }
5405    // Although it is possible to supply a different return
5406    // address (first arg) to this intrinsic, for now we set the
5407    // return address to the same value as the input address.
5408 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
5409 Value *TagAddress = EmitScalarExpr(E: E->getArg(Arg: 0));
5410 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
5411 Args: {TagAddress, TagAddress});
5412 }
5413    // Although it is possible to supply a different tag (to set)
5414    // to this intrinsic (as first arg), for now we supply the tag
5415    // already present in the input address arg (the common use case).
5416 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
5417 Value *TagAddress = EmitScalarExpr(E: E->getArg(Arg: 0));
5418 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: MTEIntrinsicID),
5419 Args: {TagAddress, TagAddress});
5420 }
5421 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
5422 Value *PointerA = EmitScalarExpr(E: E->getArg(Arg: 0));
5423 Value *PointerB = EmitScalarExpr(E: E->getArg(Arg: 1));
5424 return Builder.CreateCall(
5425 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {PointerA, PointerB});
5426 }
5427 }
5428
5429 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5430 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5431 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5432 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5433 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
5434 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
5435 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
5436 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
5437
5438 SpecialRegisterAccessKind AccessKind = Write;
5439 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5440 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
5441 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5442 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
5443 AccessKind = VolatileRead;
5444
5445 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
5446 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
5447
5448 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
5449 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
5450
5451 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
5452 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
5453
5454 llvm::Type *ValueType;
5455 llvm::Type *RegisterType = Int64Ty;
5456 if (Is32Bit) {
5457 ValueType = Int32Ty;
5458 } else if (Is128Bit) {
5459 llvm::Type *Int128Ty =
5460 llvm::IntegerType::getInt128Ty(C&: CGM.getLLVMContext());
5461 ValueType = Int128Ty;
5462 RegisterType = Int128Ty;
5463 } else if (IsPointerBuiltin) {
5464 ValueType = VoidPtrTy;
5465 } else {
5466 ValueType = Int64Ty;
5467    }
5468
5469 return EmitSpecialRegisterBuiltin(CGF&: *this, E, RegisterType, ValueType,
5470 AccessKind);
5471 }
5472
5473 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5474 BuiltinID == clang::AArch64::BI_WriteStatusReg ||
5475 BuiltinID == clang::AArch64::BI__sys) {
5476 LLVMContext &Context = CGM.getLLVMContext();
5477
5478 unsigned SysReg =
5479 E->getArg(Arg: 0)->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
5480
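    // Unpack the packed system-register immediate into the
    // "op0:op1:CRn:CRm:op2" string form used with read_register /
    // write_register. _ReadStatusReg/_WriteStatusReg encode op0 as 2 or 3 (bit
    // 14 of the immediate); __sys always uses op0 == 1.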
5481 std::string SysRegStr;
5482 unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
5483 BuiltinID == clang::AArch64::BI_WriteStatusReg)
5484 ? ((1 << 1) | ((SysReg >> 14) & 1))
5485 : 1;
5486 llvm::raw_string_ostream(SysRegStr)
5487 << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
5488 << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
5489 << (SysReg & 7);
5490
5491 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, Str: SysRegStr) };
5492 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
5493 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
5494
5495 llvm::Type *RegisterType = Int64Ty;
5496 llvm::Type *Types[] = { RegisterType };
5497
5498 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
5499 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::read_register, Tys: Types);
5500
5501 return Builder.CreateCall(Callee: F, Args: Metadata);
5502 }
5503
5504 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::write_register, Tys: Types);
5505 llvm::Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 1));
5506 llvm::Value *Result = Builder.CreateCall(Callee: F, Args: {Metadata, ArgValue});
5507 if (BuiltinID == clang::AArch64::BI__sys) {
5508 // Return 0 for convenience, even though MSVC returns some other undefined
5509 // value.
5510 Result = ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0);
5511 }
5512 return Result;
5513 }
5514
5515 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
5516 llvm::Function *F =
5517 CGM.getIntrinsic(IID: Intrinsic::addressofreturnaddress, Tys: AllocaInt8PtrTy);
5518 return Builder.CreateCall(Callee: F);
5519 }
5520
5521 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
5522 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::sponentry, Tys: AllocaInt8PtrTy);
5523 return Builder.CreateCall(Callee: F);
5524 }
5525
5526 if (BuiltinID == clang::AArch64::BI__mulh ||
5527 BuiltinID == clang::AArch64::BI__umulh) {
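    // Both builtins return the high 64 bits of the full 128-bit product:
    // widen the operands to i128, multiply, and shift the product right by 64.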
5528 llvm::Type *ResType = ConvertType(T: E->getType());
5529 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
5530
5531 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
5532 Value *LHS =
5533 Builder.CreateIntCast(V: EmitScalarExpr(E: E->getArg(Arg: 0)), DestTy: Int128Ty, isSigned: IsSigned);
5534 Value *RHS =
5535 Builder.CreateIntCast(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: Int128Ty, isSigned: IsSigned);
5536
5537 Value *MulResult, *HigherBits;
5538 if (IsSigned) {
5539 MulResult = Builder.CreateNSWMul(LHS, RHS);
5540 HigherBits = Builder.CreateAShr(LHS: MulResult, RHS: 64);
5541 } else {
5542 MulResult = Builder.CreateNUWMul(LHS, RHS);
5543 HigherBits = Builder.CreateLShr(LHS: MulResult, RHS: 64);
5544 }
5545 HigherBits = Builder.CreateIntCast(V: HigherBits, DestTy: ResType, isSigned: IsSigned);
5546
5547 return HigherBits;
5548 }
5549
5550 if (BuiltinID == AArch64::BI__writex18byte ||
5551 BuiltinID == AArch64::BI__writex18word ||
5552 BuiltinID == AArch64::BI__writex18dword ||
5553 BuiltinID == AArch64::BI__writex18qword) {
5554 // Process the args first
5555 Value *OffsetArg = EmitScalarExpr(E: E->getArg(Arg: 0));
5556 Value *DataArg = EmitScalarExpr(E: E->getArg(Arg: 1));
5557
5558 // Read x18 as i8*
5559 llvm::Value *X18 = readX18AsPtr(CGF&: *this);
5560
5561 // Store val at x18 + offset
5562 Value *Offset = Builder.CreateZExt(V: OffsetArg, DestTy: Int64Ty);
5563 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
5564 StoreInst *Store =
5565 Builder.CreateAlignedStore(Val: DataArg, Addr: Ptr, Align: CharUnits::One());
5566 return Store;
5567 }
5568
5569 if (BuiltinID == AArch64::BI__readx18byte ||
5570 BuiltinID == AArch64::BI__readx18word ||
5571 BuiltinID == AArch64::BI__readx18dword ||
5572 BuiltinID == AArch64::BI__readx18qword) {
5573 // Process the args first
5574 Value *OffsetArg = EmitScalarExpr(E: E->getArg(Arg: 0));
5575
5576 // Read x18 as i8*
5577 llvm::Value *X18 = readX18AsPtr(CGF&: *this);
5578
5579 // Load x18 + offset
5580 Value *Offset = Builder.CreateZExt(V: OffsetArg, DestTy: Int64Ty);
5581 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
5582 llvm::Type *IntTy = ConvertType(T: E->getType());
5583 LoadInst *Load = Builder.CreateAlignedLoad(Ty: IntTy, Addr: Ptr, Align: CharUnits::One());
5584 return Load;
5585 }
5586
5587 if (BuiltinID == AArch64::BI__addx18byte ||
5588 BuiltinID == AArch64::BI__addx18word ||
5589 BuiltinID == AArch64::BI__addx18dword ||
5590 BuiltinID == AArch64::BI__addx18qword ||
5591 BuiltinID == AArch64::BI__incx18byte ||
5592 BuiltinID == AArch64::BI__incx18word ||
5593 BuiltinID == AArch64::BI__incx18dword ||
5594 BuiltinID == AArch64::BI__incx18qword) {
5595 llvm::Type *IntTy;
5596 bool isIncrement;
5597 switch (BuiltinID) {
5598 case AArch64::BI__incx18byte:
5599 IntTy = Int8Ty;
5600 isIncrement = true;
5601 break;
5602 case AArch64::BI__incx18word:
5603 IntTy = Int16Ty;
5604 isIncrement = true;
5605 break;
5606 case AArch64::BI__incx18dword:
5607 IntTy = Int32Ty;
5608 isIncrement = true;
5609 break;
5610 case AArch64::BI__incx18qword:
5611 IntTy = Int64Ty;
5612 isIncrement = true;
5613 break;
5614 default:
5615 IntTy = ConvertType(T: E->getArg(Arg: 1)->getType());
5616 isIncrement = false;
5617 break;
5618 }
5619 // Process the args first
5620 Value *OffsetArg = EmitScalarExpr(E: E->getArg(Arg: 0));
5621 Value *ValToAdd =
5622 isIncrement ? ConstantInt::get(Ty: IntTy, V: 1) : EmitScalarExpr(E: E->getArg(Arg: 1));
5623
5624 // Read x18 as i8*
5625 llvm::Value *X18 = readX18AsPtr(CGF&: *this);
5626
5627 // Load x18 + offset
5628 Value *Offset = Builder.CreateZExt(V: OffsetArg, DestTy: Int64Ty);
5629 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
5630 LoadInst *Load = Builder.CreateAlignedLoad(Ty: IntTy, Addr: Ptr, Align: CharUnits::One());
5631
5632 // Add values
5633 Value *AddResult = Builder.CreateAdd(LHS: Load, RHS: ValToAdd);
5634
5635 // Store val at x18 + offset
5636 StoreInst *Store =
5637 Builder.CreateAlignedStore(Val: AddResult, Addr: Ptr, Align: CharUnits::One());
5638 return Store;
5639 }
5640
5641 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
5642 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
5643 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
5644 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
5645 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5646 llvm::Type *RetTy = ConvertType(T: E->getType());
5647 return Builder.CreateBitCast(V: Arg, DestTy: RetTy);
5648 }
5649
5650 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5651 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5652 BuiltinID == AArch64::BI_CountLeadingZeros ||
5653 BuiltinID == AArch64::BI_CountLeadingZeros64) {
5654 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5655 llvm::Type *ArgType = Arg->getType();
5656
5657 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
5658 BuiltinID == AArch64::BI_CountLeadingOnes64)
5659 Arg = Builder.CreateXor(LHS: Arg, RHS: Constant::getAllOnesValue(Ty: ArgType));
5660
5661 Function *F = CGM.getIntrinsic(IID: Intrinsic::ctlz, Tys: ArgType);
5662 Value *Result = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
5663
5664 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
5665 BuiltinID == AArch64::BI_CountLeadingZeros64)
5666 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5667 return Result;
5668 }
5669
5670 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
5671 BuiltinID == AArch64::BI_CountLeadingSigns64) {
5672 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
5673
5674 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
5675 ? CGM.getIntrinsic(IID: Intrinsic::aarch64_cls)
5676 : CGM.getIntrinsic(IID: Intrinsic::aarch64_cls64);
5677
5678 Value *Result = Builder.CreateCall(Callee: F, Args: Arg, Name: "cls");
5679 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
5680 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5681 return Result;
5682 }
5683
5684 if (BuiltinID == AArch64::BI_CountOneBits ||
5685 BuiltinID == AArch64::BI_CountOneBits64) {
5686 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
5687 llvm::Type *ArgType = ArgValue->getType();
5688 Function *F = CGM.getIntrinsic(IID: Intrinsic::ctpop, Tys: ArgType);
5689
5690 Value *Result = Builder.CreateCall(Callee: F, Args: ArgValue);
5691 if (BuiltinID == AArch64::BI_CountOneBits64)
5692 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5693 return Result;
5694 }
5695
5696 if (BuiltinID == AArch64::BI__prefetch) {
5697 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
5698 Value *RW = llvm::ConstantInt::get(Ty: Int32Ty, V: 0);
5699 Value *Locality = ConstantInt::get(Ty: Int32Ty, V: 3);
5700 Value *Data = llvm::ConstantInt::get(Ty: Int32Ty, V: 1);
5701 Function *F = CGM.getIntrinsic(IID: Intrinsic::prefetch, Tys: Address->getType());
5702 return Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, Data});
5703 }
5704
5705 if (BuiltinID == AArch64::BI__hlt) {
5706 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_hlt);
5707 Builder.CreateCall(Callee: F, Args: {EmitScalarExpr(E: E->getArg(Arg: 0))});
5708
5709 // Return 0 for convenience, even though MSVC returns some other undefined
5710 // value.
5711 return ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0);
5712 }
5713
5714 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
5715 return Builder.CreateFPTrunc(
5716 V: Builder.CreateBitCast(V: EmitScalarExpr(E: E->getArg(Arg: 0)),
5717 DestTy: Builder.getFloatTy()),
5718 DestTy: Builder.getBFloatTy());
5719
5720 // Handle MSVC intrinsics before argument evaluation to prevent double
5721 // evaluation.
5722 if (std::optional<MSVCIntrin> MsvcIntId =
5723 translateAarch64ToMsvcIntrin(BuiltinID))
5724 return EmitMSVCBuiltinExpr(BuiltinID: *MsvcIntId, E);
5725
5726  // Some intrinsics are equivalent; if so, use the base intrinsic ID.
5727 auto It = llvm::find_if(Range: NEONEquivalentIntrinsicMap, P: [BuiltinID](auto &P) {
5728 return P.first == BuiltinID;
5729 });
5730 if (It != end(arr: NEONEquivalentIntrinsicMap))
5731 BuiltinID = It->second;
5732
5733 // Find out if any arguments are required to be integer constant
5734 // expressions.
5735 unsigned ICEArguments = 0;
5736 ASTContext::GetBuiltinTypeError Error;
5737 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
5738 assert(Error == ASTContext::GE_None && "Should not codegen an error");
5739
5740 llvm::SmallVector<Value*, 4> Ops;
5741 Address PtrOp0 = Address::invalid();
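  // Note that the loop below stops one argument short: the final argument is
  // handled separately later (as the type-discriminator immediate of an
  // overloaded intrinsic, or as a normal operand emitted in the per-builtin
  // code).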
5742 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5743 if (i == 0) {
5744 switch (BuiltinID) {
5745 case NEON::BI__builtin_neon_vld1_v:
5746 case NEON::BI__builtin_neon_vld1q_v:
5747 case NEON::BI__builtin_neon_vld1_dup_v:
5748 case NEON::BI__builtin_neon_vld1q_dup_v:
5749 case NEON::BI__builtin_neon_vld1_lane_v:
5750 case NEON::BI__builtin_neon_vld1q_lane_v:
5751 case NEON::BI__builtin_neon_vst1_v:
5752 case NEON::BI__builtin_neon_vst1q_v:
5753 case NEON::BI__builtin_neon_vst1_lane_v:
5754 case NEON::BI__builtin_neon_vst1q_lane_v:
5755 case NEON::BI__builtin_neon_vldap1_lane_s64:
5756 case NEON::BI__builtin_neon_vldap1q_lane_s64:
5757 case NEON::BI__builtin_neon_vstl1_lane_s64:
5758 case NEON::BI__builtin_neon_vstl1q_lane_s64:
5759 // Get the alignment for the argument in addition to the value;
5760 // we'll use it later.
5761 PtrOp0 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
5762 Ops.push_back(Elt: PtrOp0.emitRawPointer(CGF&: *this));
5763 continue;
5764 }
5765 }
5766 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
5767 }
5768
5769 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
5770 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
5771 IntrinsicMap: SISDMap, BuiltinID, MapProvenSorted&: AArch64SISDIntrinsicsProvenSorted);
5772
5773 if (Builtin) {
5774 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: E->getNumArgs() - 1)));
5775 Value *Result = EmitCommonNeonSISDBuiltinExpr(CGF&: *this, SISDInfo: *Builtin, Ops, E);
5776 assert(Result && "SISD intrinsic should have been handled");
5777 return Result;
5778 }
5779
5780 const Expr *Arg = E->getArg(Arg: E->getNumArgs()-1);
5781 NeonTypeFlags Type(0);
5782 if (std::optional<llvm::APSInt> Result =
5783 Arg->getIntegerConstantExpr(Ctx: getContext()))
5784 // Determine the type of this overloaded NEON intrinsic.
5785 Type = NeonTypeFlags(Result->getZExtValue());
5786
5787 bool usgn = Type.isUnsigned();
5788 bool quad = Type.isQuad();
5789
5790 // Handle non-overloaded intrinsics first.
5791 switch (BuiltinID) {
5792 default: break;
5793 case NEON::BI__builtin_neon_vabsh_f16:
5794 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5795 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::fabs, Tys: HalfTy), Ops, name: "vabs");
5796 case NEON::BI__builtin_neon_vaddq_p128: {
5797 llvm::Type *Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags::Poly128);
5798 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
5799 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
5800 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
5801 Ops[0] = Builder.CreateXor(LHS: Ops[0], RHS: Ops[1]);
5802 llvm::Type *Int128Ty = llvm::Type::getIntNTy(C&: getLLVMContext(), N: 128);
5803 return Builder.CreateBitCast(V: Ops[0], DestTy: Int128Ty);
5804 }
5805 case NEON::BI__builtin_neon_vldrq_p128: {
5806 llvm::Type *Int128Ty = llvm::Type::getIntNTy(C&: getLLVMContext(), N: 128);
5807 Value *Ptr = EmitScalarExpr(E: E->getArg(Arg: 0));
5808 return Builder.CreateAlignedLoad(Ty: Int128Ty, Addr: Ptr,
5809 Align: CharUnits::fromQuantity(Quantity: 16));
5810 }
5811 case NEON::BI__builtin_neon_vstrq_p128: {
5812 Value *Ptr = Ops[0];
5813 return Builder.CreateDefaultAlignedStore(Val: EmitScalarExpr(E: E->getArg(Arg: 1)), Addr: Ptr);
5814 }
5815 case NEON::BI__builtin_neon_vcvts_f32_u32:
5816 case NEON::BI__builtin_neon_vcvtd_f64_u64:
5817 usgn = true;
5818 [[fallthrough]];
5819 case NEON::BI__builtin_neon_vcvts_f32_s32:
5820 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5821 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5822 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5823 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5824 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5825 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: InTy);
5826 if (usgn)
5827 return Builder.CreateUIToFP(V: Ops[0], DestTy: FTy);
5828 return Builder.CreateSIToFP(V: Ops[0], DestTy: FTy);
5829 }
5830 case NEON::BI__builtin_neon_vcvth_f16_u16:
5831 case NEON::BI__builtin_neon_vcvth_f16_u32:
5832 case NEON::BI__builtin_neon_vcvth_f16_u64:
5833 usgn = true;
5834 [[fallthrough]];
5835 case NEON::BI__builtin_neon_vcvth_f16_s16:
5836 case NEON::BI__builtin_neon_vcvth_f16_s32:
5837 case NEON::BI__builtin_neon_vcvth_f16_s64: {
5838 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5839 llvm::Type *FTy = HalfTy;
5840 llvm::Type *InTy;
5841 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
5842 InTy = Int64Ty;
5843 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
5844 InTy = Int32Ty;
5845 else
5846 InTy = Int16Ty;
5847 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: InTy);
5848 if (usgn)
5849 return Builder.CreateUIToFP(V: Ops[0], DestTy: FTy);
5850 return Builder.CreateSIToFP(V: Ops[0], DestTy: FTy);
5851 }
5852 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5853 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5854 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5855 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5856 case NEON::BI__builtin_neon_vcvth_u16_f16:
5857 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5858 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5859 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5860 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5861 case NEON::BI__builtin_neon_vcvth_s16_f16: {
5862 unsigned Int;
5863 llvm::Type* InTy = Int32Ty;
5864 llvm::Type* FTy = HalfTy;
5865 llvm::Type *Tys[2] = {InTy, FTy};
5866 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5867 switch (BuiltinID) {
5868 default: llvm_unreachable("missing builtin ID in switch!");
5869 case NEON::BI__builtin_neon_vcvtah_u16_f16:
5870 Int = Intrinsic::aarch64_neon_fcvtau; break;
5871 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
5872 Int = Intrinsic::aarch64_neon_fcvtmu; break;
5873 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
5874 Int = Intrinsic::aarch64_neon_fcvtnu; break;
5875 case NEON::BI__builtin_neon_vcvtph_u16_f16:
5876 Int = Intrinsic::aarch64_neon_fcvtpu; break;
5877 case NEON::BI__builtin_neon_vcvth_u16_f16:
5878 Int = Intrinsic::aarch64_neon_fcvtzu; break;
5879 case NEON::BI__builtin_neon_vcvtah_s16_f16:
5880 Int = Intrinsic::aarch64_neon_fcvtas; break;
5881 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
5882 Int = Intrinsic::aarch64_neon_fcvtms; break;
5883 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
5884 Int = Intrinsic::aarch64_neon_fcvtns; break;
5885 case NEON::BI__builtin_neon_vcvtph_s16_f16:
5886 Int = Intrinsic::aarch64_neon_fcvtps; break;
5887 case NEON::BI__builtin_neon_vcvth_s16_f16:
5888 Int = Intrinsic::aarch64_neon_fcvtzs; break;
5889 }
5890 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvt");
5891 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
5892 }
5893 case NEON::BI__builtin_neon_vcaleh_f16:
5894 case NEON::BI__builtin_neon_vcalth_f16:
5895 case NEON::BI__builtin_neon_vcageh_f16:
5896 case NEON::BI__builtin_neon_vcagth_f16: {
5897 unsigned Int;
5898 llvm::Type* InTy = Int32Ty;
5899 llvm::Type* FTy = HalfTy;
5900 llvm::Type *Tys[2] = {InTy, FTy};
5901 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
5902 switch (BuiltinID) {
5903 default: llvm_unreachable("missing builtin ID in switch!");
5904 case NEON::BI__builtin_neon_vcageh_f16:
5905 Int = Intrinsic::aarch64_neon_facge; break;
5906 case NEON::BI__builtin_neon_vcagth_f16:
5907 Int = Intrinsic::aarch64_neon_facgt; break;
5908 case NEON::BI__builtin_neon_vcaleh_f16:
5909 Int = Intrinsic::aarch64_neon_facge; std::swap(a&: Ops[0], b&: Ops[1]); break;
5910 case NEON::BI__builtin_neon_vcalth_f16:
5911 Int = Intrinsic::aarch64_neon_facgt; std::swap(a&: Ops[0], b&: Ops[1]); break;
5912 }
5913 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "facg");
5914 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
5915 }
5916 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5917 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
5918 unsigned Int;
5919 llvm::Type* InTy = Int32Ty;
5920 llvm::Type* FTy = HalfTy;
5921 llvm::Type *Tys[2] = {InTy, FTy};
5922 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
5923 switch (BuiltinID) {
5924 default: llvm_unreachable("missing builtin ID in switch!");
5925 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
5926 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
5927 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
5928 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
5929 }
5930 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvth_n");
5931 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
5932 }
5933 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5934 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
5935 unsigned Int;
5936 llvm::Type* FTy = HalfTy;
5937 llvm::Type* InTy = Int32Ty;
5938 llvm::Type *Tys[2] = {FTy, InTy};
5939 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
5940 switch (BuiltinID) {
5941 default: llvm_unreachable("missing builtin ID in switch!");
5942 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
5943 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
5944 Ops[0] = Builder.CreateSExt(V: Ops[0], DestTy: InTy, Name: "sext");
5945 break;
5946 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
5947 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
5948 Ops[0] = Builder.CreateZExt(V: Ops[0], DestTy: InTy);
5949 break;
5950 }
5951 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvth_n");
5952 }
5953 case NEON::BI__builtin_neon_vpaddd_s64: {
5954 auto *Ty = llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2);
5955 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
5956    // The vector is v2i64, so make sure it's bitcast to that.
5957 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty, Name: "v2i64");
5958 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
5959 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
5960 Value *Op0 = Builder.CreateExtractElement(Vec, Idx: Idx0, Name: "lane0");
5961 Value *Op1 = Builder.CreateExtractElement(Vec, Idx: Idx1, Name: "lane1");
5962    // Pairwise addition of a v2i64 into a scalar i64.
5963 return Builder.CreateAdd(LHS: Op0, RHS: Op1, Name: "vpaddd");
5964 }
5965 case NEON::BI__builtin_neon_vpaddd_f64: {
5966 auto *Ty = llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2);
5967 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
5968 // The vector is v2f64, so make sure it's bitcast to that.
5969 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty, Name: "v2f64");
5970 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
5971 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
5972 Value *Op0 = Builder.CreateExtractElement(Vec, Idx: Idx0, Name: "lane0");
5973 Value *Op1 = Builder.CreateExtractElement(Vec, Idx: Idx1, Name: "lane1");
5974 // Pairwise addition of a v2f64 into a scalar f64.
5975 return Builder.CreateFAdd(L: Op0, R: Op1, Name: "vpaddd");
5976 }
5977 case NEON::BI__builtin_neon_vpadds_f32: {
5978 auto *Ty = llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2);
5979 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
5980 // The vector is v2f32, so make sure it's bitcast to that.
5981 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty, Name: "v2f32");
5982 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
5983 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
5984 Value *Op0 = Builder.CreateExtractElement(Vec, Idx: Idx0, Name: "lane0");
5985 Value *Op1 = Builder.CreateExtractElement(Vec, Idx: Idx1, Name: "lane1");
5986 // Pairwise addition of a v2f32 into a scalar f32.
5987 return Builder.CreateFAdd(L: Op0, R: Op1, Name: "vpaddd");
5988 }
5989 case NEON::BI__builtin_neon_vceqzd_s64:
5990 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5991 return EmitAArch64CompareBuiltinExpr(
5992 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
5993 Pred: ICmpInst::ICMP_EQ, Name: "vceqz");
5994 case NEON::BI__builtin_neon_vceqzd_f64:
5995 case NEON::BI__builtin_neon_vceqzs_f32:
5996 case NEON::BI__builtin_neon_vceqzh_f16:
5997 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
5998 return EmitAArch64CompareBuiltinExpr(
5999 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6000 Pred: ICmpInst::FCMP_OEQ, Name: "vceqz");
6001 case NEON::BI__builtin_neon_vcgezd_s64:
6002 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6003 return EmitAArch64CompareBuiltinExpr(
6004 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6005 Pred: ICmpInst::ICMP_SGE, Name: "vcgez");
6006 case NEON::BI__builtin_neon_vcgezd_f64:
6007 case NEON::BI__builtin_neon_vcgezs_f32:
6008 case NEON::BI__builtin_neon_vcgezh_f16:
6009 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6010 return EmitAArch64CompareBuiltinExpr(
6011 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6012 Pred: ICmpInst::FCMP_OGE, Name: "vcgez");
6013 case NEON::BI__builtin_neon_vclezd_s64:
6014 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6015 return EmitAArch64CompareBuiltinExpr(
6016 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6017 Pred: ICmpInst::ICMP_SLE, Name: "vclez");
6018 case NEON::BI__builtin_neon_vclezd_f64:
6019 case NEON::BI__builtin_neon_vclezs_f32:
6020 case NEON::BI__builtin_neon_vclezh_f16:
6021 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6022 return EmitAArch64CompareBuiltinExpr(
6023 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6024 Pred: ICmpInst::FCMP_OLE, Name: "vclez");
6025 case NEON::BI__builtin_neon_vcgtzd_s64:
6026 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6027 return EmitAArch64CompareBuiltinExpr(
6028 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6029 Pred: ICmpInst::ICMP_SGT, Name: "vcgtz");
6030 case NEON::BI__builtin_neon_vcgtzd_f64:
6031 case NEON::BI__builtin_neon_vcgtzs_f32:
6032 case NEON::BI__builtin_neon_vcgtzh_f16:
6033 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6034 return EmitAArch64CompareBuiltinExpr(
6035 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6036 Pred: ICmpInst::FCMP_OGT, Name: "vcgtz");
6037 case NEON::BI__builtin_neon_vcltzd_s64:
6038 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6039 return EmitAArch64CompareBuiltinExpr(
6040 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6041 Pred: ICmpInst::ICMP_SLT, Name: "vcltz");
6042
6043 case NEON::BI__builtin_neon_vcltzd_f64:
6044 case NEON::BI__builtin_neon_vcltzs_f32:
6045 case NEON::BI__builtin_neon_vcltzh_f16:
6046 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6047 return EmitAArch64CompareBuiltinExpr(
6048 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
6049 Pred: ICmpInst::FCMP_OLT, Name: "vcltz");
6050
6051 case NEON::BI__builtin_neon_vceqzd_u64: {
6052 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6053 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
6054 Ops[0] =
6055 Builder.CreateICmpEQ(LHS: Ops[0], RHS: llvm::Constant::getNullValue(Ty: Int64Ty));
6056 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vceqzd");
6057 }
6058 case NEON::BI__builtin_neon_vceqd_f64:
6059 case NEON::BI__builtin_neon_vcled_f64:
6060 case NEON::BI__builtin_neon_vcltd_f64:
6061 case NEON::BI__builtin_neon_vcged_f64:
6062 case NEON::BI__builtin_neon_vcgtd_f64: {
6063 llvm::CmpInst::Predicate P;
6064 switch (BuiltinID) {
6065 default: llvm_unreachable("missing builtin ID in switch!");
6066 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6067 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6068 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6069 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6070 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6071 }
6072 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6073 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
6074 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: DoubleTy);
6075 if (P == llvm::FCmpInst::FCMP_OEQ)
6076 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
6077 else
6078 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
6079 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vcmpd");
6080 }
6081 case NEON::BI__builtin_neon_vceqs_f32:
6082 case NEON::BI__builtin_neon_vcles_f32:
6083 case NEON::BI__builtin_neon_vclts_f32:
6084 case NEON::BI__builtin_neon_vcges_f32:
6085 case NEON::BI__builtin_neon_vcgts_f32: {
6086 llvm::CmpInst::Predicate P;
6087 switch (BuiltinID) {
6088 default: llvm_unreachable("missing builtin ID in switch!");
6089 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6090 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6091 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6092 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6093 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6094 }
6095 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6096 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: FloatTy);
6097 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: FloatTy);
6098 if (P == llvm::FCmpInst::FCMP_OEQ)
6099 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
6100 else
6101 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
6102 return Builder.CreateSExt(V: Ops[0], DestTy: Int32Ty, Name: "vcmpd");
6103 }
6104 case NEON::BI__builtin_neon_vceqh_f16:
6105 case NEON::BI__builtin_neon_vcleh_f16:
6106 case NEON::BI__builtin_neon_vclth_f16:
6107 case NEON::BI__builtin_neon_vcgeh_f16:
6108 case NEON::BI__builtin_neon_vcgth_f16: {
6109 llvm::CmpInst::Predicate P;
6110 switch (BuiltinID) {
6111 default: llvm_unreachable("missing builtin ID in switch!");
6112 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6113 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6114 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6115 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6116 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6117 }
6118 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6119 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: HalfTy);
6120 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: HalfTy);
6121 if (P == llvm::FCmpInst::FCMP_OEQ)
6122 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
6123 else
6124 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
6125 return Builder.CreateSExt(V: Ops[0], DestTy: Int16Ty, Name: "vcmpd");
6126 }
6127 case NEON::BI__builtin_neon_vceqd_s64:
6128 case NEON::BI__builtin_neon_vceqd_u64:
6129 case NEON::BI__builtin_neon_vcgtd_s64:
6130 case NEON::BI__builtin_neon_vcgtd_u64:
6131 case NEON::BI__builtin_neon_vcltd_s64:
6132 case NEON::BI__builtin_neon_vcltd_u64:
6133 case NEON::BI__builtin_neon_vcged_u64:
6134 case NEON::BI__builtin_neon_vcged_s64:
6135 case NEON::BI__builtin_neon_vcled_u64:
6136 case NEON::BI__builtin_neon_vcled_s64: {
6137 llvm::CmpInst::Predicate P;
6138 switch (BuiltinID) {
6139 default: llvm_unreachable("missing builtin ID in switch!");
6140 case NEON::BI__builtin_neon_vceqd_s64:
6141 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6142 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6143 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6144 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6145 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6146 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6147 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6148 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6149 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6150 }
6151 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6152 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
6153 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
6154 Ops[0] = Builder.CreateICmp(P, LHS: Ops[0], RHS: Ops[1]);
6155 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vceqd");
6156 }
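// vtst: AND the operands and compare the result against zero, sign-extending
// the i1 into a mask.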
6157 case NEON::BI__builtin_neon_vtstd_s64:
6158 case NEON::BI__builtin_neon_vtstd_u64: {
6159 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6160 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
6161 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
6162 Ops[0] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1]);
6163 Ops[0] = Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Ops[0],
6164 RHS: llvm::Constant::getNullValue(Ty: Int64Ty));
6165 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vtstd");
6166 }
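// vset_lane: insert the scalar operand into the vector at the constant lane
// index.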
6167 case NEON::BI__builtin_neon_vset_lane_i8:
6168 case NEON::BI__builtin_neon_vset_lane_i16:
6169 case NEON::BI__builtin_neon_vset_lane_i32:
6170 case NEON::BI__builtin_neon_vset_lane_i64:
6171 case NEON::BI__builtin_neon_vset_lane_bf16:
6172 case NEON::BI__builtin_neon_vset_lane_f32:
6173 case NEON::BI__builtin_neon_vsetq_lane_i8:
6174 case NEON::BI__builtin_neon_vsetq_lane_i16:
6175 case NEON::BI__builtin_neon_vsetq_lane_i32:
6176 case NEON::BI__builtin_neon_vsetq_lane_i64:
6177 case NEON::BI__builtin_neon_vsetq_lane_bf16:
6178 case NEON::BI__builtin_neon_vsetq_lane_f32:
6179 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6180 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
6181 case NEON::BI__builtin_neon_vset_lane_f64:
6182 // The vector type needs a cast for the v1f64 variant.
6183 Ops[1] =
6184 Builder.CreateBitCast(V: Ops[1], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
6185 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6186 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
6187 case NEON::BI__builtin_neon_vset_lane_mf8:
6188 case NEON::BI__builtin_neon_vsetq_lane_mf8:
6189 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6190 // The input is represented with a vector type and needs a cast to the scalar i8 type.
6191 Ops[0] =
6192 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::Type::getInt8Ty(C&: getLLVMContext()));
6193 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
6194 case NEON::BI__builtin_neon_vsetq_lane_f64:
6195 // The vector type needs a cast for the v2f64 variant.
6196 Ops[1] =
6197 Builder.CreateBitCast(V: Ops[1], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2));
6198 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6199 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
6200
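// vget_lane/vdup*_lane: bitcast the operand to the expected fixed vector
// type, then extract the requested lane.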
6201 case NEON::BI__builtin_neon_vget_lane_i8:
6202 case NEON::BI__builtin_neon_vdupb_lane_i8:
6203 Ops[0] =
6204 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8));
6205 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6206 Name: "vget_lane");
6207 case NEON::BI__builtin_neon_vgetq_lane_i8:
6208 case NEON::BI__builtin_neon_vdupb_laneq_i8:
6209 Ops[0] =
6210 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16));
6211 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6212 Name: "vgetq_lane");
6213 case NEON::BI__builtin_neon_vget_lane_mf8:
6214 case NEON::BI__builtin_neon_vdupb_lane_mf8:
6215 case NEON::BI__builtin_neon_vgetq_lane_mf8:
6216 case NEON::BI__builtin_neon_vdupb_laneq_mf8:
6217 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6218 Name: "vget_lane");
6219 case NEON::BI__builtin_neon_vget_lane_i16:
6220 case NEON::BI__builtin_neon_vduph_lane_i16:
6221 Ops[0] =
6222 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4));
6223 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6224 Name: "vget_lane");
6225 case NEON::BI__builtin_neon_vgetq_lane_i16:
6226 case NEON::BI__builtin_neon_vduph_laneq_i16:
6227 Ops[0] =
6228 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8));
6229 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6230 Name: "vgetq_lane");
6231 case NEON::BI__builtin_neon_vget_lane_i32:
6232 case NEON::BI__builtin_neon_vdups_lane_i32:
6233 Ops[0] =
6234 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 2));
6235 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6236 Name: "vget_lane");
6237 case NEON::BI__builtin_neon_vdups_lane_f32:
6238 Ops[0] =
6239 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2));
6240 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6241 Name: "vdups_lane");
6242 case NEON::BI__builtin_neon_vgetq_lane_i32:
6243 case NEON::BI__builtin_neon_vdups_laneq_i32:
6244 Ops[0] =
6245 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4));
6246 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6247 Name: "vgetq_lane");
6248 case NEON::BI__builtin_neon_vget_lane_i64:
6249 case NEON::BI__builtin_neon_vdupd_lane_i64:
6250 Ops[0] =
6251 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 1));
6252 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6253 Name: "vget_lane");
6254 case NEON::BI__builtin_neon_vdupd_lane_f64:
6255 Ops[0] =
6256 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
6257 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6258 Name: "vdupd_lane");
6259 case NEON::BI__builtin_neon_vgetq_lane_i64:
6260 case NEON::BI__builtin_neon_vdupd_laneq_i64:
6261 Ops[0] =
6262 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2));
6263 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6264 Name: "vgetq_lane");
6265 case NEON::BI__builtin_neon_vget_lane_f32:
6266 Ops[0] =
6267 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2));
6268 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6269 Name: "vget_lane");
6270 case NEON::BI__builtin_neon_vget_lane_f64:
6271 Ops[0] =
6272 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
6273 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6274 Name: "vget_lane");
6275 case NEON::BI__builtin_neon_vgetq_lane_f32:
6276 case NEON::BI__builtin_neon_vdups_laneq_f32:
6277 Ops[0] =
6278 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4));
6279 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6280 Name: "vgetq_lane");
6281 case NEON::BI__builtin_neon_vgetq_lane_f64:
6282 case NEON::BI__builtin_neon_vdupd_laneq_f64:
6283 Ops[0] =
6284 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2));
6285 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6286 Name: "vgetq_lane");
6287 case NEON::BI__builtin_neon_vaddh_f16:
6288 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6289 return Builder.CreateFAdd(L: Ops[0], R: Ops[1], Name: "vaddh");
6290 case NEON::BI__builtin_neon_vsubh_f16:
6291 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6292 return Builder.CreateFSub(L: Ops[0], R: Ops[1], Name: "vsubh");
6293 case NEON::BI__builtin_neon_vmulh_f16:
6294 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6295 return Builder.CreateFMul(L: Ops[0], R: Ops[1], Name: "vmulh");
6296 case NEON::BI__builtin_neon_vdivh_f16:
6297 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6298 return Builder.CreateFDiv(L: Ops[0], R: Ops[1], Name: "vdivh");
6299 case NEON::BI__builtin_neon_vfmah_f16:
6300 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6301 return emitCallMaybeConstrainedFPBuiltin(
6302 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty: HalfTy,
6303 Args: {EmitScalarExpr(E: E->getArg(Arg: 1)), EmitScalarExpr(E: E->getArg(Arg: 2)), Ops[0]});
6304 case NEON::BI__builtin_neon_vfmsh_f16: {
6305 Value* Neg = Builder.CreateFNeg(V: EmitScalarExpr(E: E->getArg(Arg: 1)), Name: "vsubh");
6306
6307 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6308 return emitCallMaybeConstrainedFPBuiltin(
6309 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty: HalfTy,
6310 Args: {Neg, EmitScalarExpr(E: E->getArg(Arg: 2)), Ops[0]});
6311 }
6312 case NEON::BI__builtin_neon_vaddd_s64:
6313 case NEON::BI__builtin_neon_vaddd_u64:
6314 return Builder.CreateAdd(LHS: Ops[0], RHS: EmitScalarExpr(E: E->getArg(Arg: 1)), Name: "vaddd");
6315 case NEON::BI__builtin_neon_vsubd_s64:
6316 case NEON::BI__builtin_neon_vsubd_u64:
6317 return Builder.CreateSub(LHS: Ops[0], RHS: EmitScalarExpr(E: E->getArg(Arg: 1)), Name: "vsubd");
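// Scalar saturating doubling multiply-accumulate: widen the i16 operands to
// <4 x i32> with sqdmull, take lane 0, and saturating add/subtract it into
// the accumulator.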
6318 case NEON::BI__builtin_neon_vqdmlalh_s16:
6319 case NEON::BI__builtin_neon_vqdmlslh_s16: {
6320 SmallVector<Value *, 2> ProductOps;
6321 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[1]));
6322 ProductOps.push_back(Elt: vectorWrapScalar16(Op: EmitScalarExpr(E: E->getArg(Arg: 2))));
6323 auto *VTy = llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4);
6324 Ops[1] = EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqdmull, Tys: VTy),
6325 Ops&: ProductOps, name: "vqdmlXl");
6326 Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
6327 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: CI, Name: "lane0");
6328
6329 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6330 ? Intrinsic::aarch64_neon_sqadd
6331 : Intrinsic::aarch64_neon_sqsub;
6332 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccumInt, Tys: Int32Ty), Ops, name: "vqdmlXl");
6333 }
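// Scalar saturating shifts by immediate: zero-extend the shift amount to i64
// and call the sqshlu/uqshl/sqshl intrinsics.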
6334 case NEON::BI__builtin_neon_vqshlud_n_s64: {
6335 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6336 Ops[1] = Builder.CreateZExt(V: Ops[1], DestTy: Int64Ty);
6337 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqshlu, Tys: Int64Ty),
6338 Ops, name: "vqshlu_n");
6339 }
6340 case NEON::BI__builtin_neon_vqshld_n_u64:
6341 case NEON::BI__builtin_neon_vqshld_n_s64: {
6342 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6343 ? Intrinsic::aarch64_neon_uqshl
6344 : Intrinsic::aarch64_neon_sqshl;
6345 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6346 Ops[1] = Builder.CreateZExt(V: Ops[1], DestTy: Int64Ty);
6347 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Int64Ty), Ops, name: "vqshl_n");
6348 }
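// There is no scalar rounding-shift-right intrinsic, so negate the immediate
// and use the rounding left shift ([us]rshl).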
6349 case NEON::BI__builtin_neon_vrshrd_n_u64:
6350 case NEON::BI__builtin_neon_vrshrd_n_s64: {
6351 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6352 ? Intrinsic::aarch64_neon_urshl
6353 : Intrinsic::aarch64_neon_srshl;
6354 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6355 int SV = cast<ConstantInt>(Val: Ops[1])->getSExtValue();
6356 Ops[1] = ConstantInt::get(Ty: Int64Ty, V: -SV);
6357 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Int64Ty), Ops, name: "vrshr_n");
6358 }
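// Accumulating form of the rounding right shift: shift via [us]rshl with a
// negated amount, then add the accumulator.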
6359 case NEON::BI__builtin_neon_vrsrad_n_u64:
6360 case NEON::BI__builtin_neon_vrsrad_n_s64: {
6361 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6362 ? Intrinsic::aarch64_neon_urshl
6363 : Intrinsic::aarch64_neon_srshl;
6364 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
6365 Ops.push_back(Elt: Builder.CreateNeg(V: EmitScalarExpr(E: E->getArg(Arg: 2))));
6366 Ops[1] = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Int, Tys: Int64Ty),
6367 Args: {Ops[1], Builder.CreateSExt(V: Ops[2], DestTy: Int64Ty)});
6368 return Builder.CreateAdd(LHS: Ops[0], RHS: Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty));
6369 }
6370 case NEON::BI__builtin_neon_vshld_n_s64:
6371 case NEON::BI__builtin_neon_vshld_n_u64: {
6372 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
6373 return Builder.CreateShl(
6374 LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: Amt->getZExtValue()), Name: "shld_n");
6375 }
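// Shifting an i64 right by 64 would be poison in IR; clamping the immediate
// to 63 gives the architecturally expected result for the arithmetic shift.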
6376 case NEON::BI__builtin_neon_vshrd_n_s64: {
6377 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
6378 return Builder.CreateAShr(
6379 LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: std::min(a: static_cast<uint64_t>(63),
6380 b: Amt->getZExtValue())),
6381 Name: "shrd_n");
6382 }
6383 case NEON::BI__builtin_neon_vshrd_n_u64: {
6384 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
6385 uint64_t ShiftAmt = Amt->getZExtValue();
6386 // Right-shifting an unsigned value by its size yields 0.
6387 if (ShiftAmt == 64)
6388 return ConstantInt::get(Ty: Int64Ty, V: 0);
6389 return Builder.CreateLShr(LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: ShiftAmt),
6390 Name: "shrd_n");
6391 }
6392 case NEON::BI__builtin_neon_vsrad_n_s64: {
6393 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 2)));
6394 Ops[1] = Builder.CreateAShr(
6395 LHS: Ops[1], RHS: ConstantInt::get(Ty: Int64Ty, V: std::min(a: static_cast<uint64_t>(63),
6396 b: Amt->getZExtValue())),
6397 Name: "shrd_n");
6398 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
6399 }
6400 case NEON::BI__builtin_neon_vsrad_n_u64: {
6401 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 2)));
6402 uint64_t ShiftAmt = Amt->getZExtValue();
6403 // Right-shifting an unsigned value by its size yields 0.
6404 // As Op + 0 = Op, return Ops[0] directly.
6405 if (ShiftAmt == 64)
6406 return Ops[0];
6407 Ops[1] = Builder.CreateLShr(LHS: Ops[1], RHS: ConstantInt::get(Ty: Int64Ty, V: ShiftAmt),
6408 Name: "shrd_n");
6409 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
6410 }
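// Lane variants: extract the selected lane first, then reuse the
// sqdmull + saturating accumulate sequence.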
6411 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6412 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6413 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6414 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6415 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: EmitScalarExpr(E: E->getArg(Arg: 3)),
6416 Name: "lane");
6417 SmallVector<Value *, 2> ProductOps;
6418 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[1]));
6419 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[2]));
6420 auto *VTy = llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4);
6421 Ops[1] = EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqdmull, Tys: VTy),
6422 Ops&: ProductOps, name: "vqdmlXl");
6423 Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
6424 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: CI, Name: "lane0");
6425 Ops.pop_back();
6426
6427 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6428 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6429 ? Intrinsic::aarch64_neon_sqadd
6430 : Intrinsic::aarch64_neon_sqsub;
6431 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccInt, Tys: Int32Ty), Ops, name: "vqdmlXl");
6432 }
6433 case NEON::BI__builtin_neon_vqdmlals_s32:
6434 case NEON::BI__builtin_neon_vqdmlsls_s32: {
6435 SmallVector<Value *, 2> ProductOps;
6436 ProductOps.push_back(Elt: Ops[1]);
6437 ProductOps.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
6438 Ops[1] =
6439 EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqdmulls_scalar),
6440 Ops&: ProductOps, name: "vqdmlXl");
6441
6442 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6443 ? Intrinsic::aarch64_neon_sqadd
6444 : Intrinsic::aarch64_neon_sqsub;
6445 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccumInt, Tys: Int64Ty), Ops, name: "vqdmlXl");
6446 }
6447 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6448 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6449 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6450 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6451 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: EmitScalarExpr(E: E->getArg(Arg: 3)),
6452 Name: "lane");
6453 SmallVector<Value *, 2> ProductOps;
6454 ProductOps.push_back(Elt: Ops[1]);
6455 ProductOps.push_back(Elt: Ops[2]);
6456 Ops[1] =
6457 EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_sqdmulls_scalar),
6458 Ops&: ProductOps, name: "vqdmlXl");
6459 Ops.pop_back();
6460
6461 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6462 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6463 ? Intrinsic::aarch64_neon_sqadd
6464 : Intrinsic::aarch64_neon_sqsub;
6465 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccInt, Tys: Int64Ty), Ops, name: "vqdmlXl");
6466 }
6467 case NEON::BI__builtin_neon_vget_lane_bf16:
6468 case NEON::BI__builtin_neon_vduph_lane_bf16:
6469 case NEON::BI__builtin_neon_vduph_lane_f16: {
6470 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6471 Name: "vget_lane");
6472 }
6473 case NEON::BI__builtin_neon_vgetq_lane_bf16:
6474 case NEON::BI__builtin_neon_vduph_laneq_bf16:
6475 case NEON::BI__builtin_neon_vduph_laneq_f16: {
6476 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
6477 Name: "vgetq_lane");
6478 }
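// Truncate <4 x f32> to <4 x bf16> with a single fptrunc.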
6479 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
6480 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
6481 llvm::Type *V4BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 4);
6482 return Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[0], DestTy: V4F32), DestTy: V4BF16);
6483 }
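// vcvtq_low: truncate the f32 lanes to bf16 and concatenate with zeroes so
// the high half of the <8 x bf16> result is zero.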
6484 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
6485 SmallVector<int, 16> ConcatMask(8);
6486 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
6487 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
6488 llvm::Type *V4BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 4);
6489 llvm::Value *Trunc =
6490 Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[0], DestTy: V4F32), DestTy: V4BF16);
6491 return Builder.CreateShuffleVector(
6492 V1: Trunc, V2: ConstantAggregateZero::get(Ty: V4BF16), Mask: ConcatMask);
6493 }
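// vcvtq_high: keep the low four bf16 lanes of the first operand and place
// the newly truncated lanes from the second operand into the high half.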
6494 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
6495 SmallVector<int, 16> ConcatMask(8);
6496 std::iota(first: ConcatMask.begin(), last: ConcatMask.end(), value: 0);
6497 SmallVector<int, 16> LoMask(4);
6498 std::iota(first: LoMask.begin(), last: LoMask.end(), value: 0);
6499 llvm::Type *V4F32 = FixedVectorType::get(ElementType: Builder.getFloatTy(), NumElts: 4);
6500 llvm::Type *V4BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 4);
6501 llvm::Type *V8BF16 = FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts: 8);
6502 llvm::Value *Inactive = Builder.CreateShuffleVector(
6503 V: Builder.CreateBitCast(V: Ops[0], DestTy: V8BF16), Mask: LoMask);
6504 llvm::Value *Trunc =
6505 Builder.CreateFPTrunc(V: Builder.CreateBitCast(V: Ops[1], DestTy: V4F32), DestTy: V4BF16);
6506 return Builder.CreateShuffleVector(V1: Inactive, V2: Trunc, Mask: ConcatMask);
6507 }
6508
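// The MSVC _InterlockedAdd* builtins return the updated value, while
// atomicrmw returns the original value, so add Val to the result.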
6509 case clang::AArch64::BI_InterlockedAdd:
6510 case clang::AArch64::BI_InterlockedAdd_acq:
6511 case clang::AArch64::BI_InterlockedAdd_rel:
6512 case clang::AArch64::BI_InterlockedAdd_nf:
6513 case clang::AArch64::BI_InterlockedAdd64:
6514 case clang::AArch64::BI_InterlockedAdd64_acq:
6515 case clang::AArch64::BI_InterlockedAdd64_rel:
6516 case clang::AArch64::BI_InterlockedAdd64_nf: {
6517 Address DestAddr = CheckAtomicAlignment(CGF&: *this, E);
6518 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
6519 llvm::AtomicOrdering Ordering;
6520 switch (BuiltinID) {
6521 case clang::AArch64::BI_InterlockedAdd:
6522 case clang::AArch64::BI_InterlockedAdd64:
6523 Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
6524 break;
6525 case clang::AArch64::BI_InterlockedAdd_acq:
6526 case clang::AArch64::BI_InterlockedAdd64_acq:
6527 Ordering = llvm::AtomicOrdering::Acquire;
6528 break;
6529 case clang::AArch64::BI_InterlockedAdd_rel:
6530 case clang::AArch64::BI_InterlockedAdd64_rel:
6531 Ordering = llvm::AtomicOrdering::Release;
6532 break;
6533 case clang::AArch64::BI_InterlockedAdd_nf:
6534 case clang::AArch64::BI_InterlockedAdd64_nf:
6535 Ordering = llvm::AtomicOrdering::Monotonic;
6536 break;
6537 default:
6538 llvm_unreachable("missing builtin ID in switch!");
6539 }
6540 AtomicRMWInst *RMWI =
6541 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Addr: DestAddr, Val, Ordering);
6542 return Builder.CreateAdd(LHS: RMWI, RHS: Val);
6543 }
6544 }
6545
6546 llvm::FixedVectorType *VTy = GetNeonType(CGF: this, TypeFlags: Type);
6547 llvm::Type *Ty = VTy;
6548 if (!Ty)
6549 return nullptr;
6550
6551 // Not all intrinsics handled by the common case work for AArch64 yet, so only
6552 // defer to common code if it's been added to our special map.
6553 Builtin = findARMVectorIntrinsicInMap(IntrinsicMap: AArch64SIMDIntrinsicMap, BuiltinID,
6554 MapProvenSorted&: AArch64SIMDIntrinsicsProvenSorted);
6555
6556 if (Builtin)
6557 return EmitCommonNeonBuiltinExpr(
6558 BuiltinID: Builtin->BuiltinID, LLVMIntrinsic: Builtin->LLVMIntrinsic, AltLLVMIntrinsic: Builtin->AltLLVMIntrinsic,
6559 NameHint: Builtin->NameHint, Modifier: Builtin->TypeModifier, E, Ops,
6560 /*never use addresses*/ PtrOp0: Address::invalid(), PtrOp1: Address::invalid(), Arch);
6561
6562 if (Value *V = EmitAArch64TblBuiltinExpr(CGF&: *this, BuiltinID, E, Ops, Arch))
6563 return V;
6564
6565 unsigned Int;
6566 bool ExtractLow = false;
6567 bool ExtendLaneArg = false;
6568 switch (BuiltinID) {
6569 default: return nullptr;
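// vbsl: bitwise select on the integer form of the vectors, computed as
// (mask & t) | (~mask & f).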
6570 case NEON::BI__builtin_neon_vbsl_v:
6571 case NEON::BI__builtin_neon_vbslq_v: {
6572 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6573 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: BitTy, Name: "vbsl");
6574 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: BitTy, Name: "vbsl");
6575 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: BitTy, Name: "vbsl");
6576
6577 Ops[1] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1], Name: "vbsl");
6578 Ops[2] = Builder.CreateAnd(LHS: Builder.CreateNot(V: Ops[0]), RHS: Ops[2], Name: "vbsl");
6579 Ops[0] = Builder.CreateOr(LHS: Ops[1], RHS: Ops[2], Name: "vbsl");
6580 return Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6581 }
6582 case NEON::BI__builtin_neon_vfma_lane_v:
6583 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6584 // The ARM builtins (and instructions) have the addend as the first
6585 // operand, but the 'fma' intrinsics have it last. Swap it around here.
6586 Value *Addend = Ops[0];
6587 Value *Multiplicand = Ops[1];
6588 Value *LaneSource = Ops[2];
6589 Ops[0] = Multiplicand;
6590 Ops[1] = LaneSource;
6591 Ops[2] = Addend;
6592
6593 // Now adjust things to handle the lane access.
6594 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
6595 ? llvm::FixedVectorType::get(ElementType: VTy->getElementType(),
6596 NumElts: VTy->getNumElements() / 2)
6597 : VTy;
6598 llvm::Constant *cst = cast<Constant>(Val: Ops[3]);
6599 Value *SV = llvm::ConstantVector::getSplat(EC: VTy->getElementCount(), Elt: cst);
6600 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SourceTy);
6601 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV, Name: "lane");
6602
6603 Ops.pop_back();
6604 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
6605 : Intrinsic::fma;
6606 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "fmla");
6607 }
6608 case NEON::BI__builtin_neon_vfma_laneq_v: {
6609 auto *VTy = cast<llvm::FixedVectorType>(Val: Ty);
6610 // v1f64 fma should be mapped to Neon scalar f64 fma
6611 if (VTy && VTy->getElementType() == DoubleTy) {
6612 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
6613 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: DoubleTy);
6614 llvm::FixedVectorType *VTy =
6615 GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6616 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: VTy);
6617 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "extract");
6618 Value *Result;
6619 Result = emitCallMaybeConstrainedFPBuiltin(
6620 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma,
6621 Ty: DoubleTy, Args: {Ops[1], Ops[2], Ops[0]});
6622 return Builder.CreateBitCast(V: Result, DestTy: Ty);
6623 }
6624 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6625 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6626
6627 auto *STy = llvm::FixedVectorType::get(ElementType: VTy->getElementType(),
6628 NumElts: VTy->getNumElements() * 2);
6629 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: STy);
6630 Value *SV = llvm::ConstantVector::getSplat(EC: VTy->getElementCount(),
6631 Elt: cast<ConstantInt>(Val: Ops[3]));
6632 Ops[2] = Builder.CreateShuffleVector(V1: Ops[2], V2: Ops[2], Mask: SV, Name: "lane");
6633
6634 return emitCallMaybeConstrainedFPBuiltin(
6635 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty,
6636 Args: {Ops[2], Ops[1], Ops[0]});
6637 }
6638 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6639 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6640 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
6641
6642 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
6643 Ops[2] = EmitNeonSplat(V: Ops[2], C: cast<ConstantInt>(Val: Ops[3]));
6644 return emitCallMaybeConstrainedFPBuiltin(
6645 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty,
6646 Args: {Ops[2], Ops[1], Ops[0]});
6647 }
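// Scalar fma with a lane operand: extract the requested lane, then emit a
// (possibly constrained) fma with the accumulator last.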
6648 case NEON::BI__builtin_neon_vfmah_lane_f16:
6649 case NEON::BI__builtin_neon_vfmas_lane_f32:
6650 case NEON::BI__builtin_neon_vfmah_laneq_f16:
6651 case NEON::BI__builtin_neon_vfmas_laneq_f32:
6652 case NEON::BI__builtin_neon_vfmad_lane_f64:
6653 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6654 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 3)));
6655 llvm::Type *Ty = ConvertType(T: E->getCallReturnType(Ctx: getContext()));
6656 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "extract");
6657 return emitCallMaybeConstrainedFPBuiltin(
6658 CGF&: *this, IntrinsicID: Intrinsic::fma, ConstrainedIntrinsicID: Intrinsic::experimental_constrained_fma, Ty,
6659 Args: {Ops[1], Ops[2], Ops[0]});
6660 }
6661 case NEON::BI__builtin_neon_vmull_v:
6662 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6663 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6664 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6665 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
6666 case NEON::BI__builtin_neon_vmax_v:
6667 case NEON::BI__builtin_neon_vmaxq_v:
6668 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6669 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6670 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6671 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmax");
6672 case NEON::BI__builtin_neon_vmaxh_f16: {
6673 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6674 Int = Intrinsic::aarch64_neon_fmax;
6675 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmax");
6676 }
6677 case NEON::BI__builtin_neon_vmin_v:
6678 case NEON::BI__builtin_neon_vminq_v:
6679 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6680 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6681 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6682 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmin");
6683 case NEON::BI__builtin_neon_vminh_f16: {
6684 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6685 Int = Intrinsic::aarch64_neon_fmin;
6686 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmin");
6687 }
6688 case NEON::BI__builtin_neon_vabd_v:
6689 case NEON::BI__builtin_neon_vabdq_v:
6690 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6691 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6692 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6693 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vabd");
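// vpadal: pairwise add-accumulate. Widen and pairwise-add the second operand
// with [us]addlp, then add the accumulator.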
6694 case NEON::BI__builtin_neon_vpadal_v:
6695 case NEON::BI__builtin_neon_vpadalq_v: {
6696 unsigned ArgElts = VTy->getNumElements();
6697 llvm::IntegerType *EltTy = cast<IntegerType>(Val: VTy->getElementType());
6698 unsigned BitWidth = EltTy->getBitWidth();
6699 auto *ArgTy = llvm::FixedVectorType::get(
6700 ElementType: llvm::IntegerType::get(C&: getLLVMContext(), NumBits: BitWidth / 2), NumElts: 2 * ArgElts);
6701 llvm::Type* Tys[2] = { VTy, ArgTy };
6702 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6703 SmallVector<llvm::Value*, 1> TmpOps;
6704 TmpOps.push_back(Elt: Ops[1]);
6705 Function *F = CGM.getIntrinsic(IID: Int, Tys);
6706 llvm::Value *tmp = EmitNeonCall(F, Ops&: TmpOps, name: "vpadal");
6707 llvm::Value *addend = Builder.CreateBitCast(V: Ops[0], DestTy: tmp->getType());
6708 return Builder.CreateAdd(LHS: tmp, RHS: addend);
6709 }
6710 case NEON::BI__builtin_neon_vpmin_v:
6711 case NEON::BI__builtin_neon_vpminq_v:
6712 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6713 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6714 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6715 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmin");
6716 case NEON::BI__builtin_neon_vpmax_v:
6717 case NEON::BI__builtin_neon_vpmaxq_v:
6718 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6719 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6720 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6721 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmax");
6722 case NEON::BI__builtin_neon_vminnm_v:
6723 case NEON::BI__builtin_neon_vminnmq_v:
6724 Int = Intrinsic::aarch64_neon_fminnm;
6725 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vminnm");
6726 case NEON::BI__builtin_neon_vminnmh_f16:
6727 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6728 Int = Intrinsic::aarch64_neon_fminnm;
6729 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vminnm");
6730 case NEON::BI__builtin_neon_vmaxnm_v:
6731 case NEON::BI__builtin_neon_vmaxnmq_v:
6732 Int = Intrinsic::aarch64_neon_fmaxnm;
6733 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmaxnm");
6734 case NEON::BI__builtin_neon_vmaxnmh_f16:
6735 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6736 Int = Intrinsic::aarch64_neon_fmaxnm;
6737 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmaxnm");
6738 case NEON::BI__builtin_neon_vrecpss_f32: {
6739 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6740 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_frecps, Tys: FloatTy),
6741 Ops, name: "vrecps");
6742 }
6743 case NEON::BI__builtin_neon_vrecpsd_f64:
6744 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6745 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_frecps, Tys: DoubleTy),
6746 Ops, name: "vrecps");
6747 case NEON::BI__builtin_neon_vrecpsh_f16:
6748 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
6749 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_frecps, Tys: HalfTy),
6750 Ops, name: "vrecps");
6751 case NEON::BI__builtin_neon_vqshrun_n_v:
6752 Int = Intrinsic::aarch64_neon_sqshrun;
6753 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrun_n");
6754 case NEON::BI__builtin_neon_vqrshrun_n_v:
6755 Int = Intrinsic::aarch64_neon_sqrshrun;
6756 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrun_n");
6757 case NEON::BI__builtin_neon_vqshrn_n_v:
6758 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6759 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrn_n");
6760 case NEON::BI__builtin_neon_vrshrn_n_v:
6761 Int = Intrinsic::aarch64_neon_rshrn;
6762 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrshrn_n");
6763 case NEON::BI__builtin_neon_vqrshrn_n_v:
6764 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6765 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrn_n");
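// The vrnd* builtins map directly to the matching LLVM rounding intrinsics,
// or to their experimental constrained counterparts under strict FP.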
6766 case NEON::BI__builtin_neon_vrndah_f16: {
6767 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6768 Int = Builder.getIsFPConstrained()
6769 ? Intrinsic::experimental_constrained_round
6770 : Intrinsic::round;
6771 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrnda");
6772 }
6773 case NEON::BI__builtin_neon_vrnda_v:
6774 case NEON::BI__builtin_neon_vrndaq_v: {
6775 Int = Builder.getIsFPConstrained()
6776 ? Intrinsic::experimental_constrained_round
6777 : Intrinsic::round;
6778 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnda");
6779 }
6780 case NEON::BI__builtin_neon_vrndih_f16: {
6781 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6782 Int = Builder.getIsFPConstrained()
6783 ? Intrinsic::experimental_constrained_nearbyint
6784 : Intrinsic::nearbyint;
6785 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndi");
6786 }
6787 case NEON::BI__builtin_neon_vrndmh_f16: {
6788 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6789 Int = Builder.getIsFPConstrained()
6790 ? Intrinsic::experimental_constrained_floor
6791 : Intrinsic::floor;
6792 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndm");
6793 }
6794 case NEON::BI__builtin_neon_vrndm_v:
6795 case NEON::BI__builtin_neon_vrndmq_v: {
6796 Int = Builder.getIsFPConstrained()
6797 ? Intrinsic::experimental_constrained_floor
6798 : Intrinsic::floor;
6799 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndm");
6800 }
6801 case NEON::BI__builtin_neon_vrndnh_f16: {
6802 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6803 Int = Builder.getIsFPConstrained()
6804 ? Intrinsic::experimental_constrained_roundeven
6805 : Intrinsic::roundeven;
6806 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndn");
6807 }
6808 case NEON::BI__builtin_neon_vrndn_v:
6809 case NEON::BI__builtin_neon_vrndnq_v: {
6810 Int = Builder.getIsFPConstrained()
6811 ? Intrinsic::experimental_constrained_roundeven
6812 : Intrinsic::roundeven;
6813 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndn");
6814 }
6815 case NEON::BI__builtin_neon_vrndns_f32: {
6816 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6817 Int = Builder.getIsFPConstrained()
6818 ? Intrinsic::experimental_constrained_roundeven
6819 : Intrinsic::roundeven;
6820 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: FloatTy), Ops, name: "vrndn");
6821 }
6822 case NEON::BI__builtin_neon_vrndph_f16: {
6823 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6824 Int = Builder.getIsFPConstrained()
6825 ? Intrinsic::experimental_constrained_ceil
6826 : Intrinsic::ceil;
6827 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndp");
6828 }
6829 case NEON::BI__builtin_neon_vrndp_v:
6830 case NEON::BI__builtin_neon_vrndpq_v: {
6831 Int = Builder.getIsFPConstrained()
6832 ? Intrinsic::experimental_constrained_ceil
6833 : Intrinsic::ceil;
6834 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndp");
6835 }
6836 case NEON::BI__builtin_neon_vrndxh_f16: {
6837 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6838 Int = Builder.getIsFPConstrained()
6839 ? Intrinsic::experimental_constrained_rint
6840 : Intrinsic::rint;
6841 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndx");
6842 }
6843 case NEON::BI__builtin_neon_vrndx_v:
6844 case NEON::BI__builtin_neon_vrndxq_v: {
6845 Int = Builder.getIsFPConstrained()
6846 ? Intrinsic::experimental_constrained_rint
6847 : Intrinsic::rint;
6848 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndx");
6849 }
6850 case NEON::BI__builtin_neon_vrndh_f16: {
6851 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6852 Int = Builder.getIsFPConstrained()
6853 ? Intrinsic::experimental_constrained_trunc
6854 : Intrinsic::trunc;
6855 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndz");
6856 }
6857 case NEON::BI__builtin_neon_vrnd32x_f32:
6858 case NEON::BI__builtin_neon_vrnd32xq_f32:
6859 case NEON::BI__builtin_neon_vrnd32x_f64:
6860 case NEON::BI__builtin_neon_vrnd32xq_f64: {
6861 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6862 Int = Intrinsic::aarch64_neon_frint32x;
6863 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd32x");
6864 }
6865 case NEON::BI__builtin_neon_vrnd32z_f32:
6866 case NEON::BI__builtin_neon_vrnd32zq_f32:
6867 case NEON::BI__builtin_neon_vrnd32z_f64:
6868 case NEON::BI__builtin_neon_vrnd32zq_f64: {
6869 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6870 Int = Intrinsic::aarch64_neon_frint32z;
6871 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd32z");
6872 }
6873 case NEON::BI__builtin_neon_vrnd64x_f32:
6874 case NEON::BI__builtin_neon_vrnd64xq_f32:
6875 case NEON::BI__builtin_neon_vrnd64x_f64:
6876 case NEON::BI__builtin_neon_vrnd64xq_f64: {
6877 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6878 Int = Intrinsic::aarch64_neon_frint64x;
6879 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd64x");
6880 }
6881 case NEON::BI__builtin_neon_vrnd64z_f32:
6882 case NEON::BI__builtin_neon_vrnd64zq_f32:
6883 case NEON::BI__builtin_neon_vrnd64z_f64:
6884 case NEON::BI__builtin_neon_vrnd64zq_f64: {
6885 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
6886 Int = Intrinsic::aarch64_neon_frint64z;
6887 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd64z");
6888 }
6889 case NEON::BI__builtin_neon_vrnd_v:
6890 case NEON::BI__builtin_neon_vrndq_v: {
6891 Int = Builder.getIsFPConstrained()
6892 ? Intrinsic::experimental_constrained_trunc
6893 : Intrinsic::trunc;
6894 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndz");
6895 }
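// Integer-to-f64 conversions: bitcast to the source integer vector type,
// then convert with uitofp/sitofp.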
6896 case NEON::BI__builtin_neon_vcvt_f64_v:
6897 case NEON::BI__builtin_neon_vcvtq_f64_v:
6898 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
6899 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6900 return usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
6901 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
6902 case NEON::BI__builtin_neon_vcvt_f64_f32: {
6903 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6904 "unexpected vcvt_f64_f32 builtin");
6905 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6906 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
6907
6908 return Builder.CreateFPExt(V: Ops[0], DestTy: Ty, Name: "vcvt");
6909 }
6910 case NEON::BI__builtin_neon_vcvt_f32_f64: {
6911 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6912 "unexpected vcvt_f32_f64 builtin");
6913 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6914 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
6915
6916 return Builder.CreateFPTrunc(V: Ops[0], DestTy: Ty, Name: "vcvt");
6917 }
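// FP-to-integer conversions: vcvt uses the round-toward-zero fcvtz[su]
// intrinsics; the vcvta/vcvtm/vcvtn/vcvtp variants below select ties-away,
// toward -inf, ties-to-even, and toward +inf respectively.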
6918 case NEON::BI__builtin_neon_vcvt_s32_v:
6919 case NEON::BI__builtin_neon_vcvt_u32_v:
6920 case NEON::BI__builtin_neon_vcvt_s64_v:
6921 case NEON::BI__builtin_neon_vcvt_u64_v:
6922 case NEON::BI__builtin_neon_vcvt_s16_f16:
6923 case NEON::BI__builtin_neon_vcvt_u16_f16:
6924 case NEON::BI__builtin_neon_vcvtq_s32_v:
6925 case NEON::BI__builtin_neon_vcvtq_u32_v:
6926 case NEON::BI__builtin_neon_vcvtq_s64_v:
6927 case NEON::BI__builtin_neon_vcvtq_u64_v:
6928 case NEON::BI__builtin_neon_vcvtq_s16_f16:
6929 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
6930 Int =
6931 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
6932 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type)};
6933 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtz");
6934 }
6935 case NEON::BI__builtin_neon_vcvta_s16_f16:
6936 case NEON::BI__builtin_neon_vcvta_u16_f16:
6937 case NEON::BI__builtin_neon_vcvta_s32_v:
6938 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
6939 case NEON::BI__builtin_neon_vcvtaq_s32_v:
6940 case NEON::BI__builtin_neon_vcvta_u32_v:
6941 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
6942 case NEON::BI__builtin_neon_vcvtaq_u32_v:
6943 case NEON::BI__builtin_neon_vcvta_s64_v:
6944 case NEON::BI__builtin_neon_vcvtaq_s64_v:
6945 case NEON::BI__builtin_neon_vcvta_u64_v:
6946 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6947 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6948 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6949 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvta");
6950 }
6951 case NEON::BI__builtin_neon_vcvtm_s16_f16:
6952 case NEON::BI__builtin_neon_vcvtm_s32_v:
6953 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
6954 case NEON::BI__builtin_neon_vcvtmq_s32_v:
6955 case NEON::BI__builtin_neon_vcvtm_u16_f16:
6956 case NEON::BI__builtin_neon_vcvtm_u32_v:
6957 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
6958 case NEON::BI__builtin_neon_vcvtmq_u32_v:
6959 case NEON::BI__builtin_neon_vcvtm_s64_v:
6960 case NEON::BI__builtin_neon_vcvtmq_s64_v:
6961 case NEON::BI__builtin_neon_vcvtm_u64_v:
6962 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6963 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6964 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6965 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtm");
6966 }
6967 case NEON::BI__builtin_neon_vcvtn_s16_f16:
6968 case NEON::BI__builtin_neon_vcvtn_s32_v:
6969 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
6970 case NEON::BI__builtin_neon_vcvtnq_s32_v:
6971 case NEON::BI__builtin_neon_vcvtn_u16_f16:
6972 case NEON::BI__builtin_neon_vcvtn_u32_v:
6973 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
6974 case NEON::BI__builtin_neon_vcvtnq_u32_v:
6975 case NEON::BI__builtin_neon_vcvtn_s64_v:
6976 case NEON::BI__builtin_neon_vcvtnq_s64_v:
6977 case NEON::BI__builtin_neon_vcvtn_u64_v:
6978 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6979 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6980 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6981 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtn");
6982 }
6983 case NEON::BI__builtin_neon_vcvtp_s16_f16:
6984 case NEON::BI__builtin_neon_vcvtp_s32_v:
6985 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
6986 case NEON::BI__builtin_neon_vcvtpq_s32_v:
6987 case NEON::BI__builtin_neon_vcvtp_u16_f16:
6988 case NEON::BI__builtin_neon_vcvtp_u32_v:
6989 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
6990 case NEON::BI__builtin_neon_vcvtpq_u32_v:
6991 case NEON::BI__builtin_neon_vcvtp_s64_v:
6992 case NEON::BI__builtin_neon_vcvtpq_s64_v:
6993 case NEON::BI__builtin_neon_vcvtp_u64_v:
6994 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6995 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6996 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
6997 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtp");
6998 }
6999 case NEON::BI__builtin_neon_vmulx_v:
7000 case NEON::BI__builtin_neon_vmulxq_v: {
7001 Int = Intrinsic::aarch64_neon_fmulx;
7002 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmulx");
7003 }
7004 case NEON::BI__builtin_neon_vmulxh_lane_f16:
7005 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7006 // vmulx_lane should be mapped to the Neon scalar mulx after
7007 // extracting the scalar element.
7008 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
7009 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
7010 Ops.pop_back();
7011 Int = Intrinsic::aarch64_neon_fmulx;
7012 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmulx");
7013 }
7014 case NEON::BI__builtin_neon_vmul_lane_v:
7015 case NEON::BI__builtin_neon_vmul_laneq_v: {
7016 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7017 bool Quad = false;
7018 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7019 Quad = true;
7020 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
7021 llvm::FixedVectorType *VTy =
7022 GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7023 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
7024 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
7025 Value *Result = Builder.CreateFMul(L: Ops[0], R: Ops[1]);
7026 return Builder.CreateBitCast(V: Result, DestTy: Ty);
7027 }
7028 case NEON::BI__builtin_neon_vnegd_s64:
7029 return Builder.CreateNeg(V: EmitScalarExpr(E: E->getArg(Arg: 0)), Name: "vnegd");
7030 case NEON::BI__builtin_neon_vnegh_f16:
7031 return Builder.CreateFNeg(V: EmitScalarExpr(E: E->getArg(Arg: 0)), Name: "vnegh");
7032 case NEON::BI__builtin_neon_vpmaxnm_v:
7033 case NEON::BI__builtin_neon_vpmaxnmq_v: {
7034 Int = Intrinsic::aarch64_neon_fmaxnmp;
7035 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmaxnm");
7036 }
7037 case NEON::BI__builtin_neon_vpminnm_v:
7038 case NEON::BI__builtin_neon_vpminnmq_v: {
7039 Int = Intrinsic::aarch64_neon_fminnmp;
7040 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpminnm");
7041 }
7042 case NEON::BI__builtin_neon_vsqrth_f16: {
7043 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7044 Int = Builder.getIsFPConstrained()
7045 ? Intrinsic::experimental_constrained_sqrt
7046 : Intrinsic::sqrt;
7047 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vsqrt");
7048 }
7049 case NEON::BI__builtin_neon_vsqrt_v:
7050 case NEON::BI__builtin_neon_vsqrtq_v: {
7051 Int = Builder.getIsFPConstrained()
7052 ? Intrinsic::experimental_constrained_sqrt
7053 : Intrinsic::sqrt;
7054 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7055 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vsqrt");
7056 }
7057 case NEON::BI__builtin_neon_vrbit_v:
7058 case NEON::BI__builtin_neon_vrbitq_v: {
7059 Int = Intrinsic::bitreverse;
7060 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrbit");
7061 }
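// Across-vector reductions: for 8- and 16-bit elements the aarch64
// [us]addv/[us]maxv/[us]minv intrinsics return an i32, which is truncated
// back to the element type.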
7062 case NEON::BI__builtin_neon_vaddv_u8:
7063 // FIXME: These are handled by the AArch64 scalar code.
7064 usgn = true;
7065 [[fallthrough]];
7066 case NEON::BI__builtin_neon_vaddv_s8: {
7067 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7068 Ty = Int32Ty;
7069 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7070 llvm::Type *Tys[2] = { Ty, VTy };
7071 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7072 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
7073 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7074 }
7075 case NEON::BI__builtin_neon_vaddv_u16:
7076 usgn = true;
7077 [[fallthrough]];
7078 case NEON::BI__builtin_neon_vaddv_s16: {
7079 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7080 Ty = Int32Ty;
7081 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7082 llvm::Type *Tys[2] = { Ty, VTy };
7083 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7084 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
7085 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7086 }
7087 case NEON::BI__builtin_neon_vaddvq_u8:
7088 usgn = true;
7089 [[fallthrough]];
7090 case NEON::BI__builtin_neon_vaddvq_s8: {
7091 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7092 Ty = Int32Ty;
7093 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7094 llvm::Type *Tys[2] = { Ty, VTy };
7095 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7096 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
7097 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7098 }
7099 case NEON::BI__builtin_neon_vaddvq_u16:
7100 usgn = true;
7101 [[fallthrough]];
7102 case NEON::BI__builtin_neon_vaddvq_s16: {
7103 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7104 Ty = Int32Ty;
7105 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7106 llvm::Type *Tys[2] = { Ty, VTy };
7107 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7108 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
7109 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7110 }
7111 case NEON::BI__builtin_neon_vmaxv_u8: {
7112 Int = Intrinsic::aarch64_neon_umaxv;
7113 Ty = Int32Ty;
7114 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7115 llvm::Type *Tys[2] = { Ty, VTy };
7116 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7117 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7118 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7119 }
7120 case NEON::BI__builtin_neon_vmaxv_u16: {
7121 Int = Intrinsic::aarch64_neon_umaxv;
7122 Ty = Int32Ty;
7123 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7124 llvm::Type *Tys[2] = { Ty, VTy };
7125 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7126 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7127 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7128 }
7129 case NEON::BI__builtin_neon_vmaxvq_u8: {
7130 Int = Intrinsic::aarch64_neon_umaxv;
7131 Ty = Int32Ty;
7132 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7133 llvm::Type *Tys[2] = { Ty, VTy };
7134 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7135 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7136 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7137 }
7138 case NEON::BI__builtin_neon_vmaxvq_u16: {
7139 Int = Intrinsic::aarch64_neon_umaxv;
7140 Ty = Int32Ty;
7141 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7142 llvm::Type *Tys[2] = { Ty, VTy };
7143 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7144 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7145 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7146 }
7147 case NEON::BI__builtin_neon_vmaxv_s8: {
7148 Int = Intrinsic::aarch64_neon_smaxv;
7149 Ty = Int32Ty;
7150 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7151 llvm::Type *Tys[2] = { Ty, VTy };
7152 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7153 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7154 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7155 }
7156 case NEON::BI__builtin_neon_vmaxv_s16: {
7157 Int = Intrinsic::aarch64_neon_smaxv;
7158 Ty = Int32Ty;
7159 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7160 llvm::Type *Tys[2] = { Ty, VTy };
7161 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7162 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7163 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7164 }
7165 case NEON::BI__builtin_neon_vmaxvq_s8: {
7166 Int = Intrinsic::aarch64_neon_smaxv;
7167 Ty = Int32Ty;
7168 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7169 llvm::Type *Tys[2] = { Ty, VTy };
7170 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7171 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7172 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7173 }
7174 case NEON::BI__builtin_neon_vmaxvq_s16: {
7175 Int = Intrinsic::aarch64_neon_smaxv;
7176 Ty = Int32Ty;
7177 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7178 llvm::Type *Tys[2] = { Ty, VTy };
7179 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7180 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7181 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7182 }
7183 case NEON::BI__builtin_neon_vmaxv_f16: {
7184 Int = Intrinsic::aarch64_neon_fmaxv;
7185 Ty = HalfTy;
7186 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
7187 llvm::Type *Tys[2] = { Ty, VTy };
7188 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7189 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7190 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7191 }
7192 case NEON::BI__builtin_neon_vmaxvq_f16: {
7193 Int = Intrinsic::aarch64_neon_fmaxv;
7194 Ty = HalfTy;
7195 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
7196 llvm::Type *Tys[2] = { Ty, VTy };
7197 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7198 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
7199 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7200 }
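// Min-across-vector reductions mirror the max cases above, using
// aarch64.neon.{u,s,f}minv.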
7201 case NEON::BI__builtin_neon_vminv_u8: {
7202 Int = Intrinsic::aarch64_neon_uminv;
7203 Ty = Int32Ty;
7204 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7205 llvm::Type *Tys[2] = { Ty, VTy };
7206 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7207 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7208 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7209 }
7210 case NEON::BI__builtin_neon_vminv_u16: {
7211 Int = Intrinsic::aarch64_neon_uminv;
7212 Ty = Int32Ty;
7213 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7214 llvm::Type *Tys[2] = { Ty, VTy };
7215 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7216 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7217 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7218 }
7219 case NEON::BI__builtin_neon_vminvq_u8: {
7220 Int = Intrinsic::aarch64_neon_uminv;
7221 Ty = Int32Ty;
7222 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7223 llvm::Type *Tys[2] = { Ty, VTy };
7224 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7225 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7226 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7227 }
7228 case NEON::BI__builtin_neon_vminvq_u16: {
7229 Int = Intrinsic::aarch64_neon_uminv;
7230 Ty = Int32Ty;
7231 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7232 llvm::Type *Tys[2] = { Ty, VTy };
7233 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7234 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7235 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7236 }
7237 case NEON::BI__builtin_neon_vminv_s8: {
7238 Int = Intrinsic::aarch64_neon_sminv;
7239 Ty = Int32Ty;
7240 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7241 llvm::Type *Tys[2] = { Ty, VTy };
7242 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7243 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7244 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7245 }
7246 case NEON::BI__builtin_neon_vminv_s16: {
7247 Int = Intrinsic::aarch64_neon_sminv;
7248 Ty = Int32Ty;
7249 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7250 llvm::Type *Tys[2] = { Ty, VTy };
7251 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7252 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7253 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7254 }
7255 case NEON::BI__builtin_neon_vminvq_s8: {
7256 Int = Intrinsic::aarch64_neon_sminv;
7257 Ty = Int32Ty;
7258 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7259 llvm::Type *Tys[2] = { Ty, VTy };
7260 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7261 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7262 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
7263 }
7264 case NEON::BI__builtin_neon_vminvq_s16: {
7265 Int = Intrinsic::aarch64_neon_sminv;
7266 Ty = Int32Ty;
7267 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7268 llvm::Type *Tys[2] = { Ty, VTy };
7269 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7270 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7271 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7272 }
7273 case NEON::BI__builtin_neon_vminv_f16: {
7274 Int = Intrinsic::aarch64_neon_fminv;
7275 Ty = HalfTy;
7276 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
7277 llvm::Type *Tys[2] = { Ty, VTy };
7278 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7279 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7280 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7281 }
7282 case NEON::BI__builtin_neon_vminvq_f16: {
7283 Int = Intrinsic::aarch64_neon_fminv;
7284 Ty = HalfTy;
7285 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
7286 llvm::Type *Tys[2] = { Ty, VTy };
7287 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7288 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
7289 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7290 }
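// The *nm reductions lower to fmaxnmv/fminnmv, i.e. the FMAXNM/FMINNM
// "number" semantics that prefer a numeric operand over a quiet NaN.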
7291 case NEON::BI__builtin_neon_vmaxnmv_f16: {
7292 Int = Intrinsic::aarch64_neon_fmaxnmv;
7293 Ty = HalfTy;
7294 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
7295 llvm::Type *Tys[2] = { Ty, VTy };
7296 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7297 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxnmv");
7298 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7299 }
7300 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7301 Int = Intrinsic::aarch64_neon_fmaxnmv;
7302 Ty = HalfTy;
7303 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
7304 llvm::Type *Tys[2] = { Ty, VTy };
7305 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7306 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxnmv");
7307 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7308 }
7309 case NEON::BI__builtin_neon_vminnmv_f16: {
7310 Int = Intrinsic::aarch64_neon_fminnmv;
7311 Ty = HalfTy;
7312 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
7313 llvm::Type *Tys[2] = { Ty, VTy };
7314 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7315 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminnmv");
7316 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7317 }
7318 case NEON::BI__builtin_neon_vminnmvq_f16: {
7319 Int = Intrinsic::aarch64_neon_fminnmv;
7320 Ty = HalfTy;
7321 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
7322 llvm::Type *Tys[2] = { Ty, VTy };
7323 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7324 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminnmv");
7325 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
7326 }
7327 case NEON::BI__builtin_neon_vmul_n_f64: {
7328 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
7329 Value *RHS = Builder.CreateBitCast(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: DoubleTy);
7330 return Builder.CreateFMul(L: Ops[0], R: RHS);
7331 }
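// Widening add-across-vector (vaddlv): the intrinsic always yields an i32;
// for 8-bit element sources the user-visible result is 16 bits wide, so the
// result is truncated, while 16-bit element sources return the i32 directly.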
7332 case NEON::BI__builtin_neon_vaddlv_u8: {
7333 Int = Intrinsic::aarch64_neon_uaddlv;
7334 Ty = Int32Ty;
7335 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7336 llvm::Type *Tys[2] = { Ty, VTy };
7337 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7338 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7339 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7340 }
7341 case NEON::BI__builtin_neon_vaddlv_u16: {
7342 Int = Intrinsic::aarch64_neon_uaddlv;
7343 Ty = Int32Ty;
7344 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7345 llvm::Type *Tys[2] = { Ty, VTy };
7346 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7347 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7348 }
7349 case NEON::BI__builtin_neon_vaddlvq_u8: {
7350 Int = Intrinsic::aarch64_neon_uaddlv;
7351 Ty = Int32Ty;
7352 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7353 llvm::Type *Tys[2] = { Ty, VTy };
7354 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7355 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7356 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7357 }
7358 case NEON::BI__builtin_neon_vaddlvq_u16: {
7359 Int = Intrinsic::aarch64_neon_uaddlv;
7360 Ty = Int32Ty;
7361 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7362 llvm::Type *Tys[2] = { Ty, VTy };
7363 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7364 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7365 }
7366 case NEON::BI__builtin_neon_vaddlv_s8: {
7367 Int = Intrinsic::aarch64_neon_saddlv;
7368 Ty = Int32Ty;
7369 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
7370 llvm::Type *Tys[2] = { Ty, VTy };
7371 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7372 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7373 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7374 }
7375 case NEON::BI__builtin_neon_vaddlv_s16: {
7376 Int = Intrinsic::aarch64_neon_saddlv;
7377 Ty = Int32Ty;
7378 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
7379 llvm::Type *Tys[2] = { Ty, VTy };
7380 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7381 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7382 }
7383 case NEON::BI__builtin_neon_vaddlvq_s8: {
7384 Int = Intrinsic::aarch64_neon_saddlv;
7385 Ty = Int32Ty;
7386 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7387 llvm::Type *Tys[2] = { Ty, VTy };
7388 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7389 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7390 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
7391 }
7392 case NEON::BI__builtin_neon_vaddlvq_s16: {
7393 Int = Intrinsic::aarch64_neon_saddlv;
7394 Ty = Int32Ty;
7395 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
7396 llvm::Type *Tys[2] = { Ty, VTy };
7397 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
7398 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
7399 }
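// Shift-right/left-and-insert immediates forward to the vsri/vsli intrinsics.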
7400 case NEON::BI__builtin_neon_vsri_n_v:
7401 case NEON::BI__builtin_neon_vsriq_n_v: {
7402 Int = Intrinsic::aarch64_neon_vsri;
7403 llvm::Function *Intrin = CGM.getIntrinsic(IID: Int, Tys: Ty);
7404 return EmitNeonCall(F: Intrin, Ops, name: "vsri_n");
7405 }
7406 case NEON::BI__builtin_neon_vsli_n_v:
7407 case NEON::BI__builtin_neon_vsliq_n_v: {
7408 Int = Intrinsic::aarch64_neon_vsli;
7409 llvm::Function *Intrin = CGM.getIntrinsic(IID: Int, Tys: Ty);
7410 return EmitNeonCall(F: Intrin, Ops, name: "vsli_n");
7411 }
7412 case NEON::BI__builtin_neon_vsra_n_v:
7413 case NEON::BI__builtin_neon_vsraq_n_v:
7414 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7415 Ops[1] = EmitNeonRShiftImm(Vec: Ops[1], Shift: Ops[2], Ty, usgn, name: "vsra_n");
7416 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
7417 case NEON::BI__builtin_neon_vrsra_n_v:
7418 case NEON::BI__builtin_neon_vrsraq_n_v: {
7419 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7420 SmallVector<llvm::Value*,2> TmpOps;
7421 TmpOps.push_back(Elt: Ops[1]);
7422 TmpOps.push_back(Elt: Ops[2]);
7423 Function* F = CGM.getIntrinsic(IID: Int, Tys: Ty);
7424 llvm::Value *tmp = EmitNeonCall(F, Ops&: TmpOps, name: "vrshr_n", shift: 1, rightshift: true);
7425 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
7426 return Builder.CreateAdd(LHS: Ops[0], RHS: tmp);
7427 }
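// Plain vld1/vst1 become ordinary loads/stores using the alignment of the
// pointer operand.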
7428 case NEON::BI__builtin_neon_vld1_v:
7429 case NEON::BI__builtin_neon_vld1q_v: {
7430 return Builder.CreateAlignedLoad(Ty: VTy, Addr: Ops[0], Align: PtrOp0.getAlignment());
7431 }
7432 case NEON::BI__builtin_neon_vst1_v:
7433 case NEON::BI__builtin_neon_vst1q_v:
7434 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
7435 return Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
7436 case NEON::BI__builtin_neon_vld1_lane_v:
7437 case NEON::BI__builtin_neon_vld1q_lane_v: {
7438 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7439 Ops[0] = Builder.CreateAlignedLoad(Ty: VTy->getElementType(), Addr: Ops[0],
7440 Align: PtrOp0.getAlignment());
7441 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vld1_lane");
7442 }
7443 case NEON::BI__builtin_neon_vldap1_lane_s64:
7444 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
7445 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7446 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
7447 Ty: VTy->getElementType(), Addr: Ops[0], Align: PtrOp0.getAlignment());
7448 LI->setAtomic(Ordering: llvm::AtomicOrdering::Acquire);
7449 Ops[0] = LI;
7450 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vldap1_lane");
7451 }
7452 case NEON::BI__builtin_neon_vld1_dup_v:
7453 case NEON::BI__builtin_neon_vld1q_dup_v: {
7454 Value *V = PoisonValue::get(T: Ty);
7455 Ops[0] = Builder.CreateAlignedLoad(Ty: VTy->getElementType(), Addr: Ops[0],
7456 Align: PtrOp0.getAlignment());
7457 llvm::Constant *CI = ConstantInt::get(Ty: Int32Ty, V: 0);
7458 Ops[0] = Builder.CreateInsertElement(Vec: V, NewElt: Ops[0], Idx: CI);
7459 return EmitNeonSplat(V: Ops[0], C: CI);
7460 }
7461 case NEON::BI__builtin_neon_vst1_lane_v:
7462 case NEON::BI__builtin_neon_vst1q_lane_v:
7463 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7464 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
7465 return Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
7466 case NEON::BI__builtin_neon_vstl1_lane_s64:
7467 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
7468 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7469 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
7470 llvm::StoreInst *SI =
7471 Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
7472 SI->setAtomic(Ordering: llvm::AtomicOrdering::Release);
7473 return SI;
7474 }
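// Structured loads: the ldN (and ldNr dup) intrinsics return an aggregate of
// N vectors, which is then stored through the sret pointer in Ops[0].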
7475 case NEON::BI__builtin_neon_vld2_v:
7476 case NEON::BI__builtin_neon_vld2q_v: {
7477 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7478 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld2, Tys);
7479 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld2");
7480 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7481 }
7482 case NEON::BI__builtin_neon_vld3_v:
7483 case NEON::BI__builtin_neon_vld3q_v: {
7484 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7485 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld3, Tys);
7486 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld3");
7487 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7488 }
7489 case NEON::BI__builtin_neon_vld4_v:
7490 case NEON::BI__builtin_neon_vld4q_v: {
7491 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7492 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld4, Tys);
7493 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld4");
7494 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7495 }
7496 case NEON::BI__builtin_neon_vld2_dup_v:
7497 case NEON::BI__builtin_neon_vld2q_dup_v: {
7498 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7499 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld2r, Tys);
7500 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld2");
7501 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7502 }
7503 case NEON::BI__builtin_neon_vld3_dup_v:
7504 case NEON::BI__builtin_neon_vld3q_dup_v: {
7505 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7506 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld3r, Tys);
7507 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld3");
7508 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7509 }
7510 case NEON::BI__builtin_neon_vld4_dup_v:
7511 case NEON::BI__builtin_neon_vld4q_dup_v: {
7512 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7513 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld4r, Tys);
7514 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld4");
7515 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7516 }
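// Lane variants: rotate the source pointer to the end so the operands match
// the ldNlane intrinsic order (vectors..., lane, pointer), then store the
// aggregate result through Ops[0].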
7517 case NEON::BI__builtin_neon_vld2_lane_v:
7518 case NEON::BI__builtin_neon_vld2q_lane_v: {
7519 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7520 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld2lane, Tys);
7521 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
7522 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7523 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7524 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
7525 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld2_lane");
7526 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7527 }
7528 case NEON::BI__builtin_neon_vld3_lane_v:
7529 case NEON::BI__builtin_neon_vld3q_lane_v: {
7530 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7531 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld3lane, Tys);
7532 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
7533 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7534 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7535 Ops[3] = Builder.CreateBitCast(V: Ops[3], DestTy: Ty);
7536 Ops[4] = Builder.CreateZExt(V: Ops[4], DestTy: Int64Ty);
7537 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld3_lane");
7538 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7539 }
7540 case NEON::BI__builtin_neon_vld4_lane_v:
7541 case NEON::BI__builtin_neon_vld4q_lane_v: {
7542 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7543 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_ld4lane, Tys);
7544 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
7545 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7546 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7547 Ops[3] = Builder.CreateBitCast(V: Ops[3], DestTy: Ty);
7548 Ops[4] = Builder.CreateBitCast(V: Ops[4], DestTy: Ty);
7549 Ops[5] = Builder.CreateZExt(V: Ops[5], DestTy: Int64Ty);
7550 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld4_lane");
7551 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7552 }
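// Structured stores: rotate the destination pointer to the end to match the
// stN/stNlane intrinsic order (vectors..., [lane,] pointer).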
7553 case NEON::BI__builtin_neon_vst2_v:
7554 case NEON::BI__builtin_neon_vst2q_v: {
7555 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7556 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7557 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st2, Tys),
7558 Ops, name: "");
7559 }
7560 case NEON::BI__builtin_neon_vst2_lane_v:
7561 case NEON::BI__builtin_neon_vst2q_lane_v: {
7562 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7563 Ops[2] = Builder.CreateZExt(V: Ops[2], DestTy: Int64Ty);
7564 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7565 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st2lane, Tys),
7566 Ops, name: "");
7567 }
7568 case NEON::BI__builtin_neon_vst3_v:
7569 case NEON::BI__builtin_neon_vst3q_v: {
7570 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7571 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7572 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st3, Tys),
7573 Ops, name: "");
7574 }
7575 case NEON::BI__builtin_neon_vst3_lane_v:
7576 case NEON::BI__builtin_neon_vst3q_lane_v: {
7577 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7578 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
7579 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7580 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st3lane, Tys),
7581 Ops, name: "");
7582 }
7583 case NEON::BI__builtin_neon_vst4_v:
7584 case NEON::BI__builtin_neon_vst4q_v: {
7585 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7586 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7587 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st4, Tys),
7588 Ops, name: "");
7589 }
7590 case NEON::BI__builtin_neon_vst4_lane_v:
7591 case NEON::BI__builtin_neon_vst4q_lane_v: {
7592 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
7593 Ops[4] = Builder.CreateZExt(V: Ops[4], DestTy: Int64Ty);
7594 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7595 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_st4lane, Tys),
7596 Ops, name: "");
7597 }
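// vtrn/vuzp/vzip each produce two shuffled vectors; the two halves are
// written to consecutive vector-sized slots of the result pointed to by
// Ops[0].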
7598 case NEON::BI__builtin_neon_vtrn_v:
7599 case NEON::BI__builtin_neon_vtrnq_v: {
7600 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7601 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7602 Value *SV = nullptr;
7603
7604 for (unsigned vi = 0; vi != 2; ++vi) {
7605 SmallVector<int, 16> Indices;
7606 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7607 Indices.push_back(Elt: i+vi);
7608 Indices.push_back(Elt: i+e+vi);
7609 }
7610 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
7611 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vtrn");
7612 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
7613 }
7614 return SV;
7615 }
7616 case NEON::BI__builtin_neon_vuzp_v:
7617 case NEON::BI__builtin_neon_vuzpq_v: {
7618 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7619 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7620 Value *SV = nullptr;
7621
7622 for (unsigned vi = 0; vi != 2; ++vi) {
7623 SmallVector<int, 16> Indices;
7624 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7625 Indices.push_back(Elt: 2*i+vi);
7626
7627 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
7628 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vuzp");
7629 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
7630 }
7631 return SV;
7632 }
7633 case NEON::BI__builtin_neon_vzip_v:
7634 case NEON::BI__builtin_neon_vzipq_v: {
7635 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7636 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7637 Value *SV = nullptr;
7638
7639 for (unsigned vi = 0; vi != 2; ++vi) {
7640 SmallVector<int, 16> Indices;
7641 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7642 Indices.push_back(Elt: (i + vi*e) >> 1);
7643 Indices.push_back(Elt: ((i + vi*e) >> 1)+e);
7644 }
7645 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
7646 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vzip");
7647 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
7648 }
7649 return SV;
7650 }
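// Quad table lookups (tbl) and table extensions (tbx) forward directly to the
// corresponding aarch64.neon intrinsics.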
7651 case NEON::BI__builtin_neon_vqtbl1q_v: {
7652 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbl1, Tys: Ty),
7653 Ops, name: "vtbl1");
7654 }
7655 case NEON::BI__builtin_neon_vqtbl2q_v: {
7656 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbl2, Tys: Ty),
7657 Ops, name: "vtbl2");
7658 }
7659 case NEON::BI__builtin_neon_vqtbl3q_v: {
7660 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbl3, Tys: Ty),
7661 Ops, name: "vtbl3");
7662 }
7663 case NEON::BI__builtin_neon_vqtbl4q_v: {
7664 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbl4, Tys: Ty),
7665 Ops, name: "vtbl4");
7666 }
7667 case NEON::BI__builtin_neon_vqtbx1q_v: {
7668 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbx1, Tys: Ty),
7669 Ops, name: "vtbx1");
7670 }
7671 case NEON::BI__builtin_neon_vqtbx2q_v: {
7672 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbx2, Tys: Ty),
7673 Ops, name: "vtbx2");
7674 }
7675 case NEON::BI__builtin_neon_vqtbx3q_v: {
7676 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbx3, Tys: Ty),
7677 Ops, name: "vtbx3");
7678 }
7679 case NEON::BI__builtin_neon_vqtbx4q_v: {
7680 return EmitNeonCall(F: CGM.getIntrinsic(IID: Intrinsic::aarch64_neon_tbx4, Tys: Ty),
7681 Ops, name: "vtbx4");
7682 }
7683 case NEON::BI__builtin_neon_vsqadd_v:
7684 case NEON::BI__builtin_neon_vsqaddq_v: {
7685 Int = Intrinsic::aarch64_neon_usqadd;
7686 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vsqadd");
7687 }
7688 case NEON::BI__builtin_neon_vuqadd_v:
7689 case NEON::BI__builtin_neon_vuqaddq_v: {
7690 Int = Intrinsic::aarch64_neon_suqadd;
7691 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vuqadd");
7692 }
7693
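// Lookup-table builtins: the vluti2 forms are overloaded on both the result
// type and a second NEON type built from the element type (128-bit for the
// 'q' forms, 64-bit otherwise), while the vluti4q forms are overloaded on the
// result type alone.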
7694 case NEON::BI__builtin_neon_vluti2_laneq_mf8:
7695 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
7696 case NEON::BI__builtin_neon_vluti2_laneq_f16:
7697 case NEON::BI__builtin_neon_vluti2_laneq_p16:
7698 case NEON::BI__builtin_neon_vluti2_laneq_p8:
7699 case NEON::BI__builtin_neon_vluti2_laneq_s16:
7700 case NEON::BI__builtin_neon_vluti2_laneq_s8:
7701 case NEON::BI__builtin_neon_vluti2_laneq_u16:
7702 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
7703 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7704 llvm::Type *Tys[2];
7705 Tys[0] = Ty;
7706 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
7707 /*isQuad*/ false));
7708 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_laneq");
7709 }
7710 case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
7711 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
7712 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
7713 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
7714 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
7715 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
7716 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
7717 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
7718 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
7719 Int = Intrinsic::aarch64_neon_vluti2_laneq;
7720 llvm::Type *Tys[2];
7721 Tys[0] = Ty;
7722 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
7723 /*isQuad*/ true));
7724 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_laneq");
7725 }
7726 case NEON::BI__builtin_neon_vluti2_lane_mf8:
7727 case NEON::BI__builtin_neon_vluti2_lane_bf16:
7728 case NEON::BI__builtin_neon_vluti2_lane_f16:
7729 case NEON::BI__builtin_neon_vluti2_lane_p16:
7730 case NEON::BI__builtin_neon_vluti2_lane_p8:
7731 case NEON::BI__builtin_neon_vluti2_lane_s16:
7732 case NEON::BI__builtin_neon_vluti2_lane_s8:
7733 case NEON::BI__builtin_neon_vluti2_lane_u16:
7734 case NEON::BI__builtin_neon_vluti2_lane_u8: {
7735 Int = Intrinsic::aarch64_neon_vluti2_lane;
7736 llvm::Type *Tys[2];
7737 Tys[0] = Ty;
7738 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
7739 /*isQuad*/ false));
7740 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_lane");
7741 }
7742 case NEON::BI__builtin_neon_vluti2q_lane_mf8:
7743 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
7744 case NEON::BI__builtin_neon_vluti2q_lane_f16:
7745 case NEON::BI__builtin_neon_vluti2q_lane_p16:
7746 case NEON::BI__builtin_neon_vluti2q_lane_p8:
7747 case NEON::BI__builtin_neon_vluti2q_lane_s16:
7748 case NEON::BI__builtin_neon_vluti2q_lane_s8:
7749 case NEON::BI__builtin_neon_vluti2q_lane_u16:
7750 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
7751 Int = Intrinsic::aarch64_neon_vluti2_lane;
7752 llvm::Type *Tys[2];
7753 Tys[0] = Ty;
7754 Tys[1] = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
7755 /*isQuad*/ true));
7756 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vluti2_lane");
7757 }
7758 case NEON::BI__builtin_neon_vluti4q_lane_mf8:
7759 case NEON::BI__builtin_neon_vluti4q_lane_p8:
7760 case NEON::BI__builtin_neon_vluti4q_lane_s8:
7761 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
7762 Int = Intrinsic::aarch64_neon_vluti4q_lane;
7763 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_lane");
7764 }
7765 case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
7766 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
7767 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
7768 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
7769 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
7770 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_laneq");
7771 }
7772 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
7773 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
7774 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
7775 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
7776 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
7777 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
7778 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_lane_x2");
7779 }
7780 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
7781 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
7782 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
7783 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
7784 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
7785 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
7786 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vluti4q_laneq_x2");
7787 }
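// FP8 (mf8) conversions: the trailing fpm operand is handled inside
// EmitFP8NeonCvtCall; the *_low variants set ExtractLow so only the low half
// of the source is converted.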
7788 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
7789 ExtractLow = true;
7790 LLVM_FALLTHROUGH;
7791 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
7792 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
7793 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_cvtl1,
7794 Ty0: llvm::FixedVectorType::get(ElementType: BFloatTy, NumElts: 8),
7795 Ty1: Ops[0]->getType(), Extract: ExtractLow, Ops, E, name: "vbfcvt1");
7796 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
7797 ExtractLow = true;
7798 LLVM_FALLTHROUGH;
7799 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
7800 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
7801 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_cvtl2,
7802 Ty0: llvm::FixedVectorType::get(ElementType: BFloatTy, NumElts: 8),
7803 Ty1: Ops[0]->getType(), Extract: ExtractLow, Ops, E, name: "vbfcvt2");
7804 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
7805 ExtractLow = true;
7806 LLVM_FALLTHROUGH;
7807 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
7808 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
7809 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_cvtl1,
7810 Ty0: llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8),
7811 Ty1: Ops[0]->getType(), Extract: ExtractLow, Ops, E, name: "vbfcvt1");
7812 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
7813 ExtractLow = true;
7814 LLVM_FALLTHROUGH;
7815 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
7816 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
7817 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_cvtl2,
7818 Ty0: llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8),
7819 Ty1: Ops[0]->getType(), Extract: ExtractLow, Ops, E, name: "vbfcvt2");
7820 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
7821 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_fcvtn,
7822 Ty0: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8),
7823 Ty1: Ops[0]->getType(), Extract: false, Ops, E, name: "vfcvtn");
7824 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
7825 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_fcvtn,
7826 Ty0: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8),
7827 Ty1: llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4), Extract: false, Ops,
7828 E, name: "vfcvtn");
7829 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
7830 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_fcvtn,
7831 Ty0: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16),
7832 Ty1: llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8), Extract: false, Ops,
7833 E, name: "vfcvtn");
7834 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
7835 llvm::Type *Ty = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
7836 Ops[0] = Builder.CreateInsertVector(DstType: Ty, SrcVec: PoisonValue::get(T: Ty), SubVec: Ops[0],
7837 Idx: uint64_t(0));
7838 return EmitFP8NeonCvtCall(IID: Intrinsic::aarch64_neon_fp8_fcvtn2, Ty0: Ty,
7839 Ty1: Ops[1]->getType(), Extract: false, Ops, E, name: "vfcvtn2");
7840 }
7841
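// FP8 dot products: fdot2 produces f16 results and fdot4 f32 results; the
// _lane forms set ExtendLaneArg before calling the shared FDOT helper.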
7842 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
7843 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
7844 return EmitFP8NeonFDOTCall(IID: Intrinsic::aarch64_neon_fp8_fdot2, ExtendLaneArg: false, RetTy: HalfTy,
7845 Ops, E, name: "fdot2");
7846 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
7847 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
7848 ExtendLaneArg = true;
7849 LLVM_FALLTHROUGH;
7850 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
7851 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
7852 return EmitFP8NeonFDOTCall(IID: Intrinsic::aarch64_neon_fp8_fdot2_lane,
7853 ExtendLaneArg, RetTy: HalfTy, Ops, E, name: "fdot2_lane");
7854 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
7855 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
7856 return EmitFP8NeonFDOTCall(IID: Intrinsic::aarch64_neon_fp8_fdot4, ExtendLaneArg: false,
7857 RetTy: FloatTy, Ops, E, name: "fdot4");
7858 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
7859 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
7860 ExtendLaneArg = true;
7861 LLVM_FALLTHROUGH;
7862 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
7863 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
7864 return EmitFP8NeonFDOTCall(IID: Intrinsic::aarch64_neon_fp8_fdot4_lane,
7865 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "fdot4_lane");
7866
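// FP8 widening multiply-accumulate: fmlalb/fmlalt accumulate into f16 and the
// fmlall* forms into f32; the _lane variants again set ExtendLaneArg before
// the shared FMLA helper.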
7867 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
7868 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlalb,
7869 Tys: {llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8)}, Ops, E,
7870 name: "vmlal");
7871 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
7872 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlalt,
7873 Tys: {llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8)}, Ops, E,
7874 name: "vmlal");
7875 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
7876 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlallbb,
7877 Tys: {llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4)}, Ops, E,
7878 name: "vmlall");
7879 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
7880 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlallbt,
7881 Tys: {llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4)}, Ops, E,
7882 name: "vmlall");
7883 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
7884 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlalltb,
7885 Tys: {llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4)}, Ops, E,
7886 name: "vmlall");
7887 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
7888 return EmitFP8NeonCall(IID: Intrinsic::aarch64_neon_fp8_fmlalltt,
7889 Tys: {llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4)}, Ops, E,
7890 name: "vmlall");
7891 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
7892 ExtendLaneArg = true;
7893 LLVM_FALLTHROUGH;
7894 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
7895 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlalb_lane,
7896 ExtendLaneArg, RetTy: HalfTy, Ops, E, name: "vmlal_lane");
7897 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
7898 ExtendLaneArg = true;
7899 LLVM_FALLTHROUGH;
7900 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
7901 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlalt_lane,
7902 ExtendLaneArg, RetTy: HalfTy, Ops, E, name: "vmlal_lane");
7903 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
7904 ExtendLaneArg = true;
7905 LLVM_FALLTHROUGH;
7906 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
7907 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
7908 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "vmlall_lane");
7909 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
7910 ExtendLaneArg = true;
7911 LLVM_FALLTHROUGH;
7912 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
7913 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
7914 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "vmlall_lane");
7915 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
7916 ExtendLaneArg = true;
7917 LLVM_FALLTHROUGH;
7918 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
7919 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
7920 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "vmlall_lane");
7921 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
7922 ExtendLaneArg = true;
7923 LLVM_FALLTHROUGH;
7924 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
7925 return EmitFP8NeonFMLACall(IID: Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
7926 ExtendLaneArg, RetTy: FloatTy, Ops, E, name: "vmlall_lane");
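// vamin/vamax lower to the famin/famax intrinsics (FEAT_FAMINMAX absolute
// minimum/maximum).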
7927 case NEON::BI__builtin_neon_vamin_f16:
7928 case NEON::BI__builtin_neon_vaminq_f16:
7929 case NEON::BI__builtin_neon_vamin_f32:
7930 case NEON::BI__builtin_neon_vaminq_f32:
7931 case NEON::BI__builtin_neon_vaminq_f64: {
7932 Int = Intrinsic::aarch64_neon_famin;
7933 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "famin");
7934 }
7935 case NEON::BI__builtin_neon_vamax_f16:
7936 case NEON::BI__builtin_neon_vamaxq_f16:
7937 case NEON::BI__builtin_neon_vamax_f32:
7938 case NEON::BI__builtin_neon_vamaxq_f32:
7939 case NEON::BI__builtin_neon_vamaxq_f64: {
7940 Int = Intrinsic::aarch64_neon_famax;
7941 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "famax");
7942 }
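// vscale lowers to the fscale intrinsic, an element-wise scale by a power of
// two taken from the integer operand.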
7943 case NEON::BI__builtin_neon_vscale_f16:
7944 case NEON::BI__builtin_neon_vscaleq_f16:
7945 case NEON::BI__builtin_neon_vscale_f32:
7946 case NEON::BI__builtin_neon_vscaleq_f32:
7947 case NEON::BI__builtin_neon_vscaleq_f64: {
7948 Int = Intrinsic::aarch64_neon_fp8_fscale;
7949 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "fscale");
7950 }
7951 }
7952}
7953
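// Lower the BPF CO-RE builtins (__builtin_preserve_field_info,
// __builtin_btf_type_id, __builtin_preserve_type_info and
// __builtin_preserve_enum_value) to their llvm.bpf.* intrinsics, e.g.
// (illustrative only; Kind stands for one of the BPF field-info kind
// constants):
//   unsigned Off = __builtin_preserve_field_info(obj->field, Kind);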
7954Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
7955 const CallExpr *E) {
7956 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
7957 BuiltinID == BPF::BI__builtin_btf_type_id ||
7958 BuiltinID == BPF::BI__builtin_preserve_type_info ||
7959 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
7960 "unexpected BPF builtin");
7961
7962 // A sequence number, injected into the emitted IR intrinsic calls, to
7963 // prevent CSE when the only difference between two otherwise identical
7964 // calls is the attached debuginfo metadata.
7965 static uint32_t BuiltinSeqNum;
7966
7967 switch (BuiltinID) {
7968 default:
7969 llvm_unreachable("Unexpected BPF builtin");
7970 case BPF::BI__builtin_preserve_field_info: {
7971 const Expr *Arg = E->getArg(Arg: 0);
7972 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
7973
7974 if (!getDebugInfo()) {
7975 CGM.Error(loc: E->getExprLoc(),
7976 error: "using __builtin_preserve_field_info() without -g");
7977 return IsBitField ? EmitLValue(E: Arg).getRawBitFieldPointer(CGF&: *this)
7978 : EmitLValue(E: Arg).emitRawPointer(CGF&: *this);
7979 }
7980
7981 // Enable underlying preserve_*_access_index() generation.
7982 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
7983 IsInPreservedAIRegion = true;
7984 Value *FieldAddr = IsBitField ? EmitLValue(E: Arg).getRawBitFieldPointer(CGF&: *this)
7985 : EmitLValue(E: Arg).emitRawPointer(CGF&: *this);
7986 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
7987
7988 ConstantInt *C = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
7989 Value *InfoKind = ConstantInt::get(Ty: Int64Ty, V: C->getSExtValue());
7990
7991 // Build the IR for the preserve_field_info intrinsic.
7992 llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
7993 M: &CGM.getModule(), id: Intrinsic::bpf_preserve_field_info,
7994 Tys: {FieldAddr->getType()});
7995 return Builder.CreateCall(Callee: FnGetFieldInfo, Args: {FieldAddr, InfoKind});
7996 }
7997 case BPF::BI__builtin_btf_type_id:
7998 case BPF::BI__builtin_preserve_type_info: {
7999 if (!getDebugInfo()) {
8000 CGM.Error(loc: E->getExprLoc(), error: "using builtin function without -g");
8001 return nullptr;
8002 }
8003
8004 const Expr *Arg0 = E->getArg(Arg: 0);
8005 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
8006 Ty: Arg0->getType(), Loc: Arg0->getExprLoc());
8007
8008 ConstantInt *Flag = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
8009 Value *FlagValue = ConstantInt::get(Ty: Int64Ty, V: Flag->getSExtValue());
8010 Value *SeqNumVal = ConstantInt::get(Ty: Int32Ty, V: BuiltinSeqNum++);
8011
8012 llvm::Function *FnDecl;
8013 if (BuiltinID == BPF::BI__builtin_btf_type_id)
8014 FnDecl = Intrinsic::getOrInsertDeclaration(
8015 M: &CGM.getModule(), id: Intrinsic::bpf_btf_type_id, Tys: {});
8016 else
8017 FnDecl = Intrinsic::getOrInsertDeclaration(
8018 M: &CGM.getModule(), id: Intrinsic::bpf_preserve_type_info, Tys: {});
8019 CallInst *Fn = Builder.CreateCall(Callee: FnDecl, Args: {SeqNumVal, FlagValue});
8020 Fn->setMetadata(KindID: LLVMContext::MD_preserve_access_index, Node: DbgInfo);
8021 return Fn;
8022 }
8023 case BPF::BI__builtin_preserve_enum_value: {
8024 if (!getDebugInfo()) {
8025 CGM.Error(loc: E->getExprLoc(), error: "using builtin function without -g");
8026 return nullptr;
8027 }
8028
8029 const Expr *Arg0 = E->getArg(Arg: 0);
8030 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
8031 Ty: Arg0->getType(), Loc: Arg0->getExprLoc());
8032
8033 // Find the enumerator the argument ultimately refers to.
8034 const auto *UO = cast<UnaryOperator>(Val: Arg0->IgnoreParens());
8035 const auto *CE = cast<CStyleCastExpr>(Val: UO->getSubExpr());
8036 const auto *DR = cast<DeclRefExpr>(Val: CE->getSubExpr());
8037 const auto *Enumerator = cast<EnumConstantDecl>(Val: DR->getDecl());
8038
8039 auto InitVal = Enumerator->getInitVal();
8040 std::string InitValStr;
8041 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
8042 InitValStr = std::to_string(val: InitVal.getSExtValue());
8043 else
8044 InitValStr = std::to_string(val: InitVal.getZExtValue());
8045 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
8046 Value *EnumStrVal = Builder.CreateGlobalString(Str: EnumStr);
8047
8048 ConstantInt *Flag = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
8049 Value *FlagValue = ConstantInt::get(Ty: Int64Ty, V: Flag->getSExtValue());
8050 Value *SeqNumVal = ConstantInt::get(Ty: Int32Ty, V: BuiltinSeqNum++);
8051
8052 llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
8053 M: &CGM.getModule(), id: Intrinsic::bpf_preserve_enum_value, Tys: {});
8054 CallInst *Fn =
8055 Builder.CreateCall(Callee: IntrinsicFn, Args: {SeqNumVal, EnumStrVal, FlagValue});
8056 Fn->setMetadata(KindID: LLVMContext::MD_preserve_access_index, Node: DbgInfo);
8057 return Fn;
8058 }
8059 }
8060}
8061
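// Build a vector from a power-of-two number of scalar operands, folding to a
// ConstantVector when every operand is a constant.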
8062llvm::Value *CodeGenFunction::
8063BuildVector(ArrayRef<llvm::Value*> Ops) {
8064 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
8065 "Not a power-of-two sized vector!");
8066 bool AllConstants = true;
8067 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
8068 AllConstants &= isa<Constant>(Val: Ops[i]);
8069
8070 // If this is a constant vector, create a ConstantVector.
8071 if (AllConstants) {
8072 SmallVector<llvm::Constant*, 16> CstOps;
8073 for (llvm::Value *Op : Ops)
8074 CstOps.push_back(Elt: cast<Constant>(Val: Op));
8075 return llvm::ConstantVector::get(V: CstOps);
8076 }
8077
8078 // Otherwise, insertelement the values to build the vector.
8079 Value *Result = llvm::PoisonValue::get(
8080 T: llvm::FixedVectorType::get(ElementType: Ops[0]->getType(), NumElts: Ops.size()));
8081
8082 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
8083 Result = Builder.CreateInsertElement(Vec: Result, NewElt: Ops[i], Idx: Builder.getInt64(C: i));
8084
8085 return Result;
8086}
8087
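// Emit a call to the runtime helper __init_cpu_features_resolver so that
// __aarch64_cpu_features is populated before it is read.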
8088Value *CodeGenFunction::EmitAArch64CpuInit() {
8089 llvm::FunctionType *FTy = llvm::FunctionType::get(Result: VoidTy, isVarArg: false);
8090 llvm::FunctionCallee Func =
8091 CGM.CreateRuntimeFunction(Ty: FTy, Name: "__init_cpu_features_resolver");
8092 cast<llvm::GlobalValue>(Val: Func.getCallee())->setDSOLocal(true);
8093 cast<llvm::GlobalValue>(Val: Func.getCallee())
8094 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
8095 return Builder.CreateCall(Callee: Func);
8096}
8097
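// Lower __builtin_cpu_supports for AArch64: the argument is a '+'-separated
// FMV feature string, e.g. (illustrative) __builtin_cpu_supports("sve2+fp16").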
8098Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
8099 const Expr *ArgExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
8100 StringRef ArgStr = cast<StringLiteral>(Val: ArgExpr)->getString();
8101 llvm::SmallVector<StringRef, 8> Features;
8102 ArgStr.split(A&: Features, Separator: "+");
8103 for (auto &Feature : Features) {
8104 Feature = Feature.trim();
8105 if (!llvm::AArch64::parseFMVExtension(Extension: Feature))
8106 return Builder.getFalse();
8107 if (Feature != "default")
8108 Features.push_back(Elt: Feature);
8109 }
8110 return EmitAArch64CpuSupports(FeatureStrs: Features);
8111}
8112
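// Test the requested FMV feature mask against the bits published by the
// runtime in __aarch64_cpu_features.features.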
8113llvm::Value *
8114CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
8115 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(Features: FeaturesStrs);
8116 Value *Result = Builder.getTrue();
8117 if (FeaturesMask != 0) {
8118 // Get features from structure in runtime library
8119 // struct {
8120 // unsigned long long features;
8121 // } __aarch64_cpu_features;
8122 llvm::Type *STy = llvm::StructType::get(elt1: Int64Ty);
8123 llvm::Constant *AArch64CPUFeatures =
8124 CGM.CreateRuntimeVariable(Ty: STy, Name: "__aarch64_cpu_features");
8125 cast<llvm::GlobalValue>(Val: AArch64CPUFeatures)->setDSOLocal(true);
8126 llvm::Value *CpuFeatures = Builder.CreateGEP(
8127 Ty: STy, Ptr: AArch64CPUFeatures,
8128 IdxList: {ConstantInt::get(Ty: Int32Ty, V: 0), ConstantInt::get(Ty: Int32Ty, V: 0)});
8129 Value *Features = Builder.CreateAlignedLoad(Ty: Int64Ty, Addr: CpuFeatures,
8130 Align: CharUnits::fromQuantity(Quantity: 8));
8131 Value *Mask = Builder.getInt64(C: FeaturesMask);
8132 Value *Bitset = Builder.CreateAnd(LHS: Features, RHS: Mask);
8133 Value *Cmp = Builder.CreateICmpEQ(LHS: Bitset, RHS: Mask);
8134 Result = Builder.CreateAnd(LHS: Result, RHS: Cmp);
8135 }
8136 return Result;
8137}
8138